In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
# Standard libraries
import os
from datetime import datetime

# PyTorch
import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.optim.lr_scheduler import OneCycleLR
import torch.utils.data as data
from torch.utils.data import Dataset, DataLoader
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.utils.tensorboard import SummaryWriter

# Progress tracking
from tqdm import tqdm

In [None]:
from vad.architectures import STAD
from vad.datasets import TrajectoryDataset, ExactBatchSampler

In [4]:
# Cap the number of CPU threads PyTorch uses for this session.
torch.set_num_threads(8)

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


# Experiment Parameters

In [4]:
# --- Run identity and data options ---
exp_type = 'baseline'
include_weather = False      # forwarded to TrajectoryDataset
n_weather_vars = 0           # forwarded to STAD

# --- Model size ---
embed_dim = 32
n_head_te = 8                # passed to STAD as nhead_te
n_layers_te = 4
latent_dim_ae = 32
hidden_dim_gmm = 32
n_components = 30            # passed to STAD as n_components_gmm

# --- Regularisation and numerical constants ---
dropout = 0.1
weight_decay = 0.1
eps_gmm = 1e-7               # passed to STAD as eps_gmm
eps_loss = 1                 # default epsilon of calculate_gmm_penalty

# --- Optimisation schedule ---
epochs = 100
learning_rate = 1e-5         # used as max_lr (peak) of the OneCycleLR schedule
patience = epochs            # equal to epochs, so early stopping cannot cut the run short

# Tag used for the TensorBoard run directory and the checkpoint folder.
experiment_name = '_'.join(
    str(part)
    for part in (
        exp_type,
        'epochs', epochs,
        'pat', patience,
        'embed', embed_dim,
        'wd', weight_decay,
        'lr', learning_rate,
        'hgmm', hidden_dim_gmm,
        'lae', latent_dim_ae,
        'comp', n_components,
    )
)
print(experiment_name)

baseline_epochs_100_pat_100_embed_32_wd_0.1_lr_1e-05_hgmm_32_lae_32_comp_30


# STAD Instantiation

In [6]:
# Build the STAD model from the experiment parameters defined above.
# NOTE(review): the bin counts and max_seq_len are hard-coded here; the bin
# counts presumably have to match those given to TrajectoryDataset -- confirm.
stad = STAD(
    n_lat_bins=400,
    n_lon_bins=400,
    n_sog_bins=30,
    n_cog_bins=72,
    max_seq_len=10,
    embed_dim=embed_dim,
    dropout=dropout,
    nhead_te=n_head_te,
    n_layers_te=n_layers_te,
    latent_dim_ae=latent_dim_ae,
    n_weather_vars=n_weather_vars,
    hidden_dim_gmm=hidden_dim_gmm,
    eps_gmm=eps_gmm,
    n_components_gmm=n_components,
)

# Move the parameters to the selected device, then show the module tree.
stad = stad.to(device)
print(stad)

STAD(
  (embedding): TrajectoryEmbedding(
    (lat_embed): Embedding(400, 32)
    (lon_embed): Embedding(400, 32)
    (sog_embed): Embedding(30, 32)
    (cog_embed): Embedding(72, 32)
  )
  (transenc): TrajectoryTransformerEncoder(
    (pos_encoder): PositionalEncoding(
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): TransformerEncoder(
      (layers): ModuleList(
        (0-3): 4 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
          )
          (linear1): Linear(in_features=128, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=2048, out_features=128, bias=True)
          (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
         

# STAD Loss Function

In [7]:
def calculate_gmm_penalty(sigma, epsilon=eps_loss):
    """
    GMM covariance penalty: the sum of 1 / (diagonal entry + epsilon) taken
    over the diagonals of all covariance matrices in one vectorized pass.

    sigma: Component covariances. Shape: [num_components, input_dim, input_dim]
           (the last two dimensions are the ones the diagonal is read from).
    epsilon: Offset added to every diagonal entry before the reciprocal so
             that very small entries do not blow up the penalty. The default
             is the value `eps_loss` had when this function was defined.

    Returns a scalar tensor.
    """
    # Diagonal of every matrix at once -> shape [num_components, input_dim]
    variances = sigma.diagonal(dim1=-2, dim2=-1)

    # Shift first, then invert, then reduce everything to a single number
    inverse_variances = 1.0 / (variances + epsilon)
    return inverse_variances.sum()

def compute_full_loss(penalty, transformer_loss, energy, d,
                      lambda_1=1, lambda_2=1, lambda_3=5e-3):
    """
    Combine the STAD loss terms into a single weighted sum:

        transformer_loss + lambda_1 * energy + lambda_2 * d + lambda_3 * penalty

    The default weights (lambda_1=1, lambda_2=1, lambda_3=0.005) are the ones
    used in the STAD publication.
    """
    weighted_energy = lambda_1 * energy
    weighted_d = lambda_2 * d
    weighted_penalty = lambda_3 * penalty
    return transformer_loss + weighted_energy + weighted_d + weighted_penalty

# STAD Training-Set Evaluation (Eval-Mode Loss)

In [8]:
def evaluate_training_set(model, train_dataloader, device):
    """
    Evaluate the training set with the model in eval mode to get an unbiased loss.

    Makes one gradient-free pass over `train_dataloader` and averages the
    combined STAD loss over the batches, so the result is free of dropout
    effects.

    model: Called as `model(inputs, targets, weather_stats)`; must return
        `(l, energy, d_h, sigma)`.
    train_dataloader: Yields dict batches with 'src_window' and 'tgt_window'
        entries (dicts of tensors) and, optionally, a 'weather_stats' tensor.
        Must support `len()`.
    device: Device the batch tensors are moved to.

    Returns the mean per-batch loss as a Python float.
    Raises ZeroDivisionError if the dataloader yields no batches.
    Side effect: the model is left in eval mode.
    """
    model.eval()
    total_train_loss = 0

    with torch.no_grad():
        for batch in tqdm(train_dataloader, desc="Evaluating Training Set"):
            # Move data to device
            inputs = {k: v.to(device) for k, v in batch.get('src_window').items()}
            targets = {k: v.to(device) for k, v in batch.get('tgt_window').items()}

            # 'weather_stats' is optional: only move it when the batch has it,
            # instead of failing with AttributeError on None.
            weather_stats = batch.get('weather_stats', None)
            if weather_stats is not None:
                weather_stats = weather_stats.to(device)

            # Forward pass
            # NOTE(review): the previous comment said the model takes its
            # "testing=False" path here -- confirm against STAD.forward.
            l, energy, d_h, sigma = model(inputs, targets, weather_stats)
            l, energy, d_h = l.mean(), energy.mean(), d_h.mean()

            # Calculate loss components
            penalty = calculate_gmm_penalty(sigma)
            penalty = penalty.mean()

            # Compute final loss
            train_eval_loss = compute_full_loss(penalty, l, energy, d_h).mean()

            # Accumulate loss as a plain Python number
            total_train_loss += train_eval_loss.item()

    # Return average loss
    return total_train_loss / len(train_dataloader)

# STAD Validation Loop

In [9]:
def validate(model, dataloader, device):
    """
    Run one pass over `dataloader` in eval mode and average the loss terms.

    The forward passes run under `torch.no_grad()`: validation never calls
    backward, so building autograd graphs would only cost memory and time.

    model: Called as `model(inputs, targets, weather_stats)`; must return
        `(l, energy, d_h, sigma)`.
    dataloader: Yields dict batches with 'src_window' and 'tgt_window'
        entries (dicts of tensors) and, optionally, a 'weather_stats' tensor.
        Must support `len()`.
    device: Device the batch tensors are moved to.

    Returns `(avg_val_loss, avg_energy, avg_te_loss)` as Python floats, each
    a mean over batches.
    Raises ZeroDivisionError if the dataloader yields no batches.
    Side effect: the model is left in eval mode.
    """
    total_val_loss = 0
    total_energy = 0
    total_te_loss = 0

    model.eval()

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Validation"):

            # Move data to device
            inputs = {k: v.to(device) for k, v in batch.get('src_window').items()}
            targets = {k: v.to(device) for k, v in batch.get('tgt_window').items()}

            # 'weather_stats' is optional: only move it when the batch has it,
            # instead of failing with AttributeError on None.
            weather_stats = batch.get('weather_stats', None)
            if weather_stats is not None:
                weather_stats = weather_stats.to(device)

            # Pass data to model
            l, energy, d_h, sigma = model(inputs, targets, weather_stats)
            l, energy, d_h = l.mean(), energy.mean(), d_h.mean()

            # Calculate loss components
            penalty = calculate_gmm_penalty(sigma)
            penalty = penalty.mean()

            # Compute the final loss
            stad_loss = compute_full_loss(penalty, l, energy, d_h)
            stad_loss = stad_loss.mean()

            # Update running totals as plain Python numbers
            total_val_loss += stad_loss.item()
            total_energy += energy.item()
            total_te_loss += l.item()

    # Calculate average validation loss, energy and transformer loss
    n_batches = len(dataloader)
    avg_val_loss = total_val_loss / n_batches
    avg_energy = total_energy / n_batches
    avg_te_loss = total_te_loss / n_batches
    return avg_val_loss, avg_energy, avg_te_loss

# STAD Training Loop

In [None]:
def train(model,
          train_dataloader,
          valid_dataloader,
          optimizer,
          scheduler,
          num_epochs,
          device,
          patience,
          save_dir='./models'):
    """
    Train `model` with per-batch scheduler steps, TensorBoard logging,
    checkpointing and early stopping on the validation loss.

    model: Called as `model(inputs, targets, weather_stats)`; must return
        `(l, energy, d_h, sigma)`.
    train_dataloader / valid_dataloader: Yield dict batches with 'src_window'
        and 'tgt_window' entries (dicts of tensors) and, optionally, a
        'weather_stats' tensor. Both must support `len()`.
    optimizer: Stepped once per training batch.
    scheduler: Stepped once per training batch, right after the optimizer.
    num_epochs: Maximum number of epochs.
    device: Device the batch tensors are moved to.
    patience: Stop once the validation loss has failed to improve this many
        epochs in a row.
    save_dir: Directory for 'STAD_latest.pth' (written every epoch) and
        'STAD_best.pth' (written when the validation loss improves).

    Reads the module-level `experiment_name` to name the TensorBoard run
    directory under './runs'.

    Returns `model`, with the weights of the best checkpoint of this run
    loaded. If no best checkpoint was written (validation loss never
    improved, e.g. it was NaN, or `num_epochs` is 0), the model is returned
    with its current weights.
    """
    # Create directory and fix the checkpoint paths up front, so they exist
    # even when no epoch ever produces a "best" checkpoint.
    os.makedirs(save_dir, exist_ok=True)
    latest_model_path = os.path.join(save_dir, 'STAD_latest.pth')
    best_model_path = os.path.join(save_dir, 'STAD_best.pth')

    # Initialize TensorBoard writer
    timestamp = datetime.now().strftime('%b%d_%H-%M-%S')
    writer = SummaryWriter(log_dir=f'./runs/{timestamp}_{experiment_name}')

    # Initialize variables for early stopping
    best_val_loss = float('inf')
    patience_counter = 0
    # Tracks whether THIS run wrote a best checkpoint; a file left over from
    # an earlier run in the same directory must not be loaded by mistake.
    best_saved = False

    steps_per_epoch = len(train_dataloader)

    try:
        # Training loop
        for epoch in range(num_epochs):
            model.train()
            train_loss = 0.0

            # Training phase
            for batchidx, batch in enumerate(tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]")):

                # Move data to device
                inputs = {k: v.to(device) for k, v in batch.get('src_window').items()}
                targets = {k: v.to(device) for k, v in batch.get('tgt_window').items()}

                # 'weather_stats' is optional: only move it when present,
                # instead of failing with AttributeError on None.
                weather_stats = batch.get('weather_stats', None)
                if weather_stats is not None:
                    weather_stats = weather_stats.to(device)

                # Zero gradients
                optimizer.zero_grad()

                # Pass data to model
                l, energy, d_h, sigma = model(inputs, targets, weather_stats)
                l, energy, d_h = l.mean(), energy.mean(), d_h.mean()

                # Calculate loss components
                penalty = calculate_gmm_penalty(sigma)

                # Compute the final loss
                stad_loss = compute_full_loss(penalty, l, energy, d_h)

                # Log and print progress every 200 batches
                if batchidx % 200 == 0:
                    global_step = epoch * steps_per_epoch + batchidx
                    writer.add_scalar('Batch/te_loss', l, global_step)
                    writer.add_scalar('Batch/Energy', energy, global_step)
                    writer.add_scalar('Batch/train_loss', stad_loss, global_step)
                    # 0.005 mirrors the default lambda_3 of compute_full_loss
                    writer.add_scalar('Batch/Penalty', penalty*0.005, global_step)
                    print(f'Batch {batchidx}/{len(train_dataloader)} | Loss: {stad_loss:.6f}')

                # Backward pass and optimize
                stad_loss.backward()
                optimizer.step()
                scheduler.step()

                # Accumulate a detached copy so the running total never keeps
                # an autograd graph alive and needs no per-step host sync.
                train_loss += stad_loss.detach()

            # Average training loss for this epoch as a plain float, so the
            # checkpoint below does not embed a device tensor.
            avg_train_loss = float(train_loss) / steps_per_epoch
            true_loss = evaluate_training_set(model, train_dataloader, device) # already averaged

            # Validation phase: no backward pass follows, so run without autograd
            with torch.no_grad():
                val_loss, avg_energy, avg_te_loss = validate(model, valid_dataloader, device)

            # Log metrics to TensorBoard
            writer.add_scalar('Epoch/train_loss', avg_train_loss, epoch)
            writer.add_scalar('Epoch/validation_loss', val_loss, epoch)
            writer.add_scalar('Epoch/avg_energy', avg_energy, epoch)
            writer.add_scalar('Epoch/avg_te_loss', avg_te_loss, epoch)
            writer.add_scalar('Epoch/learning_rate', scheduler.get_last_lr()[0], epoch)
            writer.add_scalar('Epoch/true_loss', true_loss, epoch)

            # Print epoch summary
            print(f'Epoch {epoch+1}/{num_epochs} | Average Train Loss: {avg_train_loss:.6f} | Average Validation Loss: {val_loss:.6f}')

            # One checkpoint payload, shared by the "latest" and "best" files
            checkpoint = {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': avg_train_loss,
                'val_loss': val_loss
            }

            # Save latest model
            torch.save(checkpoint, latest_model_path)

            # Check if this is the best model so far
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0

                # Save best model
                torch.save(checkpoint, best_model_path)
                best_saved = True
                print(f"Saved new best model with validation loss: {val_loss:.6f}")
            else:
                patience_counter += 1
                print(f"Validation loss did not improve. Patience: {patience_counter}/{patience}")

            # Early stopping check
            if patience_counter >= patience:
                print(f"Early stopping triggered after {epoch+1} epochs!")
                break

            # Free cached memory
            torch.cuda.empty_cache()
    finally:
        # Close the TensorBoard writer even if training is interrupted or fails
        writer.close()

    # Nothing to restore when this run never produced a best checkpoint
    if not best_saved:
        print("No best checkpoint was saved during this run; returning the model with its current weights.")
        return model

    # Load the best model onto the training device
    checkpoint = torch.load(best_model_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    print(f"Loaded best model from epoch {checkpoint['epoch']+1} with validation loss: {checkpoint['val_loss']:.6f}")

    return model

# Datasets and DataLoaders

In [None]:
# Training and validation splits share the same binning and source directory;
# only the split name and the pickle file differ.
# NOTE(review): the bin counts repeat the literals passed to STAD and
# presumably have to stay in sync with them -- confirm.
traj_dataset_train = TrajectoryDataset(
    ds_type='train',
    lat_bins=400,
    lon_bins=400,
    sog_bins=30,
    cog_bins=72,
    file_directory='../../data/ais',
    filename='ct_train.pkl',
    include_weather=include_weather,
)

traj_dataset_valid = TrajectoryDataset(
    ds_type='valid',
    lat_bins=400,
    lon_bins=400,
    sog_bins=30,
    cog_bins=72,
    file_directory='../../data/ais',
    filename='ct_valid.pkl',
    include_weather=include_weather,
)

In [12]:
# One batch sampler per split, built from the batch boundaries each dataset exposes.
# NOTE(review): presumably every boundary pair delimits one pre-defined batch
# and shuffle_batches only permutes the batch order -- confirm in vad.datasets.
# NOTE(review): the validation sampler shuffles as well; validation does not
# appear to need it, so consider shuffle_batches=False there for a fixed order.
train_batch_sampler = ExactBatchSampler(traj_dataset_train.batch_boundaries, shuffle_batches=True)
valid_batch_sampler = ExactBatchSampler(traj_dataset_valid.batch_boundaries, shuffle_batches=True)

In [13]:
# Batches come ready-made from the batch samplers, so no batch_size or shuffle
# is given here. Four worker processes per loader are kept alive between
# epochs, and batches are placed in pinned host memory.
data_loader_train = DataLoader(
    traj_dataset_train,
    batch_sampler=train_batch_sampler,
    num_workers=4,
    pin_memory=True,
    persistent_workers=True,
)
data_loader_valid = DataLoader(
    traj_dataset_valid,
    batch_sampler=valid_batch_sampler,
    num_workers=4,
    pin_memory=True,
    persistent_workers=True,
)

# Training call

In [None]:
# AdamW over all STAD parameters. No lr is passed here: the learning rate is
# driven by the one-cycle schedule below.
optimizer = AdamW(params=stad.parameters(), weight_decay=weight_decay)

# One-cycle schedule with cosine annealing, sized for one scheduler step per
# training batch over the whole run (epochs * batches per epoch).
scheduler = OneCycleLR(
    optimizer=optimizer,
    max_lr=learning_rate,  # Peak learning rate
    epochs=epochs,
    steps_per_epoch=len(data_loader_train),
    anneal_strategy='cos',
)

In [None]:
# Launch the full training run. Checkpoints go to a folder named after the
# experiment; the returned model is the one handed back by train().
final_model = train(
    model=stad,
    train_dataloader=data_loader_train,
    valid_dataloader=data_loader_valid,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=epochs,
    device=device,
    patience=patience,
    save_dir=f'./models/{experiment_name}',
)

Epoch 1/100 [Train]:   0%|          | 5/4538 [00:00<06:24, 11.79it/s]

Batch 0/4538 | Loss: 240.857086


Epoch 1/100 [Train]:   5%|▍         | 208/4538 [00:05<01:47, 40.32it/s]

Batch 200/4538 | Loss: 239.363220


Epoch 1/100 [Train]:   9%|▉         | 408/4538 [00:10<01:41, 40.86it/s]

Batch 400/4538 | Loss: 237.805359


Epoch 1/100 [Train]:  13%|█▎        | 608/4538 [00:15<01:36, 40.76it/s]

Batch 600/4538 | Loss: 240.165634


Epoch 1/100 [Train]:  18%|█▊        | 808/4538 [00:20<01:31, 40.97it/s]

Batch 800/4538 | Loss: 233.472900


Epoch 1/100 [Train]:  22%|██▏       | 1008/4538 [00:25<01:26, 40.93it/s]

Batch 1000/4538 | Loss: 243.482727


Epoch 1/100 [Train]:  27%|██▋       | 1208/4538 [00:30<01:21, 40.76it/s]

Batch 1200/4538 | Loss: 235.325821


Epoch 1/100 [Train]:  31%|███       | 1408/4538 [00:35<01:16, 40.91it/s]

Batch 1400/4538 | Loss: 237.948380


Epoch 1/100 [Train]:  35%|███▌      | 1608/4538 [00:39<01:11, 41.05it/s]

Batch 1600/4538 | Loss: 231.307571


Epoch 1/100 [Train]:  40%|███▉      | 1808/4538 [00:44<01:06, 41.17it/s]

Batch 1800/4538 | Loss: 235.969025


Epoch 1/100 [Train]:  44%|████▍     | 2008/4538 [00:49<01:01, 41.14it/s]

Batch 2000/4538 | Loss: 239.350388


Epoch 1/100 [Train]:  49%|████▊     | 2208/4538 [00:54<00:56, 41.00it/s]

Batch 2200/4538 | Loss: 230.639832


Epoch 1/100 [Train]:  53%|█████▎    | 2408/4538 [00:59<00:51, 41.11it/s]

Batch 2400/4538 | Loss: 242.323410


Epoch 1/100 [Train]:  57%|█████▋    | 2608/4538 [01:04<00:47, 40.81it/s]

Batch 2600/4538 | Loss: 238.247467


Epoch 1/100 [Train]:  62%|██████▏   | 2808/4538 [01:09<00:42, 40.85it/s]

Batch 2800/4538 | Loss: 232.770599


Epoch 1/100 [Train]:  66%|██████▋   | 3008/4538 [01:14<00:37, 41.08it/s]

Batch 3000/4538 | Loss: 237.732056


Epoch 1/100 [Train]:  71%|███████   | 3208/4538 [01:19<00:32, 41.03it/s]

Batch 3200/4538 | Loss: 223.891403


Epoch 1/100 [Train]:  75%|███████▌  | 3408/4538 [01:23<00:27, 40.86it/s]

Batch 3400/4538 | Loss: 229.354568


Epoch 1/100 [Train]:  80%|███████▉  | 3608/4538 [01:28<00:22, 41.02it/s]

Batch 3600/4538 | Loss: 232.814301


Epoch 1/100 [Train]:  84%|████████▍ | 3808/4538 [01:33<00:17, 40.92it/s]

Batch 3800/4538 | Loss: 219.333252


Epoch 1/100 [Train]:  88%|████████▊ | 4008/4538 [01:38<00:13, 40.70it/s]

Batch 4000/4538 | Loss: 234.238617


Epoch 1/100 [Train]:  93%|█████████▎| 4208/4538 [01:43<00:08, 41.10it/s]

Batch 4200/4538 | Loss: 227.501404


Epoch 1/100 [Train]:  97%|█████████▋| 4408/4538 [01:48<00:03, 40.83it/s]

Batch 4400/4538 | Loss: 240.146866


Epoch 1/100 [Train]: 100%|██████████| 4538/4538 [01:51<00:00, 40.71it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 130.96it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 102.57it/s]


Epoch 1/100 | Average Train Loss: 235.862839 | Average Validation Loss: 231.682650
Saved new best model with validation loss: 231.682650


Epoch 2/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 228.800476


Epoch 2/100 [Train]:   4%|▍         | 204/4538 [00:05<01:45, 41.00it/s]

Batch 200/4538 | Loss: 236.571579


Epoch 2/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 41.12it/s]

Batch 400/4538 | Loss: 243.943283


Epoch 2/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:35, 41.32it/s]

Batch 600/4538 | Loss: 243.521667


Epoch 2/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:31, 40.86it/s]

Batch 800/4538 | Loss: 237.975983


Epoch 2/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:25, 41.25it/s]

Batch 1000/4538 | Loss: 239.170319


Epoch 2/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:21, 40.93it/s]

Batch 1200/4538 | Loss: 218.353806


Epoch 2/100 [Train]:  31%|███       | 1404/4538 [00:34<01:16, 40.99it/s]

Batch 1400/4538 | Loss: 242.941864


Epoch 2/100 [Train]:  35%|███▌      | 1604/4538 [00:39<01:11, 41.15it/s]

Batch 1600/4538 | Loss: 228.414902


Epoch 2/100 [Train]:  40%|███▉      | 1804/4538 [00:43<01:05, 41.44it/s]

Batch 1800/4538 | Loss: 231.749710


Epoch 2/100 [Train]:  44%|████▍     | 2004/4538 [00:48<01:01, 40.88it/s]

Batch 2000/4538 | Loss: 235.288910


Epoch 2/100 [Train]:  49%|████▊     | 2204/4538 [00:53<00:56, 41.17it/s]

Batch 2200/4538 | Loss: 225.007843


Epoch 2/100 [Train]:  53%|█████▎    | 2404/4538 [00:58<00:52, 40.90it/s]

Batch 2400/4538 | Loss: 231.908295


Epoch 2/100 [Train]:  57%|█████▋    | 2604/4538 [01:03<00:47, 41.09it/s]

Batch 2600/4538 | Loss: 218.713303


Epoch 2/100 [Train]:  62%|██████▏   | 2804/4538 [01:08<00:42, 41.28it/s]

Batch 2800/4538 | Loss: 232.509888


Epoch 2/100 [Train]:  66%|██████▌   | 3004/4538 [01:13<00:37, 41.07it/s]

Batch 3000/4538 | Loss: 243.813522


Epoch 2/100 [Train]:  71%|███████   | 3204/4538 [01:17<00:32, 41.04it/s]

Batch 3200/4538 | Loss: 225.418320


Epoch 2/100 [Train]:  75%|███████▌  | 3404/4538 [01:22<00:27, 41.06it/s]

Batch 3400/4538 | Loss: 225.295700


Epoch 2/100 [Train]:  79%|███████▉  | 3605/4538 [01:27<00:24, 38.53it/s]

Batch 3600/4538 | Loss: 214.209641


Epoch 2/100 [Train]:  84%|████████▍ | 3805/4538 [01:32<00:19, 38.45it/s]

Batch 3800/4538 | Loss: 240.869934


Epoch 2/100 [Train]:  88%|████████▊ | 4005/4538 [01:38<00:13, 38.52it/s]

Batch 4000/4538 | Loss: 217.610580


Epoch 2/100 [Train]:  93%|█████████▎| 4205/4538 [01:43<00:08, 38.56it/s]

Batch 4200/4538 | Loss: 232.040298


Epoch 2/100 [Train]:  97%|█████████▋| 4405/4538 [01:48<00:03, 37.79it/s]

Batch 4400/4538 | Loss: 199.029861


Epoch 2/100 [Train]: 100%|██████████| 4538/4538 [01:51<00:00, 40.52it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:33<00:00, 134.99it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 101.21it/s]


Epoch 2/100 | Average Train Loss: 228.766083 | Average Validation Loss: 226.017269
Saved new best model with validation loss: 226.017269


Epoch 3/100 [Train]:   0%|          | 8/4538 [00:00<02:07, 35.59it/s]

Batch 0/4538 | Loss: 228.946762


Epoch 3/100 [Train]:   5%|▍         | 208/4538 [00:05<01:52, 38.53it/s]

Batch 200/4538 | Loss: 237.104309


Epoch 3/100 [Train]:   9%|▉         | 408/4538 [00:10<01:46, 38.89it/s]

Batch 400/4538 | Loss: 232.285522


Epoch 3/100 [Train]:  13%|█▎        | 608/4538 [00:15<01:42, 38.45it/s]

Batch 600/4538 | Loss: 221.407715


Epoch 3/100 [Train]:  18%|█▊        | 808/4538 [00:20<01:35, 39.03it/s]

Batch 800/4538 | Loss: 228.518738


Epoch 3/100 [Train]:  22%|██▏       | 1007/4538 [00:25<01:24, 41.63it/s]

Batch 1000/4538 | Loss: 229.910110


Epoch 3/100 [Train]:  27%|██▋       | 1207/4538 [00:30<01:22, 40.60it/s]

Batch 1200/4538 | Loss: 235.588928


Epoch 3/100 [Train]:  31%|███       | 1407/4538 [00:35<01:20, 38.81it/s]

Batch 1400/4538 | Loss: 233.800674


Epoch 3/100 [Train]:  35%|███▌      | 1607/4538 [00:40<01:15, 38.83it/s]

Batch 1600/4538 | Loss: 239.034241


Epoch 3/100 [Train]:  40%|███▉      | 1807/4538 [00:45<01:11, 38.32it/s]

Batch 1800/4538 | Loss: 207.277802


Epoch 3/100 [Train]:  44%|████▍     | 2007/4538 [00:50<01:01, 41.21it/s]

Batch 2000/4538 | Loss: 229.950867


Epoch 3/100 [Train]:  49%|████▊     | 2207/4538 [00:55<00:56, 41.43it/s]

Batch 2200/4538 | Loss: 223.783203


Epoch 3/100 [Train]:  53%|█████▎    | 2407/4538 [01:00<00:52, 40.64it/s]

Batch 2400/4538 | Loss: 231.930939


Epoch 3/100 [Train]:  57%|█████▋    | 2607/4538 [01:05<00:46, 41.35it/s]

Batch 2600/4538 | Loss: 209.121597


Epoch 3/100 [Train]:  62%|██████▏   | 2807/4538 [01:10<00:41, 41.39it/s]

Batch 2800/4538 | Loss: 241.448929


Epoch 3/100 [Train]:  66%|██████▋   | 3007/4538 [01:14<00:36, 41.38it/s]

Batch 3000/4538 | Loss: 233.607651


Epoch 3/100 [Train]:  71%|███████   | 3206/4538 [01:19<00:34, 38.87it/s]

Batch 3200/4538 | Loss: 213.773468


Epoch 3/100 [Train]:  75%|███████▌  | 3406/4538 [01:25<00:32, 34.49it/s]

Batch 3400/4538 | Loss: 210.020416


Epoch 3/100 [Train]:  79%|███████▉  | 3606/4538 [01:31<00:27, 34.41it/s]

Batch 3600/4538 | Loss: 234.275101


Epoch 3/100 [Train]:  84%|████████▍ | 3806/4538 [01:37<00:21, 34.63it/s]

Batch 3800/4538 | Loss: 220.015488


Epoch 3/100 [Train]:  88%|████████▊ | 4006/4538 [01:42<00:15, 34.53it/s]

Batch 4000/4538 | Loss: 205.848175


Epoch 3/100 [Train]:  93%|█████████▎| 4206/4538 [01:48<00:09, 34.55it/s]

Batch 4200/4538 | Loss: 219.051865


Epoch 3/100 [Train]:  97%|█████████▋| 4406/4538 [01:54<00:03, 34.50it/s]

Batch 4400/4538 | Loss: 228.204651


Epoch 3/100 [Train]: 100%|██████████| 4538/4538 [01:58<00:00, 38.39it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.52it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 89.36it/s]


Epoch 3/100 | Average Train Loss: 223.625366 | Average Validation Loss: 221.085179
Saved new best model with validation loss: 221.085179


Epoch 4/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 244.519806


Epoch 4/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.53it/s]

Batch 200/4538 | Loss: 218.177643


Epoch 4/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.49it/s]

Batch 400/4538 | Loss: 227.252808


Epoch 4/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.59it/s]

Batch 600/4538 | Loss: 216.930557


Epoch 4/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.51it/s]

Batch 800/4538 | Loss: 222.932831


Epoch 4/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.57it/s]

Batch 1000/4538 | Loss: 228.738525


Epoch 4/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.43it/s]

Batch 1200/4538 | Loss: 235.458328


Epoch 4/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.42it/s]

Batch 1400/4538 | Loss: 237.829910


Epoch 4/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:25, 34.34it/s]

Batch 1600/4538 | Loss: 206.457779


Epoch 4/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.37it/s]

Batch 1800/4538 | Loss: 234.868835


Epoch 4/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.36it/s]

Batch 2000/4538 | Loss: 234.424316


Epoch 4/100 [Train]:  49%|████▊     | 2207/4538 [01:04<01:07, 34.44it/s]

Batch 2200/4538 | Loss: 203.536423


Epoch 4/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.40it/s]

Batch 2400/4538 | Loss: 238.092484


Epoch 4/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:56, 34.39it/s]

Batch 2600/4538 | Loss: 215.840775


Epoch 4/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.44it/s]

Batch 2800/4538 | Loss: 223.576050


Epoch 4/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.52it/s]

Batch 3000/4538 | Loss: 228.700958


Epoch 4/100 [Train]:  71%|███████   | 3207/4538 [01:33<00:38, 34.41it/s]

Batch 3200/4538 | Loss: 201.308060


Epoch 4/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.52it/s]

Batch 3400/4538 | Loss: 217.844727


Epoch 4/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.63it/s]

Batch 3600/4538 | Loss: 216.302963


Epoch 4/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.52it/s]

Batch 3800/4538 | Loss: 229.656219


Epoch 4/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.47it/s]

Batch 4000/4538 | Loss: 223.519531


Epoch 4/100 [Train]:  93%|█████████▎| 4207/4538 [02:02<00:09, 34.50it/s]

Batch 4200/4538 | Loss: 232.372498


Epoch 4/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.40it/s]

Batch 4400/4538 | Loss: 213.799255


Epoch 4/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.46it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.39it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.95it/s]


Epoch 4/100 | Average Train Loss: 218.778305 | Average Validation Loss: 216.292483
Saved new best model with validation loss: 216.292483


Epoch 5/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 225.779846


Epoch 5/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.42it/s]

Batch 200/4538 | Loss: 215.583893


Epoch 5/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.56it/s]

Batch 400/4538 | Loss: 212.415817


Epoch 5/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:54, 34.47it/s]

Batch 600/4538 | Loss: 235.215759


Epoch 5/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.46it/s]

Batch 800/4538 | Loss: 218.282166


Epoch 5/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.43it/s]

Batch 1000/4538 | Loss: 198.674988


Epoch 5/100 [Train]:  27%|██▋       | 1207/4538 [00:35<01:36, 34.50it/s]

Batch 1200/4538 | Loss: 224.364044


Epoch 5/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.41it/s]

Batch 1400/4538 | Loss: 200.263351


Epoch 5/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:25, 34.43it/s]

Batch 1600/4538 | Loss: 205.186249


Epoch 5/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.46it/s]

Batch 1800/4538 | Loss: 217.547043


Epoch 5/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:12, 34.67it/s]

Batch 2000/4538 | Loss: 221.060623


Epoch 5/100 [Train]:  49%|████▊     | 2207/4538 [01:04<01:07, 34.59it/s]

Batch 2200/4538 | Loss: 182.461868


Epoch 5/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.50it/s]

Batch 2400/4538 | Loss: 227.000702


Epoch 5/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:56, 34.43it/s]

Batch 2600/4538 | Loss: 182.933197


Epoch 5/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.57it/s]

Batch 2800/4538 | Loss: 213.775528


Epoch 5/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.46it/s]

Batch 3000/4538 | Loss: 226.634689


Epoch 5/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.47it/s]

Batch 3200/4538 | Loss: 212.538452


Epoch 5/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.46it/s]

Batch 3400/4538 | Loss: 201.060623


Epoch 5/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:27, 34.45it/s]

Batch 3600/4538 | Loss: 214.785294


Epoch 5/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.50it/s]

Batch 3800/4538 | Loss: 214.160233


Epoch 5/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.55it/s]

Batch 4000/4538 | Loss: 196.363297


Epoch 5/100 [Train]:  93%|█████████▎| 4207/4538 [02:02<00:09, 34.47it/s]

Batch 4200/4538 | Loss: 218.830978


Epoch 5/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.53it/s]

Batch 4400/4538 | Loss: 234.649582


Epoch 5/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.48it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 120.01it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 92.31it/s]


Epoch 5/100 | Average Train Loss: 214.006485 | Average Validation Loss: 211.518293
Saved new best model with validation loss: 211.518293


Epoch 6/100 [Train]:   0%|          | 7/4538 [00:00<02:23, 31.57it/s]

Batch 0/4538 | Loss: 232.079071


Epoch 6/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.43it/s]

Batch 200/4538 | Loss: 173.998032


Epoch 6/100 [Train]:   9%|▉         | 407/4538 [00:11<02:00, 34.41it/s]

Batch 400/4538 | Loss: 190.546722


Epoch 6/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.50it/s]

Batch 600/4538 | Loss: 223.653000


Epoch 6/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.52it/s]

Batch 800/4538 | Loss: 197.200760


Epoch 6/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:41, 34.63it/s]

Batch 1000/4538 | Loss: 216.625824


Epoch 6/100 [Train]:  27%|██▋       | 1203/4538 [00:34<01:38, 33.99it/s]

Batch 1200/4538 | Loss: 233.232086


Epoch 6/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.45it/s]

Batch 1400/4538 | Loss: 194.272797


Epoch 6/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:25, 34.44it/s]

Batch 1600/4538 | Loss: 206.102783


Epoch 6/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.55it/s]

Batch 1800/4538 | Loss: 214.827759


Epoch 6/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.44it/s]

Batch 2000/4538 | Loss: 222.828339


Epoch 6/100 [Train]:  49%|████▊     | 2207/4538 [01:04<01:07, 34.46it/s]

Batch 2200/4538 | Loss: 212.850876


Epoch 6/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.44it/s]

Batch 2400/4538 | Loss: 215.861694


Epoch 6/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:56, 34.46it/s]

Batch 2600/4538 | Loss: 206.870361


Epoch 6/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.41it/s]

Batch 2800/4538 | Loss: 196.529526


Epoch 6/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.52it/s]

Batch 3000/4538 | Loss: 221.801437


Epoch 6/100 [Train]:  71%|███████   | 3207/4538 [01:33<00:38, 34.54it/s]

Batch 3200/4538 | Loss: 201.315643


Epoch 6/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.55it/s]

Batch 3400/4538 | Loss: 195.094223


Epoch 6/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:27, 34.41it/s]

Batch 3600/4538 | Loss: 232.283875


Epoch 6/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.27it/s]

Batch 3800/4538 | Loss: 220.404037


Epoch 6/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.47it/s]

Batch 4000/4538 | Loss: 189.267563


Epoch 6/100 [Train]:  93%|█████████▎| 4207/4538 [02:02<00:09, 34.42it/s]

Batch 4200/4538 | Loss: 197.099426


Epoch 6/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.38it/s]

Batch 4400/4538 | Loss: 229.091080


Epoch 6/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.46it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.36it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.71it/s]


Epoch 6/100 | Average Train Loss: 209.231827 | Average Validation Loss: 206.716314
Saved new best model with validation loss: 206.716314


Epoch 7/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 231.335754


Epoch 7/100 [Train]:   5%|▍         | 207/4538 [00:05<02:04, 34.77it/s]

Batch 200/4538 | Loss: 220.485275


Epoch 7/100 [Train]:   9%|▉         | 407/4538 [00:11<01:58, 34.78it/s]

Batch 400/4538 | Loss: 204.269623


Epoch 7/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.75it/s]

Batch 600/4538 | Loss: 205.333252


Epoch 7/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.65it/s]

Batch 800/4538 | Loss: 205.786896


Epoch 7/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:41, 34.70it/s]

Batch 1000/4538 | Loss: 213.587708


Epoch 7/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:35, 34.71it/s]

Batch 1200/4538 | Loss: 206.012848


Epoch 7/100 [Train]:  31%|███       | 1407/4538 [00:40<01:29, 34.80it/s]

Batch 1400/4538 | Loss: 194.170105


Epoch 7/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:23, 34.92it/s]

Batch 1600/4538 | Loss: 171.169830


Epoch 7/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:20, 34.04it/s]

Batch 1800/4538 | Loss: 218.931259


Epoch 7/100 [Train]:  44%|████▍     | 2007/4538 [00:57<01:13, 34.39it/s]

Batch 2000/4538 | Loss: 228.284119


Epoch 7/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.42it/s]

Batch 2200/4538 | Loss: 204.523209


Epoch 7/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:02, 34.34it/s]

Batch 2400/4538 | Loss: 219.776901


Epoch 7/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:56, 34.43it/s]

Batch 2600/4538 | Loss: 208.606583


Epoch 7/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.41it/s]

Batch 2800/4538 | Loss: 212.535172


Epoch 7/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:44, 34.41it/s]

Batch 3000/4538 | Loss: 186.444061


Epoch 7/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.51it/s]

Batch 3200/4538 | Loss: 198.511734


Epoch 7/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.46it/s]

Batch 3400/4538 | Loss: 211.898697


Epoch 7/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:27, 34.45it/s]

Batch 3600/4538 | Loss: 211.928726


Epoch 7/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.47it/s]

Batch 3800/4538 | Loss: 211.940643


Epoch 7/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.43it/s]

Batch 4000/4538 | Loss: 185.993042


Epoch 7/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.44it/s]

Batch 4200/4538 | Loss: 178.517426


Epoch 7/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.00it/s]

Batch 4400/4538 | Loss: 220.505447


Epoch 7/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.54it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.34it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.91it/s]


Epoch 7/100 | Average Train Loss: 204.477036 | Average Validation Loss: 201.934992
Saved new best model with validation loss: 201.934992


Epoch 8/100 [Train]:   0%|          | 7/4538 [00:00<02:23, 31.58it/s]

Batch 0/4538 | Loss: 219.314407


Epoch 8/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.41it/s]

Batch 200/4538 | Loss: 172.831345


Epoch 8/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.46it/s]

Batch 400/4538 | Loss: 191.850586


Epoch 8/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:54, 34.45it/s]

Batch 600/4538 | Loss: 227.688568


Epoch 8/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.41it/s]

Batch 800/4538 | Loss: 208.702240


Epoch 8/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.60it/s]

Batch 1000/4538 | Loss: 207.937027


Epoch 8/100 [Train]:  27%|██▋       | 1207/4538 [00:35<01:36, 34.39it/s]

Batch 1200/4538 | Loss: 187.752930


Epoch 8/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.42it/s]

Batch 1400/4538 | Loss: 172.204559


Epoch 8/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.53it/s]

Batch 1600/4538 | Loss: 186.305237


Epoch 8/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.56it/s]

Batch 1800/4538 | Loss: 148.847687


Epoch 8/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.44it/s]

Batch 2000/4538 | Loss: 203.061752


Epoch 8/100 [Train]:  49%|████▊     | 2207/4538 [01:04<01:07, 34.49it/s]

Batch 2200/4538 | Loss: 196.002670


Epoch 8/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:03, 33.70it/s]

Batch 2400/4538 | Loss: 214.454788


Epoch 8/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:56, 34.46it/s]

Batch 2600/4538 | Loss: 218.716034


Epoch 8/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.49it/s]

Batch 2800/4538 | Loss: 203.889420


Epoch 8/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.52it/s]

Batch 3000/4538 | Loss: 176.272095


Epoch 8/100 [Train]:  71%|███████   | 3207/4538 [01:33<00:38, 34.52it/s]

Batch 3200/4538 | Loss: 178.944244


Epoch 8/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.52it/s]

Batch 3400/4538 | Loss: 164.612656


Epoch 8/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.56it/s]

Batch 3600/4538 | Loss: 175.185440


Epoch 8/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.48it/s]

Batch 3800/4538 | Loss: 196.534683


Epoch 8/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.49it/s]

Batch 4000/4538 | Loss: 191.976624


Epoch 8/100 [Train]:  93%|█████████▎| 4207/4538 [02:02<00:09, 34.46it/s]

Batch 4200/4538 | Loss: 175.148026


Epoch 8/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.47it/s]

Batch 4400/4538 | Loss: 190.272385


Epoch 8/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.48it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 122.39it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 93.17it/s]


Epoch 8/100 | Average Train Loss: 199.823624 | Average Validation Loss: 197.295630
Saved new best model with validation loss: 197.295630


Epoch 9/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 203.709396


Epoch 9/100 [Train]:   5%|▍         | 207/4538 [00:05<02:05, 34.44it/s]

Batch 200/4538 | Loss: 201.411789


Epoch 9/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.46it/s]

Batch 400/4538 | Loss: 210.245911


Epoch 9/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.49it/s]

Batch 600/4538 | Loss: 199.970612


Epoch 9/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.47it/s]

Batch 800/4538 | Loss: 200.190872


Epoch 9/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.56it/s]

Batch 1000/4538 | Loss: 207.615753


Epoch 9/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.49it/s]

Batch 1200/4538 | Loss: 201.007858


Epoch 9/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.59it/s]

Batch 1400/4538 | Loss: 207.344589


Epoch 9/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.51it/s]

Batch 1600/4538 | Loss: 216.265762


Epoch 9/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.58it/s]

Batch 1800/4538 | Loss: 218.272522


Epoch 9/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.45it/s]

Batch 2000/4538 | Loss: 194.342102


Epoch 9/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.43it/s]

Batch 2200/4538 | Loss: 199.177582


Epoch 9/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.72it/s]

Batch 2400/4538 | Loss: 203.345169


Epoch 9/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.53it/s]

Batch 2600/4538 | Loss: 204.060165


Epoch 9/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.45it/s]

Batch 2800/4538 | Loss: 206.959686


Epoch 9/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.48it/s]

Batch 3000/4538 | Loss: 211.717361


Epoch 9/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.56it/s]

Batch 3200/4538 | Loss: 175.112778


Epoch 9/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.62it/s]

Batch 3400/4538 | Loss: 199.557083


Epoch 9/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:27, 34.46it/s]

Batch 3600/4538 | Loss: 163.553040


Epoch 9/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.57it/s]

Batch 3800/4538 | Loss: 199.866241


Epoch 9/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.56it/s]

Batch 4000/4538 | Loss: 201.754471


Epoch 9/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.62it/s]

Batch 4200/4538 | Loss: 186.693481


Epoch 9/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.73it/s]

Batch 4400/4538 | Loss: 206.844589


Epoch 9/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.54it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 123.03it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 90.67it/s]


Epoch 9/100 | Average Train Loss: 195.317429 | Average Validation Loss: 192.833146
Saved new best model with validation loss: 192.833146


Epoch 10/100 [Train]:   0%|          | 3/4538 [00:00<03:03, 24.77it/s]

Batch 0/4538 | Loss: 183.855820


Epoch 10/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.65it/s]

Batch 200/4538 | Loss: 219.771011


Epoch 10/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.65it/s]

Batch 400/4538 | Loss: 219.235687


Epoch 10/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.63it/s]

Batch 600/4538 | Loss: 196.037598


Epoch 10/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.54it/s]

Batch 800/4538 | Loss: 167.286285


Epoch 10/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.51it/s]

Batch 1000/4538 | Loss: 198.694595


Epoch 10/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.57it/s]

Batch 1200/4538 | Loss: 162.651749


Epoch 10/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.62it/s]

Batch 1400/4538 | Loss: 190.880234


Epoch 10/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.58it/s]

Batch 1600/4538 | Loss: 203.201447


Epoch 10/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.49it/s]

Batch 1800/4538 | Loss: 206.462296


Epoch 10/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.54it/s]

Batch 2000/4538 | Loss: 194.066452


Epoch 10/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.53it/s]

Batch 2200/4538 | Loss: 189.175720


Epoch 10/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.72it/s]

Batch 2400/4538 | Loss: 206.995407


Epoch 10/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:56, 33.95it/s]

Batch 2600/4538 | Loss: 151.897278


Epoch 10/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:49, 34.62it/s]

Batch 2800/4538 | Loss: 164.510544


Epoch 10/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.59it/s]

Batch 3000/4538 | Loss: 201.831421


Epoch 10/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.58it/s]

Batch 3200/4538 | Loss: 168.501877


Epoch 10/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.60it/s]

Batch 3400/4538 | Loss: 190.664444


Epoch 10/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.58it/s]

Batch 3600/4538 | Loss: 189.391281


Epoch 10/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.43it/s]

Batch 3800/4538 | Loss: 163.128983


Epoch 10/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.57it/s]

Batch 4000/4538 | Loss: 183.657349


Epoch 10/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.57it/s]

Batch 4200/4538 | Loss: 199.286636


Epoch 10/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.54it/s]

Batch 4400/4538 | Loss: 179.891373


Epoch 10/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.56it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 122.09it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 92.62it/s]


Epoch 10/100 | Average Train Loss: 190.931778 | Average Validation Loss: 188.485714
Saved new best model with validation loss: 188.485714


Epoch 11/100 [Train]:   0%|          | 7/4538 [00:00<02:23, 31.54it/s]

Batch 0/4538 | Loss: 210.055801


Epoch 11/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.46it/s]

Batch 200/4538 | Loss: 196.839279


Epoch 11/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.53it/s]

Batch 400/4538 | Loss: 189.766281


Epoch 11/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:55, 34.06it/s]

Batch 600/4538 | Loss: 200.654816


Epoch 11/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.52it/s]

Batch 800/4538 | Loss: 184.233612


Epoch 11/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.52it/s]

Batch 1000/4538 | Loss: 168.036804


Epoch 11/100 [Train]:  27%|██▋       | 1207/4538 [00:35<01:36, 34.58it/s]

Batch 1200/4538 | Loss: 156.106018


Epoch 11/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.60it/s]

Batch 1400/4538 | Loss: 207.787491


Epoch 11/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.50it/s]

Batch 1600/4538 | Loss: 200.005844


Epoch 11/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.48it/s]

Batch 1800/4538 | Loss: 173.166611


Epoch 11/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.54it/s]

Batch 2000/4538 | Loss: 172.414612


Epoch 11/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.41it/s]

Batch 2200/4538 | Loss: 200.062469


Epoch 11/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.46it/s]

Batch 2400/4538 | Loss: 194.590439


Epoch 11/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.49it/s]

Batch 2600/4538 | Loss: 187.001770


Epoch 11/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.45it/s]

Batch 2800/4538 | Loss: 199.398743


Epoch 11/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.48it/s]

Batch 3000/4538 | Loss: 193.310440


Epoch 11/100 [Train]:  71%|███████   | 3207/4538 [01:33<00:39, 34.01it/s]

Batch 3200/4538 | Loss: 162.424408


Epoch 11/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.43it/s]

Batch 3400/4538 | Loss: 188.213577


Epoch 11/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:27, 34.42it/s]

Batch 3600/4538 | Loss: 172.594742


Epoch 11/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.45it/s]

Batch 3800/4538 | Loss: 230.632263


Epoch 11/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.50it/s]

Batch 4000/4538 | Loss: 211.632599


Epoch 11/100 [Train]:  93%|█████████▎| 4207/4538 [02:02<00:09, 34.49it/s]

Batch 4200/4538 | Loss: 199.285461


Epoch 11/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.51it/s]

Batch 4400/4538 | Loss: 203.365326


Epoch 11/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.48it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 122.23it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 90.79it/s]


Epoch 11/100 | Average Train Loss: 186.654388 | Average Validation Loss: 184.240276
Saved new best model with validation loss: 184.240276


Epoch 12/100 [Train]:   0%|          | 7/4538 [00:00<02:23, 31.53it/s]

Batch 0/4538 | Loss: 179.037445


Epoch 12/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.41it/s]

Batch 200/4538 | Loss: 184.626541


Epoch 12/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.58it/s]

Batch 400/4538 | Loss: 205.283081


Epoch 12/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.59it/s]

Batch 600/4538 | Loss: 165.025742


Epoch 12/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.63it/s]

Batch 800/4538 | Loss: 182.996536


Epoch 12/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.56it/s]

Batch 1000/4538 | Loss: 167.708679


Epoch 12/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.55it/s]

Batch 1200/4538 | Loss: 185.310715


Epoch 12/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.63it/s]

Batch 1400/4538 | Loss: 198.109589


Epoch 12/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.63it/s]

Batch 1600/4538 | Loss: 198.377716


Epoch 12/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.59it/s]

Batch 1800/4538 | Loss: 210.390366


Epoch 12/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.63it/s]

Batch 2000/4538 | Loss: 143.369293


Epoch 12/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.67it/s]

Batch 2200/4538 | Loss: 194.740082


Epoch 12/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.65it/s]

Batch 2400/4538 | Loss: 204.397797


Epoch 12/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:56, 34.47it/s]

Batch 2600/4538 | Loss: 177.114746


Epoch 12/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.46it/s]

Batch 2800/4538 | Loss: 193.681503


Epoch 12/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:44, 34.63it/s]

Batch 3000/4538 | Loss: 180.518051


Epoch 12/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.59it/s]

Batch 3200/4538 | Loss: 207.477539


Epoch 12/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.52it/s]

Batch 3400/4538 | Loss: 135.427475


Epoch 12/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.58it/s]

Batch 3600/4538 | Loss: 182.953186


Epoch 12/100 [Train]:  84%|████████▍ | 3803/4538 [01:49<00:22, 33.39it/s]

Batch 3800/4538 | Loss: 186.909439


Epoch 12/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.53it/s]

Batch 4000/4538 | Loss: 189.111206


Epoch 12/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.51it/s]

Batch 4200/4538 | Loss: 186.507309


Epoch 12/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.61it/s]

Batch 4400/4538 | Loss: 190.073166


Epoch 12/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.58it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 122.75it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.70it/s]


Epoch 12/100 | Average Train Loss: 182.487640 | Average Validation Loss: 180.129112
Saved new best model with validation loss: 180.129112


Epoch 13/100 [Train]:   0%|          | 7/4538 [00:00<02:24, 31.39it/s]

Batch 0/4538 | Loss: 160.163193


Epoch 13/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.45it/s]

Batch 200/4538 | Loss: 190.816315


Epoch 13/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.50it/s]

Batch 400/4538 | Loss: 155.974869


Epoch 13/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.56it/s]

Batch 600/4538 | Loss: 174.973618


Epoch 13/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:50, 33.85it/s]

Batch 800/4538 | Loss: 174.172089


Epoch 13/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.50it/s]

Batch 1000/4538 | Loss: 140.732361


Epoch 13/100 [Train]:  27%|██▋       | 1207/4538 [00:35<01:36, 34.46it/s]

Batch 1200/4538 | Loss: 186.213181


Epoch 13/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.51it/s]

Batch 1400/4538 | Loss: 156.548309


Epoch 13/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:25, 34.48it/s]

Batch 1600/4538 | Loss: 187.414398


Epoch 13/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.55it/s]

Batch 1800/4538 | Loss: 153.516571


Epoch 13/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.55it/s]

Batch 2000/4538 | Loss: 198.934036


Epoch 13/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.64it/s]

Batch 2200/4538 | Loss: 150.927994


Epoch 13/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.49it/s]

Batch 2400/4538 | Loss: 174.088486


Epoch 13/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.58it/s]

Batch 2600/4538 | Loss: 157.380920


Epoch 13/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.53it/s]

Batch 2800/4538 | Loss: 183.738998


Epoch 13/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.53it/s]

Batch 3000/4538 | Loss: 199.475922


Epoch 13/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.44it/s]

Batch 3200/4538 | Loss: 185.748840


Epoch 13/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:33, 34.01it/s]

Batch 3400/4538 | Loss: 202.538071


Epoch 13/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.50it/s]

Batch 3600/4538 | Loss: 190.955322


Epoch 13/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.59it/s]

Batch 3800/4538 | Loss: 183.366348


Epoch 13/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.54it/s]

Batch 4000/4538 | Loss: 200.940399


Epoch 13/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.52it/s]

Batch 4200/4538 | Loss: 173.114212


Epoch 13/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.52it/s]

Batch 4400/4538 | Loss: 175.931000


Epoch 13/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.52it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.14it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.96it/s]


Epoch 13/100 | Average Train Loss: 178.455902 | Average Validation Loss: 176.175024
Saved new best model with validation loss: 176.175024


Epoch 14/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 179.536560


Epoch 14/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.57it/s]

Batch 200/4538 | Loss: 155.275543


Epoch 14/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.56it/s]

Batch 400/4538 | Loss: 214.789124


Epoch 14/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.53it/s]

Batch 600/4538 | Loss: 191.652817


Epoch 14/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.56it/s]

Batch 800/4538 | Loss: 175.740097


Epoch 14/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.47it/s]

Batch 1000/4538 | Loss: 186.935379


Epoch 14/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.55it/s]

Batch 1200/4538 | Loss: 189.585922


Epoch 14/100 [Train]:  31%|███       | 1407/4538 [00:40<01:31, 34.30it/s]

Batch 1400/4538 | Loss: 193.459656


Epoch 14/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.68it/s]

Batch 1600/4538 | Loss: 146.060974


Epoch 14/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.62it/s]

Batch 1800/4538 | Loss: 136.428970


Epoch 14/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:12, 34.68it/s]

Batch 2000/4538 | Loss: 188.488251


Epoch 14/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.55it/s]

Batch 2200/4538 | Loss: 205.752182


Epoch 14/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.61it/s]

Batch 2400/4538 | Loss: 153.015701


Epoch 14/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.55it/s]

Batch 2600/4538 | Loss: 177.228638


Epoch 14/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.56it/s]

Batch 2800/4538 | Loss: 172.724777


Epoch 14/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.42it/s]

Batch 3000/4538 | Loss: 175.326843


Epoch 14/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.58it/s]

Batch 3200/4538 | Loss: 128.873154


Epoch 14/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.56it/s]

Batch 3400/4538 | Loss: 173.612656


Epoch 14/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:27, 34.48it/s]

Batch 3600/4538 | Loss: 200.776215


Epoch 14/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.60it/s]

Batch 3800/4538 | Loss: 198.919144


Epoch 14/100 [Train]:  88%|████████▊ | 4003/4538 [01:55<00:15, 33.54it/s]

Batch 4000/4538 | Loss: 177.448776


Epoch 14/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.56it/s]

Batch 4200/4538 | Loss: 176.956467


Epoch 14/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.57it/s]

Batch 4400/4538 | Loss: 187.819138


Epoch 14/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.57it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 122.31it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 90.25it/s]


Epoch 14/100 | Average Train Loss: 174.540604 | Average Validation Loss: 172.354458
Saved new best model with validation loss: 172.354458


Epoch 15/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 141.800034


Epoch 15/100 [Train]:   5%|▍         | 207/4538 [00:05<02:04, 34.67it/s]

Batch 200/4538 | Loss: 181.274857


Epoch 15/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.69it/s]

Batch 400/4538 | Loss: 176.554764


Epoch 15/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.53it/s]

Batch 600/4538 | Loss: 183.934647


Epoch 15/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.51it/s]

Batch 800/4538 | Loss: 145.923767


Epoch 15/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.33it/s]

Batch 1000/4538 | Loss: 222.313583


Epoch 15/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.56it/s]

Batch 1200/4538 | Loss: 176.382889


Epoch 15/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.57it/s]

Batch 1400/4538 | Loss: 132.971680


Epoch 15/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.51it/s]

Batch 1600/4538 | Loss: 175.528656


Epoch 15/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.53it/s]

Batch 1800/4538 | Loss: 198.495377


Epoch 15/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.48it/s]

Batch 2000/4538 | Loss: 160.347855


Epoch 15/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.54it/s]

Batch 2200/4538 | Loss: 170.176071


Epoch 15/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.56it/s]

Batch 2400/4538 | Loss: 179.692886


Epoch 15/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.52it/s]

Batch 2600/4538 | Loss: 161.362961


Epoch 15/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.49it/s]

Batch 2800/4538 | Loss: 156.656815


Epoch 15/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.48it/s]

Batch 3000/4538 | Loss: 177.011749


Epoch 15/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.56it/s]

Batch 3200/4538 | Loss: 156.492264


Epoch 15/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.46it/s]

Batch 3400/4538 | Loss: 175.607208


Epoch 15/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:27, 34.36it/s]

Batch 3600/4538 | Loss: 143.736481


Epoch 15/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.53it/s]

Batch 3800/4538 | Loss: 159.307693


Epoch 15/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.54it/s]

Batch 4000/4538 | Loss: 198.336624


Epoch 15/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.54it/s]

Batch 4200/4538 | Loss: 153.985611


Epoch 15/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.61it/s]

Batch 4400/4538 | Loss: 175.494247


Epoch 15/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.54it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.68it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.98it/s]


Epoch 15/100 | Average Train Loss: 170.757324 | Average Validation Loss: 168.673945
Saved new best model with validation loss: 168.673945


Epoch 16/100 [Train]:   0%|          | 7/4538 [00:00<02:23, 31.61it/s]

Batch 0/4538 | Loss: 201.637421


Epoch 16/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.52it/s]

Batch 200/4538 | Loss: 179.118057


Epoch 16/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.62it/s]

Batch 400/4538 | Loss: 197.979340


Epoch 16/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.62it/s]

Batch 600/4538 | Loss: 183.672104


Epoch 16/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.47it/s]

Batch 800/4538 | Loss: 194.651886


Epoch 16/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.57it/s]

Batch 1000/4538 | Loss: 167.877350


Epoch 16/100 [Train]:  27%|██▋       | 1207/4538 [00:35<01:36, 34.55it/s]

Batch 1200/4538 | Loss: 175.401367


Epoch 16/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.62it/s]

Batch 1400/4538 | Loss: 189.529282


Epoch 16/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:25, 34.27it/s]

Batch 1600/4538 | Loss: 178.617889


Epoch 16/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.70it/s]

Batch 1800/4538 | Loss: 156.162354


Epoch 16/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:12, 34.79it/s]

Batch 2000/4538 | Loss: 122.284019


Epoch 16/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.70it/s]

Batch 2200/4538 | Loss: 170.996307


Epoch 16/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.69it/s]

Batch 2400/4538 | Loss: 133.664337


Epoch 16/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.68it/s]

Batch 2600/4538 | Loss: 175.680832


Epoch 16/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.57it/s]

Batch 2800/4538 | Loss: 182.817001


Epoch 16/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:44, 34.73it/s]

Batch 3000/4538 | Loss: 177.202652


Epoch 16/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.75it/s]

Batch 3200/4538 | Loss: 196.994431


Epoch 16/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.77it/s]

Batch 3400/4538 | Loss: 183.910385


Epoch 16/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.71it/s]

Batch 3600/4538 | Loss: 167.515381


Epoch 16/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.71it/s]

Batch 3800/4538 | Loss: 151.681595


Epoch 16/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.64it/s]

Batch 4000/4538 | Loss: 181.213684


Epoch 16/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.25it/s]

Batch 4200/4538 | Loss: 150.243210


Epoch 16/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.69it/s]

Batch 4400/4538 | Loss: 185.836548


Epoch 16/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.62it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.31it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 95.58it/s]


Epoch 16/100 | Average Train Loss: 167.105377 | Average Validation Loss: 165.095195
Saved new best model with validation loss: 165.095195


Epoch 17/100 [Train]:   0%|          | 7/4538 [00:00<02:24, 31.32it/s]

Batch 0/4538 | Loss: 178.951904


Epoch 17/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.56it/s]

Batch 200/4538 | Loss: 120.787956


Epoch 17/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.59it/s]

Batch 400/4538 | Loss: 181.550385


Epoch 17/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.66it/s]

Batch 600/4538 | Loss: 185.743332


Epoch 17/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.60it/s]

Batch 800/4538 | Loss: 173.439728


Epoch 17/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:41, 34.64it/s]

Batch 1000/4538 | Loss: 203.464798


Epoch 17/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.37it/s]

Batch 1200/4538 | Loss: 134.256393


Epoch 17/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.59it/s]

Batch 1400/4538 | Loss: 159.391647


Epoch 17/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.73it/s]

Batch 1600/4538 | Loss: 174.870071


Epoch 17/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.69it/s]

Batch 1800/4538 | Loss: 145.291519


Epoch 17/100 [Train]:  44%|████▍     | 2007/4538 [00:57<01:12, 34.67it/s]

Batch 2000/4538 | Loss: 176.276123


Epoch 17/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.66it/s]

Batch 2200/4538 | Loss: 138.898773


Epoch 17/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.72it/s]

Batch 2400/4538 | Loss: 180.909714


Epoch 17/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.63it/s]

Batch 2600/4538 | Loss: 169.695923


Epoch 17/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:49, 34.78it/s]

Batch 2800/4538 | Loss: 125.622383


Epoch 17/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:43, 34.80it/s]

Batch 3000/4538 | Loss: 179.515045


Epoch 17/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.88it/s]

Batch 3200/4538 | Loss: 179.493759


Epoch 17/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.80it/s]

Batch 3400/4538 | Loss: 168.863693


Epoch 17/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.75it/s]

Batch 3600/4538 | Loss: 183.784821


Epoch 17/100 [Train]:  84%|████████▍ | 3807/4538 [01:49<00:21, 34.41it/s]

Batch 3800/4538 | Loss: 124.908630


Epoch 17/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.76it/s]

Batch 4000/4538 | Loss: 162.635834


Epoch 17/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.81it/s]

Batch 4200/4538 | Loss: 197.005630


Epoch 17/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.70it/s]

Batch 4400/4538 | Loss: 181.602432


Epoch 17/100 [Train]: 100%|██████████| 4538/4538 [02:10<00:00, 34.69it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 123.05it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 94.14it/s]


Epoch 17/100 | Average Train Loss: 163.561142 | Average Validation Loss: 161.595257
Saved new best model with validation loss: 161.595257


Epoch 18/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 171.692719


Epoch 18/100 [Train]:   5%|▍         | 207/4538 [00:06<02:04, 34.77it/s]

Batch 200/4538 | Loss: 170.338745


Epoch 18/100 [Train]:   9%|▉         | 407/4538 [00:11<02:00, 34.27it/s]

Batch 400/4538 | Loss: 174.006180


Epoch 18/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.62it/s]

Batch 600/4538 | Loss: 172.026291


Epoch 18/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.26it/s]

Batch 800/4538 | Loss: 161.664566


Epoch 18/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:41, 34.73it/s]

Batch 1000/4538 | Loss: 164.042633


Epoch 18/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:35, 34.75it/s]

Batch 1200/4538 | Loss: 183.163513


Epoch 18/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.62it/s]

Batch 1400/4538 | Loss: 176.615158


Epoch 18/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.63it/s]

Batch 1600/4538 | Loss: 172.679672


Epoch 18/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.64it/s]

Batch 1800/4538 | Loss: 178.725876


Epoch 18/100 [Train]:  44%|████▍     | 2007/4538 [00:57<01:13, 34.66it/s]

Batch 2000/4538 | Loss: 180.869827


Epoch 18/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.67it/s]

Batch 2200/4538 | Loss: 153.915115


Epoch 18/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.72it/s]

Batch 2400/4538 | Loss: 172.973953


Epoch 18/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.67it/s]

Batch 2600/4538 | Loss: 136.000366


Epoch 18/100 [Train]:  62%|██████▏   | 2807/4538 [01:20<00:49, 34.65it/s]

Batch 2800/4538 | Loss: 182.934021


Epoch 18/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:44, 34.60it/s]

Batch 3000/4538 | Loss: 187.464432


Epoch 18/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.65it/s]

Batch 3200/4538 | Loss: 173.222504


Epoch 18/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:33, 33.76it/s]

Batch 3400/4538 | Loss: 178.386658


Epoch 18/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.76it/s]

Batch 3600/4538 | Loss: 172.956360


Epoch 18/100 [Train]:  84%|████████▍ | 3807/4538 [01:49<00:21, 34.68it/s]

Batch 3800/4538 | Loss: 142.683090


Epoch 18/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.67it/s]

Batch 4000/4538 | Loss: 99.983299


Epoch 18/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.60it/s]

Batch 4200/4538 | Loss: 122.734566


Epoch 18/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.66it/s]

Batch 4400/4538 | Loss: 108.407829


Epoch 18/100 [Train]: 100%|██████████| 4538/4538 [02:10<00:00, 34.66it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 122.97it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 92.47it/s]


Epoch 18/100 | Average Train Loss: 160.138336 | Average Validation Loss: 158.224493
Saved new best model with validation loss: 158.224493


Epoch 19/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 167.914078


Epoch 19/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.50it/s]

Batch 200/4538 | Loss: 182.269958


Epoch 19/100 [Train]:   9%|▉         | 407/4538 [00:11<02:01, 34.07it/s]

Batch 400/4538 | Loss: 162.822250


Epoch 19/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.74it/s]

Batch 600/4538 | Loss: 165.173203


Epoch 19/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.47it/s]

Batch 800/4538 | Loss: 181.886215


Epoch 19/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.55it/s]

Batch 1000/4538 | Loss: 165.639648


Epoch 19/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.57it/s]

Batch 1200/4538 | Loss: 170.908310


Epoch 19/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.44it/s]

Batch 1400/4538 | Loss: 182.560745


Epoch 19/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.63it/s]

Batch 1600/4538 | Loss: 204.186432


Epoch 19/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.63it/s]

Batch 1800/4538 | Loss: 188.413025


Epoch 19/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:12, 34.70it/s]

Batch 2000/4538 | Loss: 140.651093


Epoch 19/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.70it/s]

Batch 2200/4538 | Loss: 169.374146


Epoch 19/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.66it/s]

Batch 2400/4538 | Loss: 168.642944


Epoch 19/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.69it/s]

Batch 2600/4538 | Loss: 178.816849


Epoch 19/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.56it/s]

Batch 2800/4538 | Loss: 101.695992


Epoch 19/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.09it/s]

Batch 3000/4538 | Loss: 168.992676


Epoch 19/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.52it/s]

Batch 3200/4538 | Loss: 167.348816


Epoch 19/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.52it/s]

Batch 3400/4538 | Loss: 158.019333


Epoch 19/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.62it/s]

Batch 3600/4538 | Loss: 113.921303


Epoch 19/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.62it/s]

Batch 3800/4538 | Loss: 162.901779


Epoch 19/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.50it/s]

Batch 4000/4538 | Loss: 158.361557


Epoch 19/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.56it/s]

Batch 4200/4538 | Loss: 170.756454


Epoch 19/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.60it/s]

Batch 4400/4538 | Loss: 159.452209


Epoch 19/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.57it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 120.87it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 92.33it/s]


Epoch 19/100 | Average Train Loss: 156.788376 | Average Validation Loss: 154.894770
Saved new best model with validation loss: 154.894770


Epoch 20/100 [Train]:   0%|          | 7/4538 [00:00<02:24, 31.33it/s]

Batch 0/4538 | Loss: 145.002243


Epoch 20/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.43it/s]

Batch 200/4538 | Loss: 173.154739


Epoch 20/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.52it/s]

Batch 400/4538 | Loss: 163.874680


Epoch 20/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:54, 34.48it/s]

Batch 600/4538 | Loss: 172.338501


Epoch 20/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.51it/s]

Batch 800/4538 | Loss: 157.900925


Epoch 20/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:43, 34.00it/s]

Batch 1000/4538 | Loss: 152.111435


Epoch 20/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.44it/s]

Batch 1200/4538 | Loss: 156.860229


Epoch 20/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.66it/s]

Batch 1400/4538 | Loss: 163.667831


Epoch 20/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.66it/s]

Batch 1600/4538 | Loss: 111.115417


Epoch 20/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.54it/s]

Batch 1800/4538 | Loss: 186.299423


Epoch 20/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.48it/s]

Batch 2000/4538 | Loss: 164.980377


Epoch 20/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.56it/s]

Batch 2200/4538 | Loss: 148.574707


Epoch 20/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.59it/s]

Batch 2400/4538 | Loss: 162.705368


Epoch 20/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.51it/s]

Batch 2600/4538 | Loss: 168.290192


Epoch 20/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.45it/s]

Batch 2800/4538 | Loss: 155.944443


Epoch 20/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.42it/s]

Batch 3000/4538 | Loss: 179.600082


Epoch 20/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.46it/s]

Batch 3200/4538 | Loss: 181.545456


Epoch 20/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.40it/s]

Batch 3400/4538 | Loss: 178.995438


Epoch 20/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:27, 34.06it/s]

Batch 3600/4538 | Loss: 171.421539


Epoch 20/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.47it/s]

Batch 3800/4538 | Loss: 175.282654


Epoch 20/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.56it/s]

Batch 4000/4538 | Loss: 168.227661


Epoch 20/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.56it/s]

Batch 4200/4538 | Loss: 167.428604


Epoch 20/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.50it/s]

Batch 4400/4538 | Loss: 145.101929


Epoch 20/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.52it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 120.43it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.29it/s]


Epoch 20/100 | Average Train Loss: 153.483215 | Average Validation Loss: 151.556732
Saved new best model with validation loss: 151.556732


Epoch 21/100 [Train]:   0%|          | 7/4538 [00:00<02:23, 31.58it/s]

Batch 0/4538 | Loss: 160.896133


Epoch 21/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.42it/s]

Batch 200/4538 | Loss: 164.980042


Epoch 21/100 [Train]:   9%|▉         | 407/4538 [00:11<02:00, 34.40it/s]

Batch 400/4538 | Loss: 150.891495


Epoch 21/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:54, 34.41it/s]

Batch 600/4538 | Loss: 100.938110


Epoch 21/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.44it/s]

Batch 800/4538 | Loss: 143.202438


Epoch 21/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.44it/s]

Batch 1000/4538 | Loss: 142.671631


Epoch 21/100 [Train]:  27%|██▋       | 1207/4538 [00:35<01:36, 34.61it/s]

Batch 1200/4538 | Loss: 129.984894


Epoch 21/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.42it/s]

Batch 1400/4538 | Loss: 169.107819


Epoch 21/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:25, 34.17it/s]

Batch 1600/4538 | Loss: 124.761833


Epoch 21/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.40it/s]

Batch 1800/4538 | Loss: 165.266357


Epoch 21/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.50it/s]

Batch 2000/4538 | Loss: 107.976402


Epoch 21/100 [Train]:  49%|████▊     | 2207/4538 [01:04<01:07, 34.48it/s]

Batch 2200/4538 | Loss: 142.125427


Epoch 21/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.44it/s]

Batch 2400/4538 | Loss: 165.538055


Epoch 21/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:56, 34.11it/s]

Batch 2600/4538 | Loss: 116.324173


Epoch 21/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.51it/s]

Batch 2800/4538 | Loss: 96.156906


Epoch 21/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.36it/s]

Batch 3000/4538 | Loss: 174.837891


Epoch 21/100 [Train]:  71%|███████   | 3207/4538 [01:33<00:38, 34.51it/s]

Batch 3200/4538 | Loss: 151.203735


Epoch 21/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.50it/s]

Batch 3400/4538 | Loss: 157.675781


Epoch 21/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.61it/s]

Batch 3600/4538 | Loss: 160.136932


Epoch 21/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.38it/s]

Batch 3800/4538 | Loss: 179.230881


Epoch 21/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.43it/s]

Batch 4000/4538 | Loss: 144.157318


Epoch 21/100 [Train]:  93%|█████████▎| 4207/4538 [02:02<00:09, 34.45it/s]

Batch 4200/4538 | Loss: 173.285095


Epoch 21/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.48it/s]

Batch 4400/4538 | Loss: 152.634689


Epoch 21/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.48it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 120.97it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 92.26it/s]


Epoch 21/100 | Average Train Loss: 150.171097 | Average Validation Loss: 148.241249
Saved new best model with validation loss: 148.241249


Epoch 22/100 [Train]:   0%|          | 7/4538 [00:00<02:24, 31.46it/s]

Batch 0/4538 | Loss: 201.101410


Epoch 22/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.41it/s]

Batch 200/4538 | Loss: 133.947540


Epoch 22/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.59it/s]

Batch 400/4538 | Loss: 157.234940


Epoch 22/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:54, 34.47it/s]

Batch 600/4538 | Loss: 137.447952


Epoch 22/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.49it/s]

Batch 800/4538 | Loss: 129.671173


Epoch 22/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.42it/s]

Batch 1000/4538 | Loss: 114.042671


Epoch 22/100 [Train]:  27%|██▋       | 1207/4538 [00:35<01:36, 34.51it/s]

Batch 1200/4538 | Loss: 138.750397


Epoch 22/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.52it/s]

Batch 1400/4538 | Loss: 157.271011


Epoch 22/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.54it/s]

Batch 1600/4538 | Loss: 149.422134


Epoch 22/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.44it/s]

Batch 1800/4538 | Loss: 149.914856


Epoch 22/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.43it/s]

Batch 2000/4538 | Loss: 160.628998


Epoch 22/100 [Train]:  49%|████▊     | 2207/4538 [01:04<01:07, 34.31it/s]

Batch 2200/4538 | Loss: 100.631798


Epoch 22/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.40it/s]

Batch 2400/4538 | Loss: 160.086746


Epoch 22/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:56, 34.45it/s]

Batch 2600/4538 | Loss: 152.324020


Epoch 22/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.41it/s]

Batch 2800/4538 | Loss: 156.815582


Epoch 22/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.49it/s]

Batch 3000/4538 | Loss: 137.166229


Epoch 22/100 [Train]:  71%|███████   | 3207/4538 [01:33<00:38, 34.45it/s]

Batch 3200/4538 | Loss: 160.817108


Epoch 22/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.48it/s]

Batch 3400/4538 | Loss: 159.811584


Epoch 22/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:27, 34.44it/s]

Batch 3600/4538 | Loss: 138.283417


Epoch 22/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.48it/s]

Batch 3800/4538 | Loss: 154.730606


Epoch 22/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.45it/s]

Batch 4000/4538 | Loss: 101.281570


Epoch 22/100 [Train]:  93%|█████████▎| 4207/4538 [02:02<00:09, 34.41it/s]

Batch 4200/4538 | Loss: 163.694229


Epoch 22/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.44it/s]

Batch 4400/4538 | Loss: 181.059494


Epoch 22/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.47it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.01it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.81it/s]


Epoch 22/100 | Average Train Loss: 146.865540 | Average Validation Loss: 144.830414
Saved new best model with validation loss: 144.830414


Epoch 23/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 152.275070


Epoch 23/100 [Train]:   5%|▍         | 207/4538 [00:06<02:06, 34.34it/s]

Batch 200/4538 | Loss: 121.532669


Epoch 23/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.55it/s]

Batch 400/4538 | Loss: 157.845795


Epoch 23/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.51it/s]

Batch 600/4538 | Loss: 106.155952


Epoch 23/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.55it/s]

Batch 800/4538 | Loss: 155.154602


Epoch 23/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.47it/s]

Batch 1000/4538 | Loss: 104.000046


Epoch 23/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.51it/s]

Batch 1200/4538 | Loss: 155.394165


Epoch 23/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.55it/s]

Batch 1400/4538 | Loss: 118.791229


Epoch 23/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.58it/s]

Batch 1600/4538 | Loss: 141.209732


Epoch 23/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.55it/s]

Batch 1800/4538 | Loss: 149.011993


Epoch 23/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.58it/s]

Batch 2000/4538 | Loss: 154.335831


Epoch 23/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.45it/s]

Batch 2200/4538 | Loss: 109.096443


Epoch 23/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.60it/s]

Batch 2400/4538 | Loss: 127.424095


Epoch 23/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.50it/s]

Batch 2600/4538 | Loss: 193.906769


Epoch 23/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.51it/s]

Batch 2800/4538 | Loss: 156.654190


Epoch 23/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.44it/s]

Batch 3000/4538 | Loss: 160.143616


Epoch 23/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.67it/s]

Batch 3200/4538 | Loss: 137.375656


Epoch 23/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.58it/s]

Batch 3400/4538 | Loss: 169.744843


Epoch 23/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.54it/s]

Batch 3600/4538 | Loss: 123.869003


Epoch 23/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.65it/s]

Batch 3800/4538 | Loss: 157.402298


Epoch 23/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.49it/s]

Batch 4000/4538 | Loss: 121.564262


Epoch 23/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.49it/s]

Batch 4200/4538 | Loss: 129.182053


Epoch 23/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.58it/s]

Batch 4400/4538 | Loss: 159.375351


Epoch 23/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.53it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 122.35it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 90.25it/s]


Epoch 23/100 | Average Train Loss: 143.586761 | Average Validation Loss: 141.486146
Saved new best model with validation loss: 141.486146


Epoch 24/100 [Train]:   0%|          | 7/4538 [00:00<02:24, 31.46it/s]

Batch 0/4538 | Loss: 149.781570


Epoch 24/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.45it/s]

Batch 200/4538 | Loss: 155.321503


Epoch 24/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.54it/s]

Batch 400/4538 | Loss: 139.473526


Epoch 24/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.53it/s]

Batch 600/4538 | Loss: 161.227829


Epoch 24/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:49, 34.18it/s]

Batch 800/4538 | Loss: 101.752304


Epoch 24/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.46it/s]

Batch 1000/4538 | Loss: 151.456573


Epoch 24/100 [Train]:  27%|██▋       | 1207/4538 [00:35<01:36, 34.54it/s]

Batch 1200/4538 | Loss: 102.516251


Epoch 24/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.51it/s]

Batch 1400/4538 | Loss: 126.247421


Epoch 24/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.53it/s]

Batch 1600/4538 | Loss: 151.261993


Epoch 24/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.52it/s]

Batch 1800/4538 | Loss: 121.372467


Epoch 24/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.54it/s]

Batch 2000/4538 | Loss: 154.690384


Epoch 24/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.53it/s]

Batch 2200/4538 | Loss: 129.368210


Epoch 24/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.53it/s]

Batch 2400/4538 | Loss: 97.405212


Epoch 24/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.54it/s]

Batch 2600/4538 | Loss: 92.766197


Epoch 24/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.49it/s]

Batch 2800/4538 | Loss: 102.334564


Epoch 24/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.59it/s]

Batch 3000/4538 | Loss: 176.294189


Epoch 24/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.54it/s]

Batch 3200/4538 | Loss: 115.666206


Epoch 24/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.40it/s]

Batch 3400/4538 | Loss: 149.566711


Epoch 24/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.50it/s]

Batch 3600/4538 | Loss: 115.192650


Epoch 24/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.52it/s]

Batch 3800/4538 | Loss: 126.127861


Epoch 24/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.46it/s]

Batch 4000/4538 | Loss: 91.306664


Epoch 24/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.53it/s]

Batch 4200/4538 | Loss: 136.514328


Epoch 24/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.58it/s]

Batch 4400/4538 | Loss: 135.891220


Epoch 24/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.51it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.97it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.71it/s]


Epoch 24/100 | Average Train Loss: 140.319733 | Average Validation Loss: 138.231320
Saved new best model with validation loss: 138.231320


Epoch 25/100 [Train]:   0%|          | 7/4538 [00:00<02:23, 31.50it/s]

Batch 0/4538 | Loss: 118.249443


Epoch 25/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.49it/s]

Batch 200/4538 | Loss: 157.222549


Epoch 25/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.47it/s]

Batch 400/4538 | Loss: 143.805984


Epoch 25/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.52it/s]

Batch 600/4538 | Loss: 132.077271


Epoch 25/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.52it/s]

Batch 800/4538 | Loss: 170.836639


Epoch 25/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.49it/s]

Batch 1000/4538 | Loss: 130.035828


Epoch 25/100 [Train]:  27%|██▋       | 1207/4538 [00:35<01:36, 34.53it/s]

Batch 1200/4538 | Loss: 158.332230


Epoch 25/100 [Train]:  31%|███       | 1407/4538 [00:40<01:31, 34.05it/s]

Batch 1400/4538 | Loss: 168.840408


Epoch 25/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.51it/s]

Batch 1600/4538 | Loss: 99.609421


Epoch 25/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.54it/s]

Batch 1800/4538 | Loss: 159.683182


Epoch 25/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.45it/s]

Batch 2000/4538 | Loss: 121.204308


Epoch 25/100 [Train]:  49%|████▊     | 2207/4538 [01:04<01:07, 34.50it/s]

Batch 2200/4538 | Loss: 155.294968


Epoch 25/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.46it/s]

Batch 2400/4538 | Loss: 139.232468


Epoch 25/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.51it/s]

Batch 2600/4538 | Loss: 127.972168


Epoch 25/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.48it/s]

Batch 2800/4538 | Loss: 122.289116


Epoch 25/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.48it/s]

Batch 3000/4538 | Loss: 150.612411


Epoch 25/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.52it/s]

Batch 3200/4538 | Loss: 146.649475


Epoch 25/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.53it/s]

Batch 3400/4538 | Loss: 151.881485


Epoch 25/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:27, 34.42it/s]

Batch 3600/4538 | Loss: 128.998306


Epoch 25/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.41it/s]

Batch 3800/4538 | Loss: 118.037704


Epoch 25/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.32it/s]

Batch 4000/4538 | Loss: 130.593628


Epoch 25/100 [Train]:  93%|█████████▎| 4207/4538 [02:02<00:09, 34.46it/s]

Batch 4200/4538 | Loss: 100.164848


Epoch 25/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.49it/s]

Batch 4400/4538 | Loss: 164.769623


Epoch 25/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.49it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.74it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.63it/s]


Epoch 25/100 | Average Train Loss: 137.119598 | Average Validation Loss: 135.037853
Saved new best model with validation loss: 135.037853


Epoch 26/100 [Train]:   0%|          | 5/4538 [00:00<03:33, 21.28it/s]

Batch 0/4538 | Loss: 147.561081


Epoch 26/100 [Train]:   5%|▍         | 205/4538 [00:06<02:05, 34.57it/s]

Batch 200/4538 | Loss: 152.811600


Epoch 26/100 [Train]:   9%|▉         | 405/4538 [00:11<01:59, 34.51it/s]

Batch 400/4538 | Loss: 105.379532


Epoch 26/100 [Train]:  13%|█▎        | 605/4538 [00:17<01:54, 34.46it/s]

Batch 600/4538 | Loss: 106.312935


Epoch 26/100 [Train]:  18%|█▊        | 805/4538 [00:23<01:48, 34.49it/s]

Batch 800/4538 | Loss: 138.125900


Epoch 26/100 [Train]:  22%|██▏       | 1005/4538 [00:29<01:42, 34.51it/s]

Batch 1000/4538 | Loss: 149.370453


Epoch 26/100 [Train]:  27%|██▋       | 1205/4538 [00:35<01:36, 34.62it/s]

Batch 1200/4538 | Loss: 90.497276


Epoch 26/100 [Train]:  31%|███       | 1405/4538 [00:40<01:30, 34.64it/s]

Batch 1400/4538 | Loss: 111.763702


Epoch 26/100 [Train]:  35%|███▌      | 1605/4538 [00:46<01:24, 34.63it/s]

Batch 1600/4538 | Loss: 155.797699


Epoch 26/100 [Train]:  40%|███▉      | 1805/4538 [00:52<01:18, 34.68it/s]

Batch 1800/4538 | Loss: 112.156189


Epoch 26/100 [Train]:  44%|████▍     | 2005/4538 [00:58<01:15, 33.77it/s]

Batch 2000/4538 | Loss: 89.520714


Epoch 26/100 [Train]:  49%|████▊     | 2205/4538 [01:03<01:07, 34.54it/s]

Batch 2200/4538 | Loss: 155.514893


Epoch 26/100 [Train]:  53%|█████▎    | 2405/4538 [01:09<01:01, 34.63it/s]

Batch 2400/4538 | Loss: 118.217880


Epoch 26/100 [Train]:  57%|█████▋    | 2605/4538 [01:15<00:55, 34.62it/s]

Batch 2600/4538 | Loss: 119.388199


Epoch 26/100 [Train]:  62%|██████▏   | 2805/4538 [01:21<00:49, 34.68it/s]

Batch 2800/4538 | Loss: 150.894806


Epoch 26/100 [Train]:  66%|██████▌   | 3005/4538 [01:26<00:44, 34.59it/s]

Batch 3000/4538 | Loss: 78.737930


Epoch 26/100 [Train]:  71%|███████   | 3205/4538 [01:32<00:38, 34.54it/s]

Batch 3200/4538 | Loss: 176.933182


Epoch 26/100 [Train]:  75%|███████▌  | 3405/4538 [01:38<00:32, 34.53it/s]

Batch 3400/4538 | Loss: 107.966537


Epoch 26/100 [Train]:  79%|███████▉  | 3605/4538 [01:44<00:27, 34.53it/s]

Batch 3600/4538 | Loss: 133.339432


Epoch 26/100 [Train]:  84%|████████▍ | 3805/4538 [01:50<00:21, 34.48it/s]

Batch 3800/4538 | Loss: 93.019615


Epoch 26/100 [Train]:  88%|████████▊ | 4005/4538 [01:55<00:15, 34.51it/s]

Batch 4000/4538 | Loss: 151.818741


Epoch 26/100 [Train]:  93%|█████████▎| 4205/4538 [02:01<00:09, 34.65it/s]

Batch 4200/4538 | Loss: 144.152878


Epoch 26/100 [Train]:  97%|█████████▋| 4405/4538 [02:07<00:03, 34.48it/s]

Batch 4400/4538 | Loss: 132.145554


Epoch 26/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.53it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 124.42it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 93.53it/s]


Epoch 26/100 | Average Train Loss: 133.999817 | Average Validation Loss: 131.825469
Saved new best model with validation loss: 131.825469


Epoch 27/100 [Train]:   0%|          | 7/4538 [00:00<02:21, 32.06it/s]

Batch 0/4538 | Loss: 138.201263


Epoch 27/100 [Train]:   5%|▍         | 207/4538 [00:05<02:04, 34.82it/s]

Batch 200/4538 | Loss: 150.411346


Epoch 27/100 [Train]:   9%|▉         | 407/4538 [00:11<01:58, 34.85it/s]

Batch 400/4538 | Loss: 140.812775


Epoch 27/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.69it/s]

Batch 600/4538 | Loss: 118.388252


Epoch 27/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:46, 34.92it/s]

Batch 800/4538 | Loss: 140.469604


Epoch 27/100 [Train]:  22%|██▏       | 1007/4538 [00:28<01:41, 34.93it/s]

Batch 1000/4538 | Loss: 88.618774


Epoch 27/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.49it/s]

Batch 1200/4538 | Loss: 171.991882


Epoch 27/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.52it/s]

Batch 1400/4538 | Loss: 61.729023


Epoch 27/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.60it/s]

Batch 1600/4538 | Loss: 124.310417


Epoch 27/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.46it/s]

Batch 1800/4538 | Loss: 146.396820


Epoch 27/100 [Train]:  44%|████▍     | 2007/4538 [00:57<01:13, 34.52it/s]

Batch 2000/4538 | Loss: 143.613297


Epoch 27/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.56it/s]

Batch 2200/4538 | Loss: 110.875435


Epoch 27/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.47it/s]

Batch 2400/4538 | Loss: 134.902405


Epoch 27/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.51it/s]

Batch 2600/4538 | Loss: 152.409302


Epoch 27/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.51it/s]

Batch 2800/4538 | Loss: 149.292191


Epoch 27/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:44, 34.49it/s]

Batch 3000/4538 | Loss: 155.267426


Epoch 27/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.52it/s]

Batch 3200/4538 | Loss: 104.121063


Epoch 27/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.59it/s]

Batch 3400/4538 | Loss: 120.864746


Epoch 27/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.56it/s]

Batch 3600/4538 | Loss: 156.520706


Epoch 27/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.61it/s]

Batch 3800/4538 | Loss: 126.617058


Epoch 27/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.55it/s]

Batch 4000/4538 | Loss: 124.946976


Epoch 27/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 33.70it/s]

Batch 4200/4538 | Loss: 157.274963


Epoch 27/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.54it/s]

Batch 4400/4538 | Loss: 95.185234


Epoch 27/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.59it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 122.23it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 92.50it/s]


Epoch 27/100 | Average Train Loss: 130.951614 | Average Validation Loss: 128.824762
Saved new best model with validation loss: 128.824762


Epoch 28/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 140.552277


Epoch 28/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.43it/s]

Batch 200/4538 | Loss: 113.536606


Epoch 28/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.48it/s]

Batch 400/4538 | Loss: 133.660156


Epoch 28/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:54, 34.46it/s]

Batch 600/4538 | Loss: 148.191116


Epoch 28/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.57it/s]

Batch 800/4538 | Loss: 91.885284


Epoch 28/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.57it/s]

Batch 1000/4538 | Loss: 144.617325


Epoch 28/100 [Train]:  27%|██▋       | 1207/4538 [00:35<01:37, 34.19it/s]

Batch 1200/4538 | Loss: 125.850952


Epoch 28/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.45it/s]

Batch 1400/4538 | Loss: 143.196381


Epoch 28/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:25, 34.46it/s]

Batch 1600/4538 | Loss: 149.857834


Epoch 28/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.48it/s]

Batch 1800/4538 | Loss: 147.569077


Epoch 28/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.55it/s]

Batch 2000/4538 | Loss: 163.021530


Epoch 28/100 [Train]:  49%|████▊     | 2207/4538 [01:04<01:09, 33.75it/s]

Batch 2200/4538 | Loss: 79.649269


Epoch 28/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.53it/s]

Batch 2400/4538 | Loss: 151.998260


Epoch 28/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.55it/s]

Batch 2600/4538 | Loss: 115.710442


Epoch 28/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.61it/s]

Batch 2800/4538 | Loss: 134.320892


Epoch 28/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.61it/s]

Batch 3000/4538 | Loss: 149.260422


Epoch 28/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.57it/s]

Batch 3200/4538 | Loss: 133.631454


Epoch 28/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.60it/s]

Batch 3400/4538 | Loss: 120.602142


Epoch 28/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.62it/s]

Batch 3600/4538 | Loss: 156.537750


Epoch 28/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.47it/s]

Batch 3800/4538 | Loss: 121.978645


Epoch 28/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.53it/s]

Batch 4000/4538 | Loss: 107.834976


Epoch 28/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.66it/s]

Batch 4200/4538 | Loss: 135.791748


Epoch 28/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.50it/s]

Batch 4400/4538 | Loss: 171.088730


Epoch 28/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.51it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.66it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.84it/s]


Epoch 28/100 | Average Train Loss: 127.980049 | Average Validation Loss: 125.831042
Saved new best model with validation loss: 125.831042


Epoch 29/100 [Train]:   0%|          | 7/4538 [00:00<02:24, 31.38it/s]

Batch 0/4538 | Loss: 136.192719


Epoch 29/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.63it/s]

Batch 200/4538 | Loss: 107.020668


Epoch 29/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.64it/s]

Batch 400/4538 | Loss: 145.201523


Epoch 29/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.58it/s]

Batch 600/4538 | Loss: 109.373840


Epoch 29/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.53it/s]

Batch 800/4538 | Loss: 102.635948


Epoch 29/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.52it/s]

Batch 1000/4538 | Loss: 126.380882


Epoch 29/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.42it/s]

Batch 1200/4538 | Loss: 96.574226


Epoch 29/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.49it/s]

Batch 1400/4538 | Loss: 131.875458


Epoch 29/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:25, 34.45it/s]

Batch 1600/4538 | Loss: 102.288139


Epoch 29/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.14it/s]

Batch 1800/4538 | Loss: 148.075974


Epoch 29/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.44it/s]

Batch 2000/4538 | Loss: 177.101547


Epoch 29/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.43it/s]

Batch 2200/4538 | Loss: 130.990021


Epoch 29/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:02, 34.36it/s]

Batch 2400/4538 | Loss: 110.721977


Epoch 29/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:56, 34.39it/s]

Batch 2600/4538 | Loss: 140.521866


Epoch 29/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.42it/s]

Batch 2800/4538 | Loss: 136.009903


Epoch 29/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.35it/s]

Batch 3000/4538 | Loss: 137.332535


Epoch 29/100 [Train]:  71%|███████   | 3207/4538 [01:33<00:38, 34.52it/s]

Batch 3200/4538 | Loss: 82.801308


Epoch 29/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 35.07it/s]

Batch 3400/4538 | Loss: 99.835464


Epoch 29/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 35.05it/s]

Batch 3600/4538 | Loss: 138.193832


Epoch 29/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.51it/s]

Batch 3800/4538 | Loss: 138.105225


Epoch 29/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.43it/s]

Batch 4000/4538 | Loss: 137.037552


Epoch 29/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.47it/s]

Batch 4200/4538 | Loss: 152.471558


Epoch 29/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.13it/s]

Batch 4400/4538 | Loss: 118.048096


Epoch 29/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.51it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 123.76it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.72it/s]


Epoch 29/100 | Average Train Loss: 125.020004 | Average Validation Loss: 122.823250
Saved new best model with validation loss: 122.823250


Epoch 30/100 [Train]:   0%|          | 7/4538 [00:00<02:24, 31.27it/s]

Batch 0/4538 | Loss: 154.726868


Epoch 30/100 [Train]:   5%|▍         | 207/4538 [00:05<02:04, 34.69it/s]

Batch 200/4538 | Loss: 103.675621


Epoch 30/100 [Train]:   9%|▉         | 407/4538 [00:11<01:58, 34.76it/s]

Batch 400/4538 | Loss: 109.019775


Epoch 30/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:52, 34.85it/s]

Batch 600/4538 | Loss: 134.109985


Epoch 30/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.73it/s]

Batch 800/4538 | Loss: 97.184296


Epoch 30/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:41, 34.67it/s]

Batch 1000/4538 | Loss: 132.722321


Epoch 30/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.60it/s]

Batch 1200/4538 | Loss: 112.878105


Epoch 30/100 [Train]:  31%|███       | 1407/4538 [00:40<01:31, 34.38it/s]

Batch 1400/4538 | Loss: 141.994919


Epoch 30/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:16, 38.36it/s]

Batch 1600/4538 | Loss: 146.612579


Epoch 30/100 [Train]:  40%|███▉      | 1807/4538 [00:51<01:10, 38.49it/s]

Batch 1800/4538 | Loss: 129.857239


Epoch 30/100 [Train]:  44%|████▍     | 2007/4538 [00:56<01:05, 38.39it/s]

Batch 2000/4538 | Loss: 135.033951


Epoch 30/100 [Train]:  49%|████▊     | 2207/4538 [01:01<01:00, 38.44it/s]

Batch 2200/4538 | Loss: 99.337685


Epoch 30/100 [Train]:  53%|█████▎    | 2407/4538 [01:06<00:55, 38.38it/s]

Batch 2400/4538 | Loss: 134.113708


Epoch 30/100 [Train]:  57%|█████▋    | 2607/4538 [01:12<00:50, 38.45it/s]

Batch 2600/4538 | Loss: 145.632568


Epoch 30/100 [Train]:  62%|██████▏   | 2807/4538 [01:17<00:45, 38.38it/s]

Batch 2800/4538 | Loss: 117.086449


Epoch 30/100 [Train]:  66%|██████▋   | 3007/4538 [01:22<00:39, 38.30it/s]

Batch 3000/4538 | Loss: 97.632065


Epoch 30/100 [Train]:  71%|███████   | 3207/4538 [01:27<00:34, 38.38it/s]

Batch 3200/4538 | Loss: 137.205139


Epoch 30/100 [Train]:  75%|███████▌  | 3407/4538 [01:33<00:29, 38.42it/s]

Batch 3400/4538 | Loss: 141.103439


Epoch 30/100 [Train]:  79%|███████▉  | 3607/4538 [01:38<00:24, 38.40it/s]

Batch 3600/4538 | Loss: 68.173302


Epoch 30/100 [Train]:  84%|████████▍ | 3807/4538 [01:43<00:19, 38.38it/s]

Batch 3800/4538 | Loss: 152.938828


Epoch 30/100 [Train]:  88%|████████▊ | 4007/4538 [01:48<00:13, 38.31it/s]

Batch 4000/4538 | Loss: 141.505859


Epoch 30/100 [Train]:  93%|█████████▎| 4207/4538 [01:53<00:08, 38.08it/s]

Batch 4200/4538 | Loss: 118.101807


Epoch 30/100 [Train]:  97%|█████████▋| 4407/4538 [01:59<00:03, 38.37it/s]

Batch 4400/4538 | Loss: 139.024765


Epoch 30/100 [Train]: 100%|██████████| 4538/4538 [02:02<00:00, 37.02it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 130.24it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 103.36it/s]


Epoch 30/100 | Average Train Loss: 122.148621 | Average Validation Loss: 119.998884
Saved new best model with validation loss: 119.998884


Epoch 31/100 [Train]:   0%|          | 7/4538 [00:00<02:09, 34.96it/s]

Batch 0/4538 | Loss: 123.985085


Epoch 31/100 [Train]:   5%|▍         | 207/4538 [00:05<01:52, 38.47it/s]

Batch 200/4538 | Loss: 93.146400


Epoch 31/100 [Train]:   9%|▉         | 407/4538 [00:10<01:47, 38.40it/s]

Batch 400/4538 | Loss: 135.196777


Epoch 31/100 [Train]:  13%|█▎        | 607/4538 [00:15<01:41, 38.62it/s]

Batch 600/4538 | Loss: 131.878357


Epoch 31/100 [Train]:  18%|█▊        | 807/4538 [00:20<01:36, 38.50it/s]

Batch 800/4538 | Loss: 151.273682


Epoch 31/100 [Train]:  22%|██▏       | 1007/4538 [00:26<01:31, 38.40it/s]

Batch 1000/4538 | Loss: 106.339310


Epoch 31/100 [Train]:  27%|██▋       | 1207/4538 [00:31<01:26, 38.32it/s]

Batch 1200/4538 | Loss: 115.774361


Epoch 31/100 [Train]:  31%|███       | 1407/4538 [00:36<01:21, 38.37it/s]

Batch 1400/4538 | Loss: 106.360016


Epoch 31/100 [Train]:  35%|███▌      | 1605/4538 [00:41<01:11, 41.26it/s]

Batch 1600/4538 | Loss: 133.663773


Epoch 31/100 [Train]:  40%|███▉      | 1805/4538 [00:46<01:06, 41.15it/s]

Batch 1800/4538 | Loss: 124.239960


Epoch 31/100 [Train]:  44%|████▍     | 2005/4538 [00:51<01:01, 41.47it/s]

Batch 2000/4538 | Loss: 139.836914


Epoch 31/100 [Train]:  49%|████▊     | 2205/4538 [00:56<00:56, 41.19it/s]

Batch 2200/4538 | Loss: 142.431458


Epoch 31/100 [Train]:  53%|█████▎    | 2405/4538 [01:01<00:51, 41.11it/s]

Batch 2400/4538 | Loss: 145.573135


Epoch 31/100 [Train]:  57%|█████▋    | 2605/4538 [01:05<00:47, 41.05it/s]

Batch 2600/4538 | Loss: 97.189041


Epoch 31/100 [Train]:  62%|██████▏   | 2805/4538 [01:10<00:42, 41.18it/s]

Batch 2800/4538 | Loss: 106.158684


Epoch 31/100 [Train]:  66%|██████▌   | 3005/4538 [01:15<00:37, 41.13it/s]

Batch 3000/4538 | Loss: 100.651848


Epoch 31/100 [Train]:  71%|███████   | 3205/4538 [01:20<00:32, 41.28it/s]

Batch 3200/4538 | Loss: 142.434357


Epoch 31/100 [Train]:  75%|███████▌  | 3405/4538 [01:25<00:27, 40.92it/s]

Batch 3400/4538 | Loss: 68.199692


Epoch 31/100 [Train]:  79%|███████▉  | 3605/4538 [01:30<00:22, 41.16it/s]

Batch 3600/4538 | Loss: 146.297684


Epoch 31/100 [Train]:  84%|████████▍ | 3805/4538 [01:35<00:17, 41.17it/s]

Batch 3800/4538 | Loss: 152.258972


Epoch 31/100 [Train]:  88%|████████▊ | 4005/4538 [01:39<00:13, 40.32it/s]

Batch 4000/4538 | Loss: 76.251686


Epoch 31/100 [Train]:  93%|█████████▎| 4205/4538 [01:44<00:08, 41.29it/s]

Batch 4200/4538 | Loss: 77.294731


Epoch 31/100 [Train]:  97%|█████████▋| 4405/4538 [01:49<00:03, 41.22it/s]

Batch 4400/4538 | Loss: 105.516182


Epoch 31/100 [Train]: 100%|██████████| 4538/4538 [01:52<00:00, 40.22it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 130.28it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 100.54it/s]


Epoch 31/100 | Average Train Loss: 119.526031 | Average Validation Loss: 117.438098
Saved new best model with validation loss: 117.438098


Epoch 32/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 136.525650


Epoch 32/100 [Train]:   4%|▍         | 204/4538 [00:05<01:45, 41.10it/s]

Batch 200/4538 | Loss: 128.382095


Epoch 32/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 41.19it/s]

Batch 400/4538 | Loss: 90.343636


Epoch 32/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:36, 40.90it/s]

Batch 600/4538 | Loss: 127.221489


Epoch 32/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:31, 40.85it/s]

Batch 800/4538 | Loss: 147.244385


Epoch 32/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:25, 41.38it/s]

Batch 1000/4538 | Loss: 105.650551


Epoch 32/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:21, 41.03it/s]

Batch 1200/4538 | Loss: 133.233948


Epoch 32/100 [Train]:  31%|███       | 1404/4538 [00:34<01:18, 39.92it/s]

Batch 1400/4538 | Loss: 137.700821


Epoch 32/100 [Train]:  35%|███▌      | 1604/4538 [00:39<01:12, 40.69it/s]

Batch 1600/4538 | Loss: 131.901566


Epoch 32/100 [Train]:  40%|███▉      | 1804/4538 [00:43<01:06, 41.07it/s]

Batch 1800/4538 | Loss: 99.485672


Epoch 32/100 [Train]:  44%|████▍     | 2004/4538 [00:48<01:01, 41.03it/s]

Batch 2000/4538 | Loss: 99.293175


Epoch 32/100 [Train]:  49%|████▊     | 2204/4538 [00:53<00:57, 40.94it/s]

Batch 2200/4538 | Loss: 119.439369


Epoch 32/100 [Train]:  53%|█████▎    | 2404/4538 [00:58<00:52, 41.02it/s]

Batch 2400/4538 | Loss: 132.567093


Epoch 32/100 [Train]:  57%|█████▋    | 2604/4538 [01:03<00:46, 41.17it/s]

Batch 2600/4538 | Loss: 117.704582


Epoch 32/100 [Train]:  62%|██████▏   | 2804/4538 [01:08<00:42, 41.04it/s]

Batch 2800/4538 | Loss: 135.579788


Epoch 32/100 [Train]:  66%|██████▌   | 3004/4538 [01:13<00:37, 40.91it/s]

Batch 3000/4538 | Loss: 78.235199


Epoch 32/100 [Train]:  71%|███████   | 3204/4538 [01:18<00:32, 41.01it/s]

Batch 3200/4538 | Loss: 84.534653


Epoch 32/100 [Train]:  75%|███████▌  | 3404/4538 [01:22<00:27, 40.98it/s]

Batch 3400/4538 | Loss: 144.225861


Epoch 32/100 [Train]:  79%|███████▉  | 3604/4538 [01:27<00:22, 41.27it/s]

Batch 3600/4538 | Loss: 135.255524


Epoch 32/100 [Train]:  84%|████████▍ | 3804/4538 [01:32<00:17, 41.08it/s]

Batch 3800/4538 | Loss: 112.756927


Epoch 32/100 [Train]:  88%|████████▊ | 4004/4538 [01:37<00:13, 40.87it/s]

Batch 4000/4538 | Loss: 142.211731


Epoch 32/100 [Train]:  93%|█████████▎| 4204/4538 [01:42<00:08, 41.07it/s]

Batch 4200/4538 | Loss: 106.145065


Epoch 32/100 [Train]:  97%|█████████▋| 4404/4538 [01:47<00:03, 40.99it/s]

Batch 4400/4538 | Loss: 100.827209


Epoch 32/100 [Train]: 100%|██████████| 4538/4538 [01:50<00:00, 41.07it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 131.45it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 103.64it/s]


Epoch 32/100 | Average Train Loss: 117.100349 | Average Validation Loss: 115.063473
Saved new best model with validation loss: 115.063473


Epoch 33/100 [Train]:   0%|          | 7/4538 [00:00<02:08, 35.14it/s]

Batch 0/4538 | Loss: 74.968880


Epoch 33/100 [Train]:   4%|▍         | 204/4538 [00:05<01:45, 41.04it/s]

Batch 200/4538 | Loss: 70.183258


Epoch 33/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 41.12it/s]

Batch 400/4538 | Loss: 101.276718


Epoch 33/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:35, 41.17it/s]

Batch 600/4538 | Loss: 131.608978


Epoch 33/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:31, 40.90it/s]

Batch 800/4538 | Loss: 126.189209


Epoch 33/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:25, 41.20it/s]

Batch 1000/4538 | Loss: 103.974075


Epoch 33/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:20, 41.17it/s]

Batch 1200/4538 | Loss: 102.306404


Epoch 33/100 [Train]:  31%|███       | 1404/4538 [00:34<01:16, 40.81it/s]

Batch 1400/4538 | Loss: 129.381317


Epoch 33/100 [Train]:  35%|███▌      | 1604/4538 [00:39<01:11, 40.97it/s]

Batch 1600/4538 | Loss: 99.342331


Epoch 33/100 [Train]:  40%|███▉      | 1804/4538 [00:44<01:06, 41.07it/s]

Batch 1800/4538 | Loss: 135.111115


Epoch 33/100 [Train]:  44%|████▍     | 2006/4538 [00:49<01:05, 38.50it/s]

Batch 2000/4538 | Loss: 160.422729


Epoch 33/100 [Train]:  49%|████▊     | 2206/4538 [00:54<01:00, 38.43it/s]

Batch 2200/4538 | Loss: 120.854507


Epoch 33/100 [Train]:  53%|█████▎    | 2406/4538 [00:59<00:55, 38.51it/s]

Batch 2400/4538 | Loss: 123.995087


Epoch 33/100 [Train]:  57%|█████▋    | 2606/4538 [01:04<00:50, 38.48it/s]

Batch 2600/4538 | Loss: 97.678154


Epoch 33/100 [Train]:  62%|██████▏   | 2806/4538 [01:10<00:45, 38.48it/s]

Batch 2800/4538 | Loss: 91.526909


Epoch 33/100 [Train]:  66%|██████▌   | 3006/4538 [01:15<00:39, 38.44it/s]

Batch 3000/4538 | Loss: 91.350014


Epoch 33/100 [Train]:  71%|███████   | 3206/4538 [01:20<00:34, 38.30it/s]

Batch 3200/4538 | Loss: 116.454651


Epoch 33/100 [Train]:  75%|███████▌  | 3406/4538 [01:25<00:29, 38.32it/s]

Batch 3400/4538 | Loss: 113.157471


Epoch 33/100 [Train]:  79%|███████▉  | 3606/4538 [01:30<00:24, 38.34it/s]

Batch 3600/4538 | Loss: 103.545372


Epoch 33/100 [Train]:  84%|████████▍ | 3806/4538 [01:36<00:19, 38.29it/s]

Batch 3800/4538 | Loss: 110.940208


Epoch 33/100 [Train]:  88%|████████▊ | 4006/4538 [01:41<00:13, 38.32it/s]

Batch 4000/4538 | Loss: 77.935226


Epoch 33/100 [Train]:  93%|█████████▎| 4206/4538 [01:46<00:08, 37.66it/s]

Batch 4200/4538 | Loss: 82.494179


Epoch 33/100 [Train]:  97%|█████████▋| 4406/4538 [01:51<00:03, 38.41it/s]

Batch 4400/4538 | Loss: 92.663582


Epoch 33/100 [Train]: 100%|██████████| 4538/4538 [01:55<00:00, 39.38it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 132.04it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 103.77it/s]


Epoch 33/100 | Average Train Loss: 114.841377 | Average Validation Loss: 112.860996
Saved new best model with validation loss: 112.860996


Epoch 34/100 [Train]:   0%|          | 4/4538 [00:00<02:14, 33.72it/s]

Batch 0/4538 | Loss: 109.800087


Epoch 34/100 [Train]:   4%|▍         | 204/4538 [00:04<01:45, 41.09it/s]

Batch 200/4538 | Loss: 118.410065


Epoch 34/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 41.01it/s]

Batch 400/4538 | Loss: 99.315140


Epoch 34/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:35, 41.22it/s]

Batch 600/4538 | Loss: 143.887421


Epoch 34/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:30, 41.04it/s]

Batch 800/4538 | Loss: 116.334610


Epoch 34/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:26, 40.76it/s]

Batch 1000/4538 | Loss: 154.214767


Epoch 34/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:21, 40.91it/s]

Batch 1200/4538 | Loss: 73.431252


Epoch 34/100 [Train]:  31%|███       | 1404/4538 [00:34<01:15, 41.32it/s]

Batch 1400/4538 | Loss: 128.277328


Epoch 34/100 [Train]:  35%|███▌      | 1604/4538 [00:38<01:13, 40.00it/s]

Batch 1600/4538 | Loss: 94.152718


Epoch 34/100 [Train]:  40%|███▉      | 1804/4538 [00:43<01:06, 41.42it/s]

Batch 1800/4538 | Loss: 84.738480


Epoch 34/100 [Train]:  44%|████▍     | 2004/4538 [00:48<01:01, 41.11it/s]

Batch 2000/4538 | Loss: 131.785126


Epoch 34/100 [Train]:  49%|████▊     | 2204/4538 [00:53<00:56, 41.39it/s]

Batch 2200/4538 | Loss: 132.033386


Epoch 34/100 [Train]:  53%|█████▎    | 2404/4538 [00:58<00:51, 41.32it/s]

Batch 2400/4538 | Loss: 103.648087


Epoch 34/100 [Train]:  57%|█████▋    | 2604/4538 [01:03<00:47, 41.07it/s]

Batch 2600/4538 | Loss: 136.312973


Epoch 34/100 [Train]:  62%|██████▏   | 2804/4538 [01:08<00:42, 41.12it/s]

Batch 2800/4538 | Loss: 122.289963


Epoch 34/100 [Train]:  66%|██████▌   | 3004/4538 [01:12<00:37, 41.25it/s]

Batch 3000/4538 | Loss: 95.189430


Epoch 34/100 [Train]:  71%|███████   | 3204/4538 [01:17<00:32, 41.23it/s]

Batch 3200/4538 | Loss: 120.612381


Epoch 34/100 [Train]:  75%|███████▌  | 3404/4538 [01:22<00:27, 41.42it/s]

Batch 3400/4538 | Loss: 103.750923


Epoch 34/100 [Train]:  79%|███████▉  | 3604/4538 [01:27<00:22, 41.25it/s]

Batch 3600/4538 | Loss: 148.790680


Epoch 34/100 [Train]:  84%|████████▍ | 3804/4538 [01:32<00:17, 40.84it/s]

Batch 3800/4538 | Loss: 81.940742


Epoch 34/100 [Train]:  88%|████████▊ | 4004/4538 [01:37<00:12, 41.31it/s]

Batch 4000/4538 | Loss: 108.509087


Epoch 34/100 [Train]:  93%|█████████▎| 4204/4538 [01:42<00:08, 41.00it/s]

Batch 4200/4538 | Loss: 106.618683


Epoch 34/100 [Train]:  97%|█████████▋| 4404/4538 [01:46<00:03, 40.80it/s]

Batch 4400/4538 | Loss: 90.110382


Epoch 34/100 [Train]: 100%|██████████| 4538/4538 [01:50<00:00, 41.21it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:33<00:00, 133.54it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 100.66it/s]


Epoch 34/100 | Average Train Loss: 112.732162 | Average Validation Loss: 110.833955
Saved new best model with validation loss: 110.833955


Epoch 35/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 100.135864


Epoch 35/100 [Train]:   4%|▍         | 204/4538 [00:04<01:45, 41.05it/s]

Batch 200/4538 | Loss: 126.180458


Epoch 35/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 41.09it/s]

Batch 400/4538 | Loss: 92.911636


Epoch 35/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:35, 41.11it/s]

Batch 600/4538 | Loss: 107.036919


Epoch 35/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:30, 41.09it/s]

Batch 800/4538 | Loss: 125.583374


Epoch 35/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:25, 41.23it/s]

Batch 1000/4538 | Loss: 111.423645


Epoch 35/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:21, 41.12it/s]

Batch 1200/4538 | Loss: 106.851814


Epoch 35/100 [Train]:  31%|███       | 1404/4538 [00:34<01:16, 41.06it/s]

Batch 1400/4538 | Loss: 73.912910


Epoch 35/100 [Train]:  35%|███▌      | 1604/4538 [00:38<01:11, 41.18it/s]

Batch 1600/4538 | Loss: 91.525993


Epoch 35/100 [Train]:  40%|███▉      | 1804/4538 [00:43<01:06, 41.19it/s]

Batch 1800/4538 | Loss: 120.794922


Epoch 35/100 [Train]:  44%|████▍     | 2004/4538 [00:48<01:01, 41.39it/s]

Batch 2000/4538 | Loss: 136.269714


Epoch 35/100 [Train]:  49%|████▊     | 2204/4538 [00:53<00:56, 41.12it/s]

Batch 2200/4538 | Loss: 136.643204


Epoch 35/100 [Train]:  53%|█████▎    | 2404/4538 [00:58<00:51, 41.09it/s]

Batch 2400/4538 | Loss: 120.878746


Epoch 35/100 [Train]:  57%|█████▋    | 2604/4538 [01:03<00:46, 41.30it/s]

Batch 2600/4538 | Loss: 67.215286


Epoch 35/100 [Train]:  62%|██████▏   | 2804/4538 [01:08<00:42, 41.17it/s]

Batch 2800/4538 | Loss: 126.392929


Epoch 35/100 [Train]:  66%|██████▌   | 3004/4538 [01:12<00:36, 41.48it/s]

Batch 3000/4538 | Loss: 113.682014


Epoch 35/100 [Train]:  71%|███████   | 3204/4538 [01:17<00:32, 41.22it/s]

Batch 3200/4538 | Loss: 79.378777


Epoch 35/100 [Train]:  75%|███████▌  | 3404/4538 [01:22<00:27, 41.05it/s]

Batch 3400/4538 | Loss: 88.471153


Epoch 35/100 [Train]:  79%|███████▉  | 3604/4538 [01:27<00:22, 41.34it/s]

Batch 3600/4538 | Loss: 87.747551


Epoch 35/100 [Train]:  84%|████████▍ | 3804/4538 [01:32<00:17, 41.03it/s]

Batch 3800/4538 | Loss: 99.488602


Epoch 35/100 [Train]:  88%|████████▊ | 4004/4538 [01:37<00:13, 40.63it/s]

Batch 4000/4538 | Loss: 101.270020


Epoch 35/100 [Train]:  93%|█████████▎| 4204/4538 [01:42<00:08, 41.09it/s]

Batch 4200/4538 | Loss: 89.020264


Epoch 35/100 [Train]:  97%|█████████▋| 4404/4538 [01:46<00:03, 41.29it/s]

Batch 4400/4538 | Loss: 105.066444


Epoch 35/100 [Train]: 100%|██████████| 4538/4538 [01:50<00:00, 41.19it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 133.18it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 102.91it/s]


Epoch 35/100 | Average Train Loss: 110.772606 | Average Validation Loss: 108.891625
Saved new best model with validation loss: 108.891625


Epoch 36/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 95.097008


Epoch 36/100 [Train]:   4%|▍         | 204/4538 [00:04<01:47, 40.16it/s]

Batch 200/4538 | Loss: 107.124260


Epoch 36/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 41.09it/s]

Batch 400/4538 | Loss: 124.329529


Epoch 36/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:35, 41.29it/s]

Batch 600/4538 | Loss: 108.513222


Epoch 36/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:30, 41.22it/s]

Batch 800/4538 | Loss: 75.587418


Epoch 36/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:25, 41.52it/s]

Batch 1000/4538 | Loss: 117.708313


Epoch 36/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:20, 41.28it/s]

Batch 1200/4538 | Loss: 105.602455


Epoch 36/100 [Train]:  31%|███       | 1404/4538 [00:34<01:15, 41.50it/s]

Batch 1400/4538 | Loss: 107.653839


Epoch 36/100 [Train]:  35%|███▌      | 1604/4538 [00:38<01:11, 41.28it/s]

Batch 1600/4538 | Loss: 128.774582


Epoch 36/100 [Train]:  40%|███▉      | 1804/4538 [00:43<01:05, 41.49it/s]

Batch 1800/4538 | Loss: 124.508339


Epoch 36/100 [Train]:  44%|████▍     | 2004/4538 [00:48<01:01, 41.21it/s]

Batch 2000/4538 | Loss: 103.622726


Epoch 36/100 [Train]:  49%|████▊     | 2204/4538 [00:53<00:56, 41.26it/s]

Batch 2200/4538 | Loss: 118.103752


Epoch 36/100 [Train]:  53%|█████▎    | 2404/4538 [00:58<00:51, 41.46it/s]

Batch 2400/4538 | Loss: 108.585068


Epoch 36/100 [Train]:  57%|█████▋    | 2604/4538 [01:03<00:46, 41.48it/s]

Batch 2600/4538 | Loss: 123.858871


Epoch 36/100 [Train]:  62%|██████▏   | 2804/4538 [01:07<00:41, 41.29it/s]

Batch 2800/4538 | Loss: 133.239960


Epoch 36/100 [Train]:  66%|██████▌   | 3004/4538 [01:12<00:37, 40.86it/s]

Batch 3000/4538 | Loss: 85.116699


Epoch 36/100 [Train]:  71%|███████   | 3204/4538 [01:17<00:32, 41.21it/s]

Batch 3200/4538 | Loss: 139.179886


Epoch 36/100 [Train]:  75%|███████▌  | 3404/4538 [01:22<00:27, 40.93it/s]

Batch 3400/4538 | Loss: 75.055023


Epoch 36/100 [Train]:  79%|███████▉  | 3604/4538 [01:27<00:22, 41.04it/s]

Batch 3600/4538 | Loss: 121.057442


Epoch 36/100 [Train]:  84%|████████▍ | 3804/4538 [01:32<00:17, 41.13it/s]

Batch 3800/4538 | Loss: 101.143288


Epoch 36/100 [Train]:  88%|████████▊ | 4004/4538 [01:37<00:12, 41.21it/s]

Batch 4000/4538 | Loss: 112.167465


Epoch 36/100 [Train]:  93%|█████████▎| 4204/4538 [01:41<00:08, 41.15it/s]

Batch 4200/4538 | Loss: 121.208054


Epoch 36/100 [Train]:  97%|█████████▋| 4404/4538 [01:46<00:03, 41.39it/s]

Batch 4400/4538 | Loss: 81.689377


Epoch 36/100 [Train]: 100%|██████████| 4538/4538 [01:50<00:00, 41.24it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 133.30it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 103.04it/s]


Epoch 36/100 | Average Train Loss: 108.933502 | Average Validation Loss: 107.159533
Saved new best model with validation loss: 107.159533


Epoch 37/100 [Train]:   0%|          | 4/4538 [00:00<02:16, 33.15it/s]

Batch 0/4538 | Loss: 115.650749


Epoch 37/100 [Train]:   4%|▍         | 204/4538 [00:04<01:45, 41.09it/s]

Batch 200/4538 | Loss: 148.620743


Epoch 37/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 41.20it/s]

Batch 400/4538 | Loss: 101.078911


Epoch 37/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:35, 41.06it/s]

Batch 600/4538 | Loss: 87.349121


Epoch 37/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:30, 41.41it/s]

Batch 800/4538 | Loss: 108.025536


Epoch 37/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:26, 40.99it/s]

Batch 1000/4538 | Loss: 122.513756


Epoch 37/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:20, 41.21it/s]

Batch 1200/4538 | Loss: 100.281349


Epoch 37/100 [Train]:  31%|███       | 1404/4538 [00:34<01:16, 41.03it/s]

Batch 1400/4538 | Loss: 110.621346


Epoch 37/100 [Train]:  35%|███▌      | 1604/4538 [00:38<01:10, 41.42it/s]

Batch 1600/4538 | Loss: 134.907349


Epoch 37/100 [Train]:  40%|███▉      | 1804/4538 [00:43<01:06, 41.40it/s]

Batch 1800/4538 | Loss: 90.892792


Epoch 37/100 [Train]:  44%|████▍     | 2004/4538 [00:48<01:01, 41.36it/s]

Batch 2000/4538 | Loss: 69.941536


Epoch 37/100 [Train]:  49%|████▊     | 2204/4538 [00:53<00:56, 41.43it/s]

Batch 2200/4538 | Loss: 105.079285


Epoch 37/100 [Train]:  53%|█████▎    | 2404/4538 [00:58<00:52, 40.92it/s]

Batch 2400/4538 | Loss: 106.231377


Epoch 37/100 [Train]:  57%|█████▋    | 2604/4538 [01:03<00:47, 40.62it/s]

Batch 2600/4538 | Loss: 86.191841


Epoch 37/100 [Train]:  62%|██████▏   | 2804/4538 [01:08<00:42, 41.11it/s]

Batch 2800/4538 | Loss: 121.383774


Epoch 37/100 [Train]:  66%|██████▌   | 3004/4538 [01:12<00:37, 41.29it/s]

Batch 3000/4538 | Loss: 108.427330


Epoch 37/100 [Train]:  71%|███████   | 3204/4538 [01:17<00:32, 40.58it/s]

Batch 3200/4538 | Loss: 116.368301


Epoch 37/100 [Train]:  75%|███████▌  | 3404/4538 [01:22<00:27, 41.34it/s]

Batch 3400/4538 | Loss: 123.576576


Epoch 37/100 [Train]:  79%|███████▉  | 3604/4538 [01:27<00:22, 41.35it/s]

Batch 3600/4538 | Loss: 105.154587


Epoch 37/100 [Train]:  84%|████████▍ | 3804/4538 [01:32<00:17, 41.45it/s]

Batch 3800/4538 | Loss: 115.254768


Epoch 37/100 [Train]:  88%|████████▊ | 4008/4538 [01:37<00:12, 41.17it/s]

Batch 4000/4538 | Loss: 98.080101


Epoch 37/100 [Train]:  93%|█████████▎| 4208/4538 [01:42<00:07, 41.33it/s]

Batch 4200/4538 | Loss: 112.877235


Epoch 37/100 [Train]:  97%|█████████▋| 4408/4538 [01:47<00:03, 41.20it/s]

Batch 4400/4538 | Loss: 123.572540


Epoch 37/100 [Train]: 100%|██████████| 4538/4538 [01:50<00:00, 41.18it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 133.06it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 100.77it/s]


Epoch 37/100 | Average Train Loss: 107.227776 | Average Validation Loss: 105.498237
Saved new best model with validation loss: 105.498237


Epoch 38/100 [Train]:   0%|          | 4/4538 [00:00<02:39, 28.34it/s]

Batch 0/4538 | Loss: 121.501015


Epoch 38/100 [Train]:   5%|▍         | 207/4538 [00:05<01:45, 40.97it/s]

Batch 200/4538 | Loss: 87.815987


Epoch 38/100 [Train]:   9%|▉         | 407/4538 [00:09<01:40, 41.16it/s]

Batch 400/4538 | Loss: 98.234238


Epoch 38/100 [Train]:  13%|█▎        | 607/4538 [00:14<01:35, 41.07it/s]

Batch 600/4538 | Loss: 111.280647


Epoch 38/100 [Train]:  18%|█▊        | 807/4538 [00:19<01:31, 40.97it/s]

Batch 800/4538 | Loss: 107.511612


Epoch 38/100 [Train]:  22%|██▏       | 1007/4538 [00:24<01:25, 41.18it/s]

Batch 1000/4538 | Loss: 106.103676


Epoch 38/100 [Train]:  27%|██▋       | 1207/4538 [00:29<01:21, 41.04it/s]

Batch 1200/4538 | Loss: 103.660187


Epoch 38/100 [Train]:  31%|███       | 1405/4538 [00:34<01:21, 38.60it/s]

Batch 1400/4538 | Loss: 111.052216


Epoch 38/100 [Train]:  35%|███▌      | 1605/4538 [00:39<01:15, 38.66it/s]

Batch 1600/4538 | Loss: 118.839127


Epoch 38/100 [Train]:  40%|███▉      | 1805/4538 [00:44<01:10, 38.72it/s]

Batch 1800/4538 | Loss: 108.393341


Epoch 38/100 [Train]:  44%|████▍     | 2005/4538 [00:49<01:05, 38.48it/s]

Batch 2000/4538 | Loss: 100.043488


Epoch 38/100 [Train]:  49%|████▊     | 2205/4538 [00:55<01:00, 38.68it/s]

Batch 2200/4538 | Loss: 120.405487


Epoch 38/100 [Train]:  53%|█████▎    | 2405/4538 [01:00<00:57, 37.30it/s]

Batch 2400/4538 | Loss: 101.388855


Epoch 38/100 [Train]:  57%|█████▋    | 2605/4538 [01:05<00:49, 38.72it/s]

Batch 2600/4538 | Loss: 112.346062


Epoch 38/100 [Train]:  62%|██████▏   | 2805/4538 [01:10<00:44, 38.72it/s]

Batch 2800/4538 | Loss: 110.566322


Epoch 38/100 [Train]:  66%|██████▌   | 3005/4538 [01:15<00:40, 37.58it/s]

Batch 3000/4538 | Loss: 95.925552


Epoch 38/100 [Train]:  71%|███████   | 3205/4538 [01:20<00:34, 38.81it/s]

Batch 3200/4538 | Loss: 96.298241


Epoch 38/100 [Train]:  75%|███████▌  | 3405/4538 [01:26<00:29, 38.51it/s]

Batch 3400/4538 | Loss: 92.311874


Epoch 38/100 [Train]:  79%|███████▉  | 3605/4538 [01:31<00:24, 38.22it/s]

Batch 3600/4538 | Loss: 111.910255


Epoch 38/100 [Train]:  84%|████████▍ | 3805/4538 [01:36<00:18, 38.68it/s]

Batch 3800/4538 | Loss: 124.645966


Epoch 38/100 [Train]:  88%|████████▊ | 4005/4538 [01:41<00:13, 38.69it/s]

Batch 4000/4538 | Loss: 93.593819


Epoch 38/100 [Train]:  93%|█████████▎| 4205/4538 [01:46<00:08, 38.69it/s]

Batch 4200/4538 | Loss: 100.191650


Epoch 38/100 [Train]:  97%|█████████▋| 4405/4538 [01:51<00:03, 38.67it/s]

Batch 4400/4538 | Loss: 99.263992


Epoch 38/100 [Train]: 100%|██████████| 4538/4538 [01:55<00:00, 39.35it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:33<00:00, 134.66it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 102.90it/s]


Epoch 38/100 | Average Train Loss: 105.636375 | Average Validation Loss: 103.961926
Saved new best model with validation loss: 103.961926


Epoch 39/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 97.648842


Epoch 39/100 [Train]:   5%|▍         | 207/4538 [00:05<01:51, 38.83it/s]

Batch 200/4538 | Loss: 97.352783


Epoch 39/100 [Train]:   9%|▉         | 407/4538 [00:10<01:46, 38.84it/s]

Batch 400/4538 | Loss: 114.194862


Epoch 39/100 [Train]:  13%|█▎        | 607/4538 [00:15<01:41, 38.90it/s]

Batch 600/4538 | Loss: 111.660187


Epoch 39/100 [Train]:  18%|█▊        | 807/4538 [00:20<01:36, 38.79it/s]

Batch 800/4538 | Loss: 120.132584


Epoch 39/100 [Train]:  22%|██▏       | 1007/4538 [00:26<01:31, 38.54it/s]

Batch 1000/4538 | Loss: 107.345909


Epoch 39/100 [Train]:  27%|██▋       | 1207/4538 [00:31<01:26, 38.52it/s]

Batch 1200/4538 | Loss: 117.112846


Epoch 39/100 [Train]:  31%|███       | 1407/4538 [00:36<01:21, 38.59it/s]

Batch 1400/4538 | Loss: 66.330170


Epoch 39/100 [Train]:  35%|███▌      | 1607/4538 [00:41<01:16, 38.54it/s]

Batch 1600/4538 | Loss: 94.039886


Epoch 39/100 [Train]:  40%|███▉      | 1807/4538 [00:46<01:10, 38.52it/s]

Batch 1800/4538 | Loss: 131.999283


Epoch 39/100 [Train]:  44%|████▍     | 2007/4538 [00:51<01:05, 38.35it/s]

Batch 2000/4538 | Loss: 118.021637


Epoch 39/100 [Train]:  49%|████▊     | 2207/4538 [00:57<01:00, 38.54it/s]

Batch 2200/4538 | Loss: 92.287086


Epoch 39/100 [Train]:  53%|█████▎    | 2407/4538 [01:02<00:55, 38.61it/s]

Batch 2400/4538 | Loss: 119.606483


Epoch 39/100 [Train]:  57%|█████▋    | 2607/4538 [01:07<00:50, 38.56it/s]

Batch 2600/4538 | Loss: 95.298141


Epoch 39/100 [Train]:  62%|██████▏   | 2807/4538 [01:12<00:44, 38.50it/s]

Batch 2800/4538 | Loss: 107.674995


Epoch 39/100 [Train]:  66%|██████▋   | 3007/4538 [01:17<00:39, 38.48it/s]

Batch 3000/4538 | Loss: 101.037552


Epoch 39/100 [Train]:  71%|███████   | 3207/4538 [01:23<00:34, 38.51it/s]

Batch 3200/4538 | Loss: 97.826714


Epoch 39/100 [Train]:  75%|███████▌  | 3407/4538 [01:28<00:29, 38.50it/s]

Batch 3400/4538 | Loss: 116.847298


Epoch 39/100 [Train]:  79%|███████▉  | 3607/4538 [01:33<00:24, 38.44it/s]

Batch 3600/4538 | Loss: 110.601852


Epoch 39/100 [Train]:  84%|████████▍ | 3807/4538 [01:38<00:19, 38.19it/s]

Batch 3800/4538 | Loss: 110.400002


Epoch 39/100 [Train]:  88%|████████▊ | 4007/4538 [01:43<00:13, 38.56it/s]

Batch 4000/4538 | Loss: 98.529015


Epoch 39/100 [Train]:  93%|█████████▎| 4207/4538 [01:49<00:08, 38.44it/s]

Batch 4200/4538 | Loss: 90.795494


Epoch 39/100 [Train]:  97%|█████████▋| 4407/4538 [01:54<00:03, 38.35it/s]

Batch 4400/4538 | Loss: 123.099831


Epoch 39/100 [Train]: 100%|██████████| 4538/4538 [01:57<00:00, 38.55it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:33<00:00, 133.95it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 102.42it/s]


Epoch 39/100 | Average Train Loss: 104.156883 | Average Validation Loss: 102.553506
Saved new best model with validation loss: 102.553506


Epoch 40/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 98.304787


Epoch 40/100 [Train]:   5%|▍         | 208/4538 [00:05<01:51, 38.74it/s]

Batch 200/4538 | Loss: 138.463242


Epoch 40/100 [Train]:   9%|▉         | 408/4538 [00:10<01:46, 38.70it/s]

Batch 400/4538 | Loss: 120.530777


Epoch 40/100 [Train]:  13%|█▎        | 608/4538 [00:15<01:41, 38.70it/s]

Batch 600/4538 | Loss: 97.837234


Epoch 40/100 [Train]:  18%|█▊        | 808/4538 [00:20<01:36, 38.58it/s]

Batch 800/4538 | Loss: 94.134567


Epoch 40/100 [Train]:  22%|██▏       | 1008/4538 [00:26<01:31, 38.50it/s]

Batch 1000/4538 | Loss: 88.342773


Epoch 40/100 [Train]:  27%|██▋       | 1208/4538 [00:31<01:26, 38.59it/s]

Batch 1200/4538 | Loss: 111.429726


Epoch 40/100 [Train]:  31%|███       | 1408/4538 [00:36<01:21, 38.59it/s]

Batch 1400/4538 | Loss: 76.114868


Epoch 40/100 [Train]:  35%|███▌      | 1608/4538 [00:41<01:16, 38.44it/s]

Batch 1600/4538 | Loss: 125.574654


Epoch 40/100 [Train]:  40%|███▉      | 1808/4538 [00:46<01:10, 38.56it/s]

Batch 1800/4538 | Loss: 85.187714


Epoch 40/100 [Train]:  44%|████▍     | 2008/4538 [00:52<01:05, 38.57it/s]

Batch 2000/4538 | Loss: 114.030151


Epoch 40/100 [Train]:  49%|████▊     | 2208/4538 [00:57<01:00, 38.61it/s]

Batch 2200/4538 | Loss: 131.360870


Epoch 40/100 [Train]:  53%|█████▎    | 2408/4538 [01:02<00:55, 38.69it/s]

Batch 2400/4538 | Loss: 138.558289


Epoch 40/100 [Train]:  57%|█████▋    | 2608/4538 [01:07<00:49, 38.69it/s]

Batch 2600/4538 | Loss: 99.946892


Epoch 40/100 [Train]:  62%|██████▏   | 2808/4538 [01:12<00:44, 38.60it/s]

Batch 2800/4538 | Loss: 113.683502


Epoch 40/100 [Train]:  66%|██████▋   | 3008/4538 [01:17<00:37, 41.33it/s]

Batch 3000/4538 | Loss: 92.889404


Epoch 40/100 [Train]:  71%|███████   | 3208/4538 [01:22<00:32, 41.39it/s]

Batch 3200/4538 | Loss: 104.650482


Epoch 40/100 [Train]:  75%|███████▌  | 3408/4538 [01:27<00:27, 41.22it/s]

Batch 3400/4538 | Loss: 88.166718


Epoch 40/100 [Train]:  80%|███████▉  | 3608/4538 [01:32<00:22, 41.47it/s]

Batch 3600/4538 | Loss: 65.321144


Epoch 40/100 [Train]:  84%|████████▍ | 3808/4538 [01:37<00:17, 41.49it/s]

Batch 3800/4538 | Loss: 91.688042


Epoch 40/100 [Train]:  88%|████████▊ | 4008/4538 [01:41<00:12, 41.52it/s]

Batch 4000/4538 | Loss: 112.531136


Epoch 40/100 [Train]:  93%|█████████▎| 4208/4538 [01:46<00:07, 41.62it/s]

Batch 4200/4538 | Loss: 112.275360


Epoch 40/100 [Train]:  97%|█████████▋| 4408/4538 [01:51<00:03, 41.48it/s]

Batch 4400/4538 | Loss: 103.649117


Epoch 40/100 [Train]: 100%|██████████| 4538/4538 [01:54<00:00, 39.54it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 132.59it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 99.17it/s] 


Epoch 40/100 | Average Train Loss: 102.762512 | Average Validation Loss: 101.164543
Saved new best model with validation loss: 101.164543


Epoch 41/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 101.261185


Epoch 41/100 [Train]:   4%|▍         | 204/4538 [00:04<01:45, 41.20it/s]

Batch 200/4538 | Loss: 130.088165


Epoch 41/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 41.30it/s]

Batch 400/4538 | Loss: 121.832977


Epoch 41/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:34, 41.50it/s]

Batch 600/4538 | Loss: 126.214020


Epoch 41/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:30, 41.19it/s]

Batch 800/4538 | Loss: 76.615982


Epoch 41/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:26, 40.95it/s]

Batch 1000/4538 | Loss: 96.854324


Epoch 41/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:21, 41.13it/s]

Batch 1200/4538 | Loss: 139.018448


Epoch 41/100 [Train]:  31%|███       | 1404/4538 [00:34<01:16, 41.22it/s]

Batch 1400/4538 | Loss: 122.722458


Epoch 41/100 [Train]:  35%|███▌      | 1604/4538 [00:38<01:10, 41.59it/s]

Batch 1600/4538 | Loss: 120.291679


Epoch 41/100 [Train]:  40%|███▉      | 1804/4538 [00:43<01:06, 40.99it/s]

Batch 1800/4538 | Loss: 104.681168


Epoch 41/100 [Train]:  44%|████▍     | 2004/4538 [00:48<01:01, 41.19it/s]

Batch 2000/4538 | Loss: 93.297279


Epoch 41/100 [Train]:  49%|████▊     | 2204/4538 [00:53<00:56, 41.07it/s]

Batch 2200/4538 | Loss: 107.360909


Epoch 41/100 [Train]:  53%|█████▎    | 2404/4538 [00:58<00:51, 41.24it/s]

Batch 2400/4538 | Loss: 100.657829


Epoch 41/100 [Train]:  57%|█████▋    | 2604/4538 [01:03<00:47, 41.08it/s]

Batch 2600/4538 | Loss: 96.068436


Epoch 41/100 [Train]:  62%|██████▏   | 2804/4538 [01:08<00:42, 41.02it/s]

Batch 2800/4538 | Loss: 102.824631


Epoch 41/100 [Train]:  66%|██████▌   | 3004/4538 [01:12<00:37, 41.34it/s]

Batch 3000/4538 | Loss: 98.030487


Epoch 41/100 [Train]:  71%|███████   | 3204/4538 [01:17<00:32, 41.15it/s]

Batch 3200/4538 | Loss: 97.219238


Epoch 41/100 [Train]:  75%|███████▌  | 3404/4538 [01:22<00:27, 41.00it/s]

Batch 3400/4538 | Loss: 115.734207


Epoch 41/100 [Train]:  79%|███████▉  | 3604/4538 [01:27<00:22, 41.16it/s]

Batch 3600/4538 | Loss: 82.197441


Epoch 41/100 [Train]:  84%|████████▍ | 3804/4538 [01:32<00:17, 41.17it/s]

Batch 3800/4538 | Loss: 121.137283


Epoch 41/100 [Train]:  88%|████████▊ | 4008/4538 [01:37<00:13, 39.75it/s]

Batch 4000/4538 | Loss: 75.471092


Epoch 41/100 [Train]:  93%|█████████▎| 4208/4538 [01:42<00:07, 41.35it/s]

Batch 4200/4538 | Loss: 109.897255


Epoch 41/100 [Train]:  97%|█████████▋| 4408/4538 [01:46<00:03, 41.20it/s]

Batch 4400/4538 | Loss: 101.190750


Epoch 41/100 [Train]: 100%|██████████| 4538/4538 [01:50<00:00, 41.20it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 131.65it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 102.27it/s]


Epoch 41/100 | Average Train Loss: 101.476654 | Average Validation Loss: 99.963518
Saved new best model with validation loss: 99.963518


Epoch 42/100 [Train]:   0%|          | 4/4538 [00:00<02:16, 33.13it/s]

Batch 0/4538 | Loss: 79.203400


Epoch 42/100 [Train]:   4%|▍         | 204/4538 [00:04<01:46, 40.70it/s]

Batch 200/4538 | Loss: 93.938622


Epoch 42/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 41.18it/s]

Batch 400/4538 | Loss: 116.935753


Epoch 42/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:35, 41.02it/s]

Batch 600/4538 | Loss: 89.319069


Epoch 42/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:30, 41.09it/s]

Batch 800/4538 | Loss: 91.487389


Epoch 42/100 [Train]:  22%|██▏       | 1005/4538 [00:25<01:42, 34.53it/s]

Batch 1000/4538 | Loss: 104.898651


Epoch 42/100 [Train]:  27%|██▋       | 1205/4538 [00:31<01:36, 34.54it/s]

Batch 1200/4538 | Loss: 82.926895


Epoch 42/100 [Train]:  31%|███       | 1405/4538 [00:36<01:30, 34.45it/s]

Batch 1400/4538 | Loss: 103.737259


Epoch 42/100 [Train]:  35%|███▌      | 1605/4538 [00:42<01:25, 34.49it/s]

Batch 1600/4538 | Loss: 95.770630


Epoch 42/100 [Train]:  40%|███▉      | 1805/4538 [00:48<01:19, 34.57it/s]

Batch 1800/4538 | Loss: 95.037369


Epoch 42/100 [Train]:  44%|████▍     | 2005/4538 [00:54<01:13, 34.61it/s]

Batch 2000/4538 | Loss: 111.576744


Epoch 42/100 [Train]:  49%|████▊     | 2205/4538 [01:00<01:07, 34.55it/s]

Batch 2200/4538 | Loss: 95.690521


Epoch 42/100 [Train]:  53%|█████▎    | 2405/4538 [01:05<01:01, 34.58it/s]

Batch 2400/4538 | Loss: 118.337204


Epoch 42/100 [Train]:  57%|█████▋    | 2605/4538 [01:11<00:55, 34.57it/s]

Batch 2600/4538 | Loss: 101.170219


Epoch 42/100 [Train]:  62%|██████▏   | 2805/4538 [01:17<00:49, 34.76it/s]

Batch 2800/4538 | Loss: 119.497528


Epoch 42/100 [Train]:  66%|██████▌   | 3005/4538 [01:23<00:44, 34.73it/s]

Batch 3000/4538 | Loss: 69.029495


Epoch 42/100 [Train]:  71%|███████   | 3205/4538 [01:29<00:38, 34.66it/s]

Batch 3200/4538 | Loss: 83.505966


Epoch 42/100 [Train]:  75%|███████▌  | 3405/4538 [01:34<00:33, 33.37it/s]

Batch 3400/4538 | Loss: 124.903389


Epoch 42/100 [Train]:  79%|███████▉  | 3605/4538 [01:40<00:26, 34.70it/s]

Batch 3600/4538 | Loss: 77.641006


Epoch 42/100 [Train]:  84%|████████▍ | 3805/4538 [01:46<00:21, 34.52it/s]

Batch 3800/4538 | Loss: 108.826202


Epoch 42/100 [Train]:  88%|████████▊ | 4005/4538 [01:52<00:15, 34.74it/s]

Batch 4000/4538 | Loss: 122.856651


Epoch 42/100 [Train]:  93%|█████████▎| 4205/4538 [01:57<00:09, 34.63it/s]

Batch 4200/4538 | Loss: 105.265640


Epoch 42/100 [Train]:  97%|█████████▋| 4405/4538 [02:03<00:03, 34.66it/s]

Batch 4400/4538 | Loss: 105.862015


Epoch 42/100 [Train]: 100%|██████████| 4538/4538 [02:07<00:00, 35.59it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 123.06it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 90.50it/s]


Epoch 42/100 | Average Train Loss: 100.271912 | Average Validation Loss: 98.836934
Saved new best model with validation loss: 98.836934


Epoch 43/100 [Train]:   0%|          | 7/4538 [00:00<02:24, 31.39it/s]

Batch 0/4538 | Loss: 91.146523


Epoch 43/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.52it/s]

Batch 200/4538 | Loss: 114.774651


Epoch 43/100 [Train]:   9%|▉         | 407/4538 [00:11<02:01, 33.99it/s]

Batch 400/4538 | Loss: 101.024338


Epoch 43/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.69it/s]

Batch 600/4538 | Loss: 107.376976


Epoch 43/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.76it/s]

Batch 800/4538 | Loss: 122.149796


Epoch 43/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:41, 34.80it/s]

Batch 1000/4538 | Loss: 108.562363


Epoch 43/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:35, 34.71it/s]

Batch 1200/4538 | Loss: 105.208984


Epoch 43/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.62it/s]

Batch 1400/4538 | Loss: 88.454506


Epoch 43/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.62it/s]

Batch 1600/4538 | Loss: 105.173248


Epoch 43/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.53it/s]

Batch 1800/4538 | Loss: 91.059578


Epoch 43/100 [Train]:  44%|████▍     | 2007/4538 [00:57<01:13, 34.40it/s]

Batch 2000/4538 | Loss: 106.717972


Epoch 43/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.38it/s]

Batch 2200/4538 | Loss: 75.518242


Epoch 43/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.38it/s]

Batch 2400/4538 | Loss: 76.164429


Epoch 43/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:56, 34.33it/s]

Batch 2600/4538 | Loss: 88.619934


Epoch 43/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.41it/s]

Batch 2800/4538 | Loss: 97.018982


Epoch 43/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.37it/s]

Batch 3000/4538 | Loss: 92.996445


Epoch 43/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.48it/s]

Batch 3200/4538 | Loss: 94.988808


Epoch 43/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.52it/s]

Batch 3400/4538 | Loss: 75.813217


Epoch 43/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:27, 34.48it/s]

Batch 3600/4538 | Loss: 72.326988


Epoch 43/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.47it/s]

Batch 3800/4538 | Loss: 91.878815


Epoch 43/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.42it/s]

Batch 4000/4538 | Loss: 100.390884


Epoch 43/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.44it/s]

Batch 4200/4538 | Loss: 123.492218


Epoch 43/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.51it/s]

Batch 4400/4538 | Loss: 124.004959


Epoch 43/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.52it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.54it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 90.97it/s]


Epoch 43/100 | Average Train Loss: 99.155525 | Average Validation Loss: 97.764387
Saved new best model with validation loss: 97.764387


Epoch 44/100 [Train]:   0%|          | 7/4538 [00:00<02:24, 31.40it/s]

Batch 0/4538 | Loss: 103.825279


Epoch 44/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.46it/s]

Batch 200/4538 | Loss: 112.727875


Epoch 44/100 [Train]:   9%|▉         | 407/4538 [00:11<02:00, 34.37it/s]

Batch 400/4538 | Loss: 93.081299


Epoch 44/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:54, 34.41it/s]

Batch 600/4538 | Loss: 96.121872


Epoch 44/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.39it/s]

Batch 800/4538 | Loss: 85.232132


Epoch 44/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:43, 34.21it/s]

Batch 1000/4538 | Loss: 92.521118


Epoch 44/100 [Train]:  27%|██▋       | 1207/4538 [00:35<01:36, 34.41it/s]

Batch 1200/4538 | Loss: 90.786812


Epoch 44/100 [Train]:  31%|███       | 1407/4538 [00:40<01:31, 34.38it/s]

Batch 1400/4538 | Loss: 119.196274


Epoch 44/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:25, 34.40it/s]

Batch 1600/4538 | Loss: 97.170143


Epoch 44/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.40it/s]

Batch 1800/4538 | Loss: 89.952904


Epoch 44/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.44it/s]

Batch 2000/4538 | Loss: 80.663719


Epoch 44/100 [Train]:  49%|████▊     | 2207/4538 [01:04<01:07, 34.31it/s]

Batch 2200/4538 | Loss: 67.360184


Epoch 44/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.52it/s]

Batch 2400/4538 | Loss: 98.405380


Epoch 44/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:56, 34.45it/s]

Batch 2600/4538 | Loss: 101.634201


Epoch 44/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.41it/s]

Batch 2800/4538 | Loss: 116.277832


Epoch 44/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.52it/s]

Batch 3000/4538 | Loss: 80.502182


Epoch 44/100 [Train]:  71%|███████   | 3207/4538 [01:33<00:38, 34.49it/s]

Batch 3200/4538 | Loss: 108.227150


Epoch 44/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.49it/s]

Batch 3400/4538 | Loss: 107.717987


Epoch 44/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:27, 34.39it/s]

Batch 3600/4538 | Loss: 92.519592


Epoch 44/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.45it/s]

Batch 3800/4538 | Loss: 120.273079


Epoch 44/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.42it/s]

Batch 4000/4538 | Loss: 110.537277


Epoch 44/100 [Train]:  93%|█████████▎| 4207/4538 [02:02<00:09, 34.41it/s]

Batch 4200/4538 | Loss: 119.383575


Epoch 44/100 [Train]:  97%|█████████▋| 4407/4538 [02:08<00:03, 34.42it/s]

Batch 4400/4538 | Loss: 94.540878


Epoch 44/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.43it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.41it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 92.66it/s]


Epoch 44/100 | Average Train Loss: 98.101738 | Average Validation Loss: 96.799261
Saved new best model with validation loss: 96.799261


Epoch 45/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 88.593117


Epoch 45/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.48it/s]

Batch 200/4538 | Loss: 96.146187


Epoch 45/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.45it/s]

Batch 400/4538 | Loss: 109.225723


Epoch 45/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:54, 34.45it/s]

Batch 600/4538 | Loss: 93.689613


Epoch 45/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.54it/s]

Batch 800/4538 | Loss: 98.750175


Epoch 45/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.49it/s]

Batch 1000/4538 | Loss: 94.551758


Epoch 45/100 [Train]:  27%|██▋       | 1207/4538 [00:35<01:36, 34.59it/s]

Batch 1200/4538 | Loss: 89.061172


Epoch 45/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.47it/s]

Batch 1400/4538 | Loss: 88.712067


Epoch 45/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:25, 34.18it/s]

Batch 1600/4538 | Loss: 105.879745


Epoch 45/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.55it/s]

Batch 1800/4538 | Loss: 98.240845


Epoch 45/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.57it/s]

Batch 2000/4538 | Loss: 113.722931


Epoch 45/100 [Train]:  49%|████▊     | 2207/4538 [01:04<01:07, 34.54it/s]

Batch 2200/4538 | Loss: 100.570801


Epoch 45/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.62it/s]

Batch 2400/4538 | Loss: 98.407272


Epoch 45/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:56, 34.48it/s]

Batch 2600/4538 | Loss: 57.756413


Epoch 45/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.52it/s]

Batch 2800/4538 | Loss: 79.349594


Epoch 45/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.44it/s]

Batch 3000/4538 | Loss: 94.296265


Epoch 45/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.59it/s]

Batch 3200/4538 | Loss: 112.324936


Epoch 45/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:33, 33.92it/s]

Batch 3400/4538 | Loss: 86.124260


Epoch 45/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.50it/s]

Batch 3600/4538 | Loss: 104.357269


Epoch 45/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.56it/s]

Batch 3800/4538 | Loss: 100.065300


Epoch 45/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.43it/s]

Batch 4000/4538 | Loss: 67.788498


Epoch 45/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.36it/s]

Batch 4200/4538 | Loss: 101.503593


Epoch 45/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.51it/s]

Batch 4400/4538 | Loss: 82.838249


Epoch 45/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.50it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 123.82it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 92.08it/s]


Epoch 45/100 | Average Train Loss: 97.110741 | Average Validation Loss: 95.831433
Saved new best model with validation loss: 95.831433


Epoch 46/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 113.638664


Epoch 46/100 [Train]:   5%|▍         | 207/4538 [00:05<02:04, 34.71it/s]

Batch 200/4538 | Loss: 67.344460


Epoch 46/100 [Train]:   9%|▉         | 407/4538 [00:11<01:58, 34.81it/s]

Batch 400/4538 | Loss: 79.146133


Epoch 46/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.68it/s]

Batch 600/4538 | Loss: 104.161659


Epoch 46/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.75it/s]

Batch 800/4538 | Loss: 108.008606


Epoch 46/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:41, 34.74it/s]

Batch 1000/4538 | Loss: 102.031960


Epoch 46/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.56it/s]

Batch 1200/4538 | Loss: 103.343575


Epoch 46/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.76it/s]

Batch 1400/4538 | Loss: 119.696625


Epoch 46/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.89it/s]

Batch 1600/4538 | Loss: 88.765594


Epoch 46/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.87it/s]

Batch 1800/4538 | Loss: 110.576691


Epoch 46/100 [Train]:  44%|████▍     | 2007/4538 [00:57<01:12, 34.86it/s]

Batch 2000/4538 | Loss: 92.780930


Epoch 46/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:06, 34.81it/s]

Batch 2200/4538 | Loss: 96.812263


Epoch 46/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.93it/s]

Batch 2400/4538 | Loss: 110.005478


Epoch 46/100 [Train]:  57%|█████▋    | 2607/4538 [01:14<00:55, 34.77it/s]

Batch 2600/4538 | Loss: 115.263832


Epoch 46/100 [Train]:  62%|██████▏   | 2807/4538 [01:20<00:49, 34.79it/s]

Batch 2800/4538 | Loss: 106.139969


Epoch 46/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:44, 34.76it/s]

Batch 3000/4538 | Loss: 89.368462


Epoch 46/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.89it/s]

Batch 3200/4538 | Loss: 113.962135


Epoch 46/100 [Train]:  75%|███████▌  | 3407/4538 [01:37<00:32, 34.88it/s]

Batch 3400/4538 | Loss: 109.001480


Epoch 46/100 [Train]:  79%|███████▉  | 3607/4538 [01:43<00:26, 34.86it/s]

Batch 3600/4538 | Loss: 65.568970


Epoch 46/100 [Train]:  84%|████████▍ | 3807/4538 [01:49<00:21, 34.39it/s]

Batch 3800/4538 | Loss: 137.059570


Epoch 46/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.73it/s]

Batch 4000/4538 | Loss: 66.629662


Epoch 46/100 [Train]:  93%|█████████▎| 4207/4538 [02:00<00:09, 34.76it/s]

Batch 4200/4538 | Loss: 93.957741


Epoch 46/100 [Train]:  97%|█████████▋| 4407/4538 [02:06<00:03, 34.95it/s]

Batch 4400/4538 | Loss: 84.347527


Epoch 46/100 [Train]: 100%|██████████| 4538/4538 [02:10<00:00, 34.78it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 124.05it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.84it/s]


Epoch 46/100 | Average Train Loss: 96.189087 | Average Validation Loss: 94.976792
Saved new best model with validation loss: 94.976792


Epoch 47/100 [Train]:   0%|          | 7/4538 [00:00<02:23, 31.58it/s]

Batch 0/4538 | Loss: 119.418694


Epoch 47/100 [Train]:   5%|▍         | 207/4538 [00:05<02:04, 34.88it/s]

Batch 200/4538 | Loss: 66.512405


Epoch 47/100 [Train]:   9%|▉         | 407/4538 [00:11<01:58, 34.77it/s]

Batch 400/4538 | Loss: 113.792320


Epoch 47/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:52, 34.81it/s]

Batch 600/4538 | Loss: 105.420113


Epoch 47/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.80it/s]

Batch 800/4538 | Loss: 96.588753


Epoch 47/100 [Train]:  22%|██▏       | 1007/4538 [00:28<01:41, 34.81it/s]

Batch 1000/4538 | Loss: 102.067642


Epoch 47/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:35, 34.84it/s]

Batch 1200/4538 | Loss: 92.231850


Epoch 47/100 [Train]:  31%|███       | 1407/4538 [00:40<01:29, 35.10it/s]

Batch 1400/4538 | Loss: 97.302032


Epoch 47/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:23, 34.92it/s]

Batch 1600/4538 | Loss: 89.969109


Epoch 47/100 [Train]:  40%|███▉      | 1807/4538 [00:51<01:18, 34.89it/s]

Batch 1800/4538 | Loss: 85.448547


Epoch 47/100 [Train]:  44%|████▍     | 2007/4538 [00:57<01:12, 35.13it/s]

Batch 2000/4538 | Loss: 110.896133


Epoch 47/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.55it/s]

Batch 2200/4538 | Loss: 74.384750


Epoch 47/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.47it/s]

Batch 2400/4538 | Loss: 102.613098


Epoch 47/100 [Train]:  57%|█████▋    | 2607/4538 [01:14<00:55, 34.58it/s]

Batch 2600/4538 | Loss: 93.563889


Epoch 47/100 [Train]:  62%|██████▏   | 2807/4538 [01:20<00:50, 34.58it/s]

Batch 2800/4538 | Loss: 97.244110


Epoch 47/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:44, 34.68it/s]

Batch 3000/4538 | Loss: 88.884163


Epoch 47/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.58it/s]

Batch 3200/4538 | Loss: 97.979935


Epoch 47/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.61it/s]

Batch 3400/4538 | Loss: 64.163689


Epoch 47/100 [Train]:  79%|███████▉  | 3607/4538 [01:43<00:26, 34.74it/s]

Batch 3600/4538 | Loss: 97.504333


Epoch 47/100 [Train]:  84%|████████▍ | 3807/4538 [01:49<00:21, 34.80it/s]

Batch 3800/4538 | Loss: 102.308151


Epoch 47/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.90it/s]

Batch 4000/4538 | Loss: 106.503708


Epoch 47/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.87it/s]

Batch 4200/4538 | Loss: 95.665764


Epoch 47/100 [Train]:  97%|█████████▋| 4407/4538 [02:06<00:03, 34.85it/s]

Batch 4400/4538 | Loss: 81.007637


Epoch 47/100 [Train]: 100%|██████████| 4538/4538 [02:10<00:00, 34.76it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 124.23it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 92.69it/s]


Epoch 47/100 | Average Train Loss: 95.333275 | Average Validation Loss: 94.209045
Saved new best model with validation loss: 94.209045


Epoch 48/100 [Train]:   0%|          | 7/4538 [00:00<02:23, 31.61it/s]

Batch 0/4538 | Loss: 91.807373


Epoch 48/100 [Train]:   5%|▍         | 207/4538 [00:05<02:04, 34.90it/s]

Batch 200/4538 | Loss: 108.738380


Epoch 48/100 [Train]:   9%|▉         | 407/4538 [00:11<01:58, 34.76it/s]

Batch 400/4538 | Loss: 96.504288


Epoch 48/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:52, 34.81it/s]

Batch 600/4538 | Loss: 102.182632


Epoch 48/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.78it/s]

Batch 800/4538 | Loss: 98.036331


Epoch 48/100 [Train]:  22%|██▏       | 1007/4538 [00:28<01:42, 34.37it/s]

Batch 1000/4538 | Loss: 97.971863


Epoch 48/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:35, 34.75it/s]

Batch 1200/4538 | Loss: 92.164185


Epoch 48/100 [Train]:  31%|███       | 1407/4538 [00:40<01:29, 34.86it/s]

Batch 1400/4538 | Loss: 104.094589


Epoch 48/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:23, 34.91it/s]

Batch 1600/4538 | Loss: 110.833946


Epoch 48/100 [Train]:  40%|███▉      | 1807/4538 [00:51<01:18, 34.78it/s]

Batch 1800/4538 | Loss: 74.944168


Epoch 48/100 [Train]:  44%|████▍     | 2007/4538 [00:57<01:14, 33.85it/s]

Batch 2000/4538 | Loss: 95.957657


Epoch 48/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:06, 34.88it/s]

Batch 2200/4538 | Loss: 119.983521


Epoch 48/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.72it/s]

Batch 2400/4538 | Loss: 88.933136


Epoch 48/100 [Train]:  57%|█████▋    | 2607/4538 [01:14<00:55, 34.87it/s]

Batch 2600/4538 | Loss: 97.315712


Epoch 48/100 [Train]:  62%|██████▏   | 2807/4538 [01:20<00:49, 34.85it/s]

Batch 2800/4538 | Loss: 103.442741


Epoch 48/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:44, 34.77it/s]

Batch 3000/4538 | Loss: 99.519966


Epoch 48/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.76it/s]

Batch 3200/4538 | Loss: 98.183601


Epoch 48/100 [Train]:  75%|███████▌  | 3407/4538 [01:37<00:32, 34.71it/s]

Batch 3400/4538 | Loss: 101.483154


Epoch 48/100 [Train]:  79%|███████▉  | 3607/4538 [01:43<00:26, 34.72it/s]

Batch 3600/4538 | Loss: 111.750328


Epoch 48/100 [Train]:  84%|████████▍ | 3807/4538 [01:49<00:21, 34.70it/s]

Batch 3800/4538 | Loss: 95.220131


Epoch 48/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.84it/s]

Batch 4000/4538 | Loss: 94.461723


Epoch 48/100 [Train]:  93%|█████████▎| 4207/4538 [02:00<00:09, 34.74it/s]

Batch 4200/4538 | Loss: 93.887093


Epoch 48/100 [Train]:  97%|█████████▋| 4407/4538 [02:06<00:03, 34.74it/s]

Batch 4400/4538 | Loss: 93.964058


Epoch 48/100 [Train]: 100%|██████████| 4538/4538 [02:10<00:00, 34.78it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 124.20it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.33it/s]


Epoch 48/100 | Average Train Loss: 94.520515 | Average Validation Loss: 93.392454
Saved new best model with validation loss: 93.392454


Epoch 49/100 [Train]:   0%|          | 7/4538 [00:00<02:22, 31.76it/s]

Batch 0/4538 | Loss: 91.159889


Epoch 49/100 [Train]:   5%|▍         | 207/4538 [00:05<02:04, 34.77it/s]

Batch 200/4538 | Loss: 100.579811


Epoch 49/100 [Train]:   9%|▉         | 407/4538 [00:11<01:58, 34.81it/s]

Batch 400/4538 | Loss: 88.534431


Epoch 49/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.70it/s]

Batch 600/4538 | Loss: 122.675140


Epoch 49/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.54it/s]

Batch 800/4538 | Loss: 94.135490


Epoch 49/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:41, 34.80it/s]

Batch 1000/4538 | Loss: 98.786209


Epoch 49/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:35, 34.85it/s]

Batch 1200/4538 | Loss: 93.957832


Epoch 49/100 [Train]:  31%|███       | 1407/4538 [00:40<01:29, 34.85it/s]

Batch 1400/4538 | Loss: 106.494438


Epoch 49/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.74it/s]

Batch 1600/4538 | Loss: 105.817398


Epoch 49/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.68it/s]

Batch 1800/4538 | Loss: 90.847061


Epoch 49/100 [Train]:  44%|████▍     | 2007/4538 [00:57<01:13, 34.47it/s]

Batch 2000/4538 | Loss: 97.307076


Epoch 49/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.49it/s]

Batch 2200/4538 | Loss: 89.399544


Epoch 49/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.57it/s]

Batch 2400/4538 | Loss: 91.536652


Epoch 49/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.66it/s]

Batch 2600/4538 | Loss: 110.398651


Epoch 49/100 [Train]:  62%|██████▏   | 2807/4538 [01:20<00:49, 34.67it/s]

Batch 2800/4538 | Loss: 97.442162


Epoch 49/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:44, 34.64it/s]

Batch 3000/4538 | Loss: 88.918709


Epoch 49/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.53it/s]

Batch 3200/4538 | Loss: 79.119255


Epoch 49/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.70it/s]

Batch 3400/4538 | Loss: 74.995728


Epoch 49/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.65it/s]

Batch 3600/4538 | Loss: 65.561394


Epoch 49/100 [Train]:  84%|████████▍ | 3807/4538 [01:49<00:21, 34.59it/s]

Batch 3800/4538 | Loss: 107.224770


Epoch 49/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.66it/s]

Batch 4000/4538 | Loss: 75.239082


Epoch 49/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.58it/s]

Batch 4200/4538 | Loss: 89.561279


Epoch 49/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.59it/s]

Batch 4400/4538 | Loss: 104.832932


Epoch 49/100 [Train]: 100%|██████████| 4538/4538 [02:10<00:00, 34.65it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 123.94it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.79it/s]


Epoch 49/100 | Average Train Loss: 93.652725 | Average Validation Loss: 92.541527
Saved new best model with validation loss: 92.541527


Epoch 50/100 [Train]:   0%|          | 7/4538 [00:00<02:22, 31.76it/s]

Batch 0/4538 | Loss: 96.165047


Epoch 50/100 [Train]:   5%|▍         | 207/4538 [00:06<02:06, 34.34it/s]

Batch 200/4538 | Loss: 110.717598


Epoch 50/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.59it/s]

Batch 400/4538 | Loss: 86.922523


Epoch 50/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.59it/s]

Batch 600/4538 | Loss: 107.138519


Epoch 50/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.55it/s]

Batch 800/4538 | Loss: 95.251671


Epoch 50/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.59it/s]

Batch 1000/4538 | Loss: 97.646194


Epoch 50/100 [Train]:  27%|██▋       | 1203/4538 [00:34<01:36, 34.49it/s]

Batch 1200/4538 | Loss: 110.887962


Epoch 50/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.50it/s]

Batch 1400/4538 | Loss: 93.698494


Epoch 50/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.60it/s]

Batch 1600/4538 | Loss: 79.274719


Epoch 50/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.64it/s]

Batch 1800/4538 | Loss: 77.317001


Epoch 50/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.65it/s]

Batch 2000/4538 | Loss: 91.727203


Epoch 50/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.60it/s]

Batch 2200/4538 | Loss: 86.846008


Epoch 50/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.58it/s]

Batch 2400/4538 | Loss: 91.166382


Epoch 50/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.51it/s]

Batch 2600/4538 | Loss: 86.202385


Epoch 50/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.49it/s]

Batch 2800/4538 | Loss: 92.243111


Epoch 50/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.63it/s]

Batch 3000/4538 | Loss: 84.809456


Epoch 50/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.36it/s]

Batch 3200/4538 | Loss: 80.508453


Epoch 50/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.72it/s]

Batch 3400/4538 | Loss: 96.978294


Epoch 50/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.71it/s]

Batch 3600/4538 | Loss: 109.591217


Epoch 50/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.72it/s]

Batch 3800/4538 | Loss: 79.605980


Epoch 50/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.77it/s]

Batch 4000/4538 | Loss: 97.208153


Epoch 50/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.89it/s]

Batch 4200/4538 | Loss: 91.998062


Epoch 50/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.83it/s]

Batch 4400/4538 | Loss: 90.802025


Epoch 50/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.62it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 123.77it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 92.51it/s]


Epoch 50/100 | Average Train Loss: 92.879082 | Average Validation Loss: 91.746054
Saved new best model with validation loss: 91.746054


Epoch 51/100 [Train]:   0%|          | 3/4538 [00:00<02:46, 27.30it/s]

Batch 0/4538 | Loss: 88.291862


Epoch 51/100 [Train]:   5%|▍         | 206/4538 [00:06<02:04, 34.68it/s]

Batch 200/4538 | Loss: 94.170403


Epoch 51/100 [Train]:   9%|▉         | 406/4538 [00:11<01:59, 34.72it/s]

Batch 400/4538 | Loss: 77.520119


Epoch 51/100 [Train]:  13%|█▎        | 606/4538 [00:17<01:53, 34.79it/s]

Batch 600/4538 | Loss: 65.170418


Epoch 51/100 [Train]:  18%|█▊        | 806/4538 [00:23<01:47, 34.82it/s]

Batch 800/4538 | Loss: 152.579453


Epoch 51/100 [Train]:  22%|██▏       | 1006/4538 [00:28<01:41, 34.81it/s]

Batch 1000/4538 | Loss: 73.338463


Epoch 51/100 [Train]:  27%|██▋       | 1206/4538 [00:34<01:35, 34.81it/s]

Batch 1200/4538 | Loss: 98.765305


Epoch 51/100 [Train]:  31%|███       | 1406/4538 [00:40<01:30, 34.63it/s]

Batch 1400/4538 | Loss: 81.356445


Epoch 51/100 [Train]:  35%|███▌      | 1606/4538 [00:46<01:24, 34.77it/s]

Batch 1600/4538 | Loss: 103.785553


Epoch 51/100 [Train]:  40%|███▉      | 1806/4538 [00:52<01:18, 34.87it/s]

Batch 1800/4538 | Loss: 94.280624


Epoch 51/100 [Train]:  44%|████▍     | 2006/4538 [00:57<01:12, 34.74it/s]

Batch 2000/4538 | Loss: 97.161499


Epoch 51/100 [Train]:  49%|████▊     | 2206/4538 [01:03<01:08, 33.98it/s]

Batch 2200/4538 | Loss: 64.813896


Epoch 51/100 [Train]:  53%|█████▎    | 2406/4538 [01:09<01:02, 34.10it/s]

Batch 2400/4538 | Loss: 94.930817


Epoch 51/100 [Train]:  57%|█████▋    | 2606/4538 [01:15<00:55, 34.74it/s]

Batch 2600/4538 | Loss: 89.175888


Epoch 51/100 [Train]:  62%|██████▏   | 2806/4538 [01:20<00:49, 34.70it/s]

Batch 2800/4538 | Loss: 87.802002


Epoch 51/100 [Train]:  66%|██████▌   | 3006/4538 [01:26<00:44, 34.75it/s]

Batch 3000/4538 | Loss: 82.759163


Epoch 51/100 [Train]:  71%|███████   | 3206/4538 [01:32<00:38, 34.77it/s]

Batch 3200/4538 | Loss: 84.793228


Epoch 51/100 [Train]:  75%|███████▌  | 3406/4538 [01:38<00:32, 34.91it/s]

Batch 3400/4538 | Loss: 91.289009


Epoch 51/100 [Train]:  79%|███████▉  | 3606/4538 [01:43<00:26, 34.96it/s]

Batch 3600/4538 | Loss: 98.738495


Epoch 51/100 [Train]:  84%|████████▍ | 3806/4538 [01:49<00:21, 34.81it/s]

Batch 3800/4538 | Loss: 83.528351


Epoch 51/100 [Train]:  88%|████████▊ | 4006/4538 [01:55<00:15, 34.74it/s]

Batch 4000/4538 | Loss: 109.880348


Epoch 51/100 [Train]:  93%|█████████▎| 4206/4538 [02:01<00:09, 34.80it/s]

Batch 4200/4538 | Loss: 84.831139


Epoch 51/100 [Train]:  97%|█████████▋| 4406/4538 [02:06<00:03, 34.89it/s]

Batch 4400/4538 | Loss: 108.163338


Epoch 51/100 [Train]: 100%|██████████| 4538/4538 [02:10<00:00, 34.76it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:35<00:00, 128.15it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 100.25it/s]


Epoch 51/100 | Average Train Loss: 92.073372 | Average Validation Loss: 91.043001
Saved new best model with validation loss: 91.043001


Epoch 52/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 76.640396


Epoch 52/100 [Train]:   4%|▍         | 204/4538 [00:05<01:46, 40.84it/s]

Batch 200/4538 | Loss: 94.873711


Epoch 52/100 [Train]:   9%|▉         | 404/4538 [00:09<01:41, 40.82it/s]

Batch 400/4538 | Loss: 92.956848


Epoch 52/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:36, 40.95it/s]

Batch 600/4538 | Loss: 101.811302


Epoch 52/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:31, 40.78it/s]

Batch 800/4538 | Loss: 90.824005


Epoch 52/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:26, 41.04it/s]

Batch 1000/4538 | Loss: 89.724342


Epoch 52/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:21, 41.15it/s]

Batch 1200/4538 | Loss: 100.395615


Epoch 52/100 [Train]:  31%|███       | 1404/4538 [00:34<01:16, 40.88it/s]

Batch 1400/4538 | Loss: 93.757622


Epoch 52/100 [Train]:  35%|███▌      | 1604/4538 [00:39<01:11, 41.04it/s]

Batch 1600/4538 | Loss: 87.507286


Epoch 52/100 [Train]:  40%|███▉      | 1804/4538 [00:44<01:06, 40.96it/s]

Batch 1800/4538 | Loss: 100.014679


Epoch 52/100 [Train]:  44%|████▍     | 2004/4538 [00:48<01:01, 41.16it/s]

Batch 2000/4538 | Loss: 81.876099


Epoch 52/100 [Train]:  49%|████▊     | 2204/4538 [00:53<00:57, 40.80it/s]

Batch 2200/4538 | Loss: 85.888306


Epoch 52/100 [Train]:  53%|█████▎    | 2404/4538 [00:58<00:52, 40.64it/s]

Batch 2400/4538 | Loss: 95.597977


Epoch 52/100 [Train]:  57%|█████▋    | 2604/4538 [01:03<00:47, 40.96it/s]

Batch 2600/4538 | Loss: 91.307114


Epoch 52/100 [Train]:  62%|██████▏   | 2804/4538 [01:08<00:42, 40.77it/s]

Batch 2800/4538 | Loss: 94.201523


Epoch 52/100 [Train]:  66%|██████▌   | 3004/4538 [01:13<00:37, 41.06it/s]

Batch 3000/4538 | Loss: 81.348656


Epoch 52/100 [Train]:  71%|███████   | 3204/4538 [01:18<00:32, 41.19it/s]

Batch 3200/4538 | Loss: 73.259155


Epoch 52/100 [Train]:  75%|███████▌  | 3404/4538 [01:23<00:27, 41.08it/s]

Batch 3400/4538 | Loss: 97.112709


Epoch 52/100 [Train]:  79%|███████▉  | 3604/4538 [01:28<00:22, 41.25it/s]

Batch 3600/4538 | Loss: 77.026421


Epoch 52/100 [Train]:  84%|████████▍ | 3804/4538 [01:32<00:17, 40.86it/s]

Batch 3800/4538 | Loss: 90.440483


Epoch 52/100 [Train]:  88%|████████▊ | 4004/4538 [01:37<00:13, 40.82it/s]

Batch 4000/4538 | Loss: 94.434059


Epoch 52/100 [Train]:  93%|█████████▎| 4204/4538 [01:42<00:08, 41.08it/s]

Batch 4200/4538 | Loss: 90.960480


Epoch 52/100 [Train]:  97%|█████████▋| 4404/4538 [01:47<00:03, 40.90it/s]

Batch 4400/4538 | Loss: 71.943138


Epoch 52/100 [Train]: 100%|██████████| 4538/4538 [01:50<00:00, 40.96it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 130.39it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 101.80it/s]


Epoch 52/100 | Average Train Loss: 91.389236 | Average Validation Loss: 90.430567
Saved new best model with validation loss: 90.430567


Epoch 53/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 96.478752


Epoch 53/100 [Train]:   4%|▍         | 204/4538 [00:04<01:45, 41.02it/s]

Batch 200/4538 | Loss: 102.088463


Epoch 53/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 41.11it/s]

Batch 400/4538 | Loss: 63.659847


Epoch 53/100 [Train]:  13%|█▎        | 608/4538 [00:14<01:35, 41.01it/s]

Batch 600/4538 | Loss: 63.473961


Epoch 53/100 [Train]:  18%|█▊        | 808/4538 [00:19<01:31, 40.90it/s]

Batch 800/4538 | Loss: 79.795494


Epoch 53/100 [Train]:  22%|██▏       | 1008/4538 [00:24<01:26, 40.98it/s]

Batch 1000/4538 | Loss: 83.250999


Epoch 53/100 [Train]:  27%|██▋       | 1208/4538 [00:29<01:21, 41.04it/s]

Batch 1200/4538 | Loss: 108.283035


Epoch 53/100 [Train]:  31%|███       | 1408/4538 [00:34<01:16, 41.10it/s]

Batch 1400/4538 | Loss: 97.140533


Epoch 53/100 [Train]:  35%|███▌      | 1608/4538 [00:39<01:11, 41.00it/s]

Batch 1600/4538 | Loss: 92.855164


Epoch 53/100 [Train]:  40%|███▉      | 1808/4538 [00:44<01:06, 40.83it/s]

Batch 1800/4538 | Loss: 82.718399


Epoch 53/100 [Train]:  44%|████▍     | 2008/4538 [00:49<01:01, 41.11it/s]

Batch 2000/4538 | Loss: 82.139816


Epoch 53/100 [Train]:  49%|████▊     | 2208/4538 [00:53<00:56, 40.99it/s]

Batch 2200/4538 | Loss: 87.767159


Epoch 53/100 [Train]:  53%|█████▎    | 2407/4538 [00:58<00:52, 40.97it/s]

Batch 2400/4538 | Loss: 100.813744


Epoch 53/100 [Train]:  57%|█████▋    | 2607/4538 [01:03<00:46, 41.16it/s]

Batch 2600/4538 | Loss: 91.294357


Epoch 53/100 [Train]:  62%|██████▏   | 2807/4538 [01:08<00:41, 41.22it/s]

Batch 2800/4538 | Loss: 109.734833


Epoch 53/100 [Train]:  66%|██████▋   | 3007/4538 [01:13<00:37, 40.89it/s]

Batch 3000/4538 | Loss: 71.551384


Epoch 53/100 [Train]:  71%|███████   | 3207/4538 [01:18<00:32, 40.93it/s]

Batch 3200/4538 | Loss: 105.492264


Epoch 53/100 [Train]:  75%|███████▌  | 3407/4538 [01:23<00:27, 40.85it/s]

Batch 3400/4538 | Loss: 90.248322


Epoch 53/100 [Train]:  79%|███████▉  | 3607/4538 [01:28<00:22, 40.86it/s]

Batch 3600/4538 | Loss: 97.468704


Epoch 53/100 [Train]:  84%|████████▍ | 3807/4538 [01:32<00:17, 40.83it/s]

Batch 3800/4538 | Loss: 88.524933


Epoch 53/100 [Train]:  88%|████████▊ | 4007/4538 [01:37<00:12, 40.89it/s]

Batch 4000/4538 | Loss: 117.048866


Epoch 53/100 [Train]:  93%|█████████▎| 4207/4538 [01:42<00:08, 40.72it/s]

Batch 4200/4538 | Loss: 69.185730


Epoch 53/100 [Train]:  97%|█████████▋| 4407/4538 [01:47<00:03, 40.99it/s]

Batch 4400/4538 | Loss: 95.777534


Epoch 53/100 [Train]: 100%|██████████| 4538/4538 [01:50<00:00, 40.96it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 129.91it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 101.43it/s]


Epoch 53/100 | Average Train Loss: 90.785561 | Average Validation Loss: 89.878579
Saved new best model with validation loss: 89.878579


Epoch 54/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 84.711060


Epoch 54/100 [Train]:   4%|▍         | 204/4538 [00:04<01:46, 40.75it/s]

Batch 200/4538 | Loss: 86.760025


Epoch 54/100 [Train]:   9%|▉         | 404/4538 [00:09<01:41, 40.68it/s]

Batch 400/4538 | Loss: 101.368462


Epoch 54/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:36, 40.85it/s]

Batch 600/4538 | Loss: 90.152565


Epoch 54/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:31, 40.81it/s]

Batch 800/4538 | Loss: 98.560402


Epoch 54/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:26, 40.97it/s]

Batch 1000/4538 | Loss: 104.534721


Epoch 54/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:21, 41.02it/s]

Batch 1200/4538 | Loss: 87.808952


Epoch 54/100 [Train]:  31%|███       | 1404/4538 [00:34<01:16, 41.03it/s]

Batch 1400/4538 | Loss: 94.782578


Epoch 54/100 [Train]:  35%|███▌      | 1608/4538 [00:39<01:11, 40.75it/s]

Batch 1600/4538 | Loss: 79.641106


Epoch 54/100 [Train]:  40%|███▉      | 1808/4538 [00:44<01:06, 41.00it/s]

Batch 1800/4538 | Loss: 96.200241


Epoch 54/100 [Train]:  44%|████▍     | 2008/4538 [00:49<01:01, 40.83it/s]

Batch 2000/4538 | Loss: 93.073105


Epoch 54/100 [Train]:  49%|████▊     | 2208/4538 [00:54<00:57, 40.79it/s]

Batch 2200/4538 | Loss: 117.597862


Epoch 54/100 [Train]:  53%|█████▎    | 2408/4538 [00:58<00:52, 40.89it/s]

Batch 2400/4538 | Loss: 101.819473


Epoch 54/100 [Train]:  57%|█████▋    | 2608/4538 [01:03<00:47, 40.94it/s]

Batch 2600/4538 | Loss: 88.587173


Epoch 54/100 [Train]:  62%|██████▏   | 2808/4538 [01:08<00:42, 41.07it/s]

Batch 2800/4538 | Loss: 88.232025


Epoch 54/100 [Train]:  66%|██████▋   | 3008/4538 [01:13<00:37, 40.98it/s]

Batch 3000/4538 | Loss: 82.618172


Epoch 54/100 [Train]:  71%|███████   | 3208/4538 [01:18<00:32, 41.08it/s]

Batch 3200/4538 | Loss: 80.450516


Epoch 54/100 [Train]:  75%|███████▌  | 3408/4538 [01:23<00:27, 40.55it/s]

Batch 3400/4538 | Loss: 71.553833


Epoch 54/100 [Train]:  80%|███████▉  | 3608/4538 [01:28<00:22, 40.64it/s]

Batch 3600/4538 | Loss: 88.788109


Epoch 54/100 [Train]:  84%|████████▍ | 3808/4538 [01:33<00:17, 41.08it/s]

Batch 3800/4538 | Loss: 89.428055


Epoch 54/100 [Train]:  88%|████████▊ | 4007/4538 [01:38<00:13, 39.95it/s]

Batch 4000/4538 | Loss: 87.257210


Epoch 54/100 [Train]:  93%|█████████▎| 4207/4538 [01:42<00:08, 40.79it/s]

Batch 4200/4538 | Loss: 83.648628


Epoch 54/100 [Train]:  97%|█████████▋| 4407/4538 [01:47<00:03, 40.98it/s]

Batch 4400/4538 | Loss: 89.446671


Epoch 54/100 [Train]: 100%|██████████| 4538/4538 [01:50<00:00, 40.89it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 130.58it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 100.76it/s]


Epoch 54/100 | Average Train Loss: 90.236061 | Average Validation Loss: 89.411118
Saved new best model with validation loss: 89.411118


Epoch 55/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 80.873688


Epoch 55/100 [Train]:   4%|▍         | 204/4538 [00:05<01:46, 40.64it/s]

Batch 200/4538 | Loss: 83.840469


Epoch 55/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 40.99it/s]

Batch 400/4538 | Loss: 87.403992


Epoch 55/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:36, 40.67it/s]

Batch 600/4538 | Loss: 96.817207


Epoch 55/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:32, 40.36it/s]

Batch 800/4538 | Loss: 102.173950


Epoch 55/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:26, 40.85it/s]

Batch 1000/4538 | Loss: 84.254501


Epoch 55/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:22, 40.56it/s]

Batch 1200/4538 | Loss: 82.644814


Epoch 55/100 [Train]:  31%|███       | 1404/4538 [00:34<01:17, 40.30it/s]

Batch 1400/4538 | Loss: 104.440208


Epoch 55/100 [Train]:  35%|███▌      | 1604/4538 [00:39<01:12, 40.75it/s]

Batch 1600/4538 | Loss: 92.626495


Epoch 55/100 [Train]:  40%|███▉      | 1804/4538 [00:44<01:06, 41.00it/s]

Batch 1800/4538 | Loss: 120.638222


Epoch 55/100 [Train]:  44%|████▍     | 2004/4538 [00:49<01:02, 40.25it/s]

Batch 2000/4538 | Loss: 95.951057


Epoch 55/100 [Train]:  49%|████▊     | 2204/4538 [00:53<00:57, 40.78it/s]

Batch 2200/4538 | Loss: 95.406769


Epoch 55/100 [Train]:  53%|█████▎    | 2404/4538 [00:58<00:52, 40.88it/s]

Batch 2400/4538 | Loss: 94.255196


Epoch 55/100 [Train]:  57%|█████▋    | 2604/4538 [01:03<00:48, 39.66it/s]

Batch 2600/4538 | Loss: 120.275314


Epoch 55/100 [Train]:  62%|██████▏   | 2808/4538 [01:08<00:42, 40.75it/s]

Batch 2800/4538 | Loss: 96.697105


Epoch 55/100 [Train]:  66%|██████▋   | 3008/4538 [01:13<00:37, 40.55it/s]

Batch 3000/4538 | Loss: 89.676903


Epoch 55/100 [Train]:  71%|███████   | 3208/4538 [01:18<00:32, 40.81it/s]

Batch 3200/4538 | Loss: 80.155548


Epoch 55/100 [Train]:  75%|███████▌  | 3408/4538 [01:23<00:27, 40.64it/s]

Batch 3400/4538 | Loss: 93.414604


Epoch 55/100 [Train]:  80%|███████▉  | 3608/4538 [01:28<00:22, 40.57it/s]

Batch 3600/4538 | Loss: 93.941811


Epoch 55/100 [Train]:  84%|████████▍ | 3808/4538 [01:33<00:18, 40.36it/s]

Batch 3800/4538 | Loss: 94.575897


Epoch 55/100 [Train]:  88%|████████▊ | 4005/4538 [01:38<00:13, 40.52it/s]

Batch 4000/4538 | Loss: 87.801598


Epoch 55/100 [Train]:  93%|█████████▎| 4205/4538 [01:43<00:08, 40.75it/s]

Batch 4200/4538 | Loss: 101.528450


Epoch 55/100 [Train]:  97%|█████████▋| 4405/4538 [01:48<00:03, 40.66it/s]

Batch 4400/4538 | Loss: 92.981262


Epoch 55/100 [Train]: 100%|██████████| 4538/4538 [01:51<00:00, 40.75it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:35<00:00, 129.31it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 100.93it/s]


Epoch 55/100 | Average Train Loss: 89.709412 | Average Validation Loss: 88.929509
Saved new best model with validation loss: 88.929509


Epoch 56/100 [Train]:   0%|          | 8/4538 [00:00<02:03, 36.73it/s]

Batch 0/4538 | Loss: 94.088730


Epoch 56/100 [Train]:   5%|▍         | 207/4538 [00:05<01:46, 40.74it/s]

Batch 200/4538 | Loss: 85.497910


Epoch 56/100 [Train]:   9%|▉         | 407/4538 [00:09<01:40, 40.96it/s]

Batch 400/4538 | Loss: 81.039520


Epoch 56/100 [Train]:  13%|█▎        | 607/4538 [00:14<01:38, 40.05it/s]

Batch 600/4538 | Loss: 96.405548


Epoch 56/100 [Train]:  18%|█▊        | 807/4538 [00:19<01:31, 40.66it/s]

Batch 800/4538 | Loss: 86.454819


Epoch 56/100 [Train]:  22%|██▏       | 1007/4538 [00:24<01:26, 40.69it/s]

Batch 1000/4538 | Loss: 84.049034


Epoch 56/100 [Train]:  27%|██▋       | 1205/4538 [00:29<01:23, 39.73it/s]

Batch 1200/4538 | Loss: 79.427322


Epoch 56/100 [Train]:  31%|███       | 1404/4538 [00:34<01:17, 40.42it/s]

Batch 1400/4538 | Loss: 107.638397


Epoch 56/100 [Train]:  35%|███▌      | 1604/4538 [00:39<01:12, 40.72it/s]

Batch 1600/4538 | Loss: 92.373383


Epoch 56/100 [Train]:  40%|███▉      | 1804/4538 [00:44<01:07, 40.75it/s]

Batch 1800/4538 | Loss: 84.812187


Epoch 56/100 [Train]:  44%|████▍     | 2004/4538 [00:49<01:01, 40.89it/s]

Batch 2000/4538 | Loss: 59.418121


Epoch 56/100 [Train]:  49%|████▊     | 2204/4538 [00:54<00:57, 40.56it/s]

Batch 2200/4538 | Loss: 87.092003


Epoch 56/100 [Train]:  53%|█████▎    | 2404/4538 [00:59<00:52, 40.98it/s]

Batch 2400/4538 | Loss: 97.683815


Epoch 56/100 [Train]:  57%|█████▋    | 2608/4538 [01:04<00:47, 40.74it/s]

Batch 2600/4538 | Loss: 89.990944


Epoch 56/100 [Train]:  62%|██████▏   | 2808/4538 [01:08<00:42, 40.67it/s]

Batch 2800/4538 | Loss: 107.378532


Epoch 56/100 [Train]:  66%|██████▋   | 3008/4538 [01:13<00:37, 40.65it/s]

Batch 3000/4538 | Loss: 105.805161


Epoch 56/100 [Train]:  71%|███████   | 3208/4538 [01:18<00:32, 40.52it/s]

Batch 3200/4538 | Loss: 90.752525


Epoch 56/100 [Train]:  75%|███████▌  | 3408/4538 [01:23<00:27, 40.61it/s]

Batch 3400/4538 | Loss: 82.751205


Epoch 56/100 [Train]:  80%|███████▉  | 3608/4538 [01:28<00:22, 40.88it/s]

Batch 3600/4538 | Loss: 87.861877


Epoch 56/100 [Train]:  84%|████████▍ | 3805/4538 [01:33<00:19, 38.37it/s]

Batch 3800/4538 | Loss: 76.170242


Epoch 56/100 [Train]:  88%|████████▊ | 4005/4538 [01:38<00:13, 38.33it/s]

Batch 4000/4538 | Loss: 63.381451


Epoch 56/100 [Train]:  93%|█████████▎| 4205/4538 [01:44<00:08, 38.21it/s]

Batch 4200/4538 | Loss: 99.465897


Epoch 56/100 [Train]:  97%|█████████▋| 4405/4538 [01:49<00:03, 38.21it/s]

Batch 4400/4538 | Loss: 84.992805


Epoch 56/100 [Train]: 100%|██████████| 4538/4538 [01:52<00:00, 40.22it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:35<00:00, 128.96it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 99.44it/s] 


Epoch 56/100 | Average Train Loss: 89.233429 | Average Validation Loss: 88.454523
Saved new best model with validation loss: 88.454523


Epoch 57/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 94.330200


Epoch 57/100 [Train]:   5%|▍         | 207/4538 [00:05<01:53, 38.16it/s]

Batch 200/4538 | Loss: 83.388443


Epoch 57/100 [Train]:   9%|▉         | 407/4538 [00:10<01:47, 38.27it/s]

Batch 400/4538 | Loss: 109.020065


Epoch 57/100 [Train]:  13%|█▎        | 607/4538 [00:15<01:43, 38.11it/s]

Batch 600/4538 | Loss: 106.751335


Epoch 57/100 [Train]:  18%|█▊        | 807/4538 [00:21<01:37, 38.13it/s]

Batch 800/4538 | Loss: 105.152290


Epoch 57/100 [Train]:  22%|██▏       | 1007/4538 [00:26<01:32, 38.09it/s]

Batch 1000/4538 | Loss: 88.922485


Epoch 57/100 [Train]:  27%|██▋       | 1207/4538 [00:31<01:27, 38.13it/s]

Batch 1200/4538 | Loss: 82.628159


Epoch 57/100 [Train]:  31%|███       | 1407/4538 [00:36<01:21, 38.22it/s]

Batch 1400/4538 | Loss: 79.917480


Epoch 57/100 [Train]:  35%|███▌      | 1607/4538 [00:42<01:16, 38.11it/s]

Batch 1600/4538 | Loss: 79.367371


Epoch 57/100 [Train]:  40%|███▉      | 1807/4538 [00:47<01:11, 38.15it/s]

Batch 1800/4538 | Loss: 105.047287


Epoch 57/100 [Train]:  44%|████▍     | 2007/4538 [00:52<01:06, 38.07it/s]

Batch 2000/4538 | Loss: 54.504745


Epoch 57/100 [Train]:  49%|████▊     | 2207/4538 [00:57<01:01, 38.21it/s]

Batch 2200/4538 | Loss: 93.385292


Epoch 57/100 [Train]:  53%|█████▎    | 2407/4538 [01:03<00:55, 38.16it/s]

Batch 2400/4538 | Loss: 85.754364


Epoch 57/100 [Train]:  57%|█████▋    | 2607/4538 [01:08<00:50, 38.22it/s]

Batch 2600/4538 | Loss: 87.493164


Epoch 57/100 [Train]:  62%|██████▏   | 2807/4538 [01:13<00:45, 38.16it/s]

Batch 2800/4538 | Loss: 99.300217


Epoch 57/100 [Train]:  66%|██████▋   | 3007/4538 [01:18<00:40, 38.13it/s]

Batch 3000/4538 | Loss: 58.148270


Epoch 57/100 [Train]:  71%|███████   | 3207/4538 [01:24<00:35, 37.83it/s]

Batch 3200/4538 | Loss: 94.576759


Epoch 57/100 [Train]:  75%|███████▌  | 3407/4538 [01:29<00:29, 38.23it/s]

Batch 3400/4538 | Loss: 89.993553


Epoch 57/100 [Train]:  79%|███████▉  | 3607/4538 [01:34<00:24, 38.16it/s]

Batch 3600/4538 | Loss: 84.946037


Epoch 57/100 [Train]:  84%|████████▍ | 3807/4538 [01:39<00:19, 38.10it/s]

Batch 3800/4538 | Loss: 77.642586


Epoch 57/100 [Train]:  88%|████████▊ | 4007/4538 [01:45<00:13, 38.20it/s]

Batch 4000/4538 | Loss: 90.059578


Epoch 57/100 [Train]:  93%|█████████▎| 4207/4538 [01:50<00:08, 38.14it/s]

Batch 4200/4538 | Loss: 78.952126


Epoch 57/100 [Train]:  97%|█████████▋| 4407/4538 [01:55<00:03, 38.13it/s]

Batch 4400/4538 | Loss: 51.656841


Epoch 57/100 [Train]: 100%|██████████| 4538/4538 [01:58<00:00, 38.15it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:35<00:00, 129.00it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 98.54it/s] 


Epoch 57/100 | Average Train Loss: 88.779755 | Average Validation Loss: 88.054882
Saved new best model with validation loss: 88.054882


Epoch 58/100 [Train]:   0%|          | 7/4538 [00:00<02:11, 34.49it/s]

Batch 0/4538 | Loss: 81.986481


Epoch 58/100 [Train]:   5%|▍         | 207/4538 [00:05<01:53, 38.17it/s]

Batch 200/4538 | Loss: 74.952827


Epoch 58/100 [Train]:   9%|▉         | 407/4538 [00:10<01:47, 38.27it/s]

Batch 400/4538 | Loss: 111.795731


Epoch 58/100 [Train]:  13%|█▎        | 607/4538 [00:15<01:42, 38.18it/s]

Batch 600/4538 | Loss: 85.736809


Epoch 58/100 [Train]:  18%|█▊        | 807/4538 [00:21<01:37, 38.17it/s]

Batch 800/4538 | Loss: 88.953766


Epoch 58/100 [Train]:  22%|██▏       | 1007/4538 [00:26<01:34, 37.49it/s]

Batch 1000/4538 | Loss: 89.618835


Epoch 58/100 [Train]:  27%|██▋       | 1207/4538 [00:31<01:27, 38.22it/s]

Batch 1200/4538 | Loss: 63.873150


Epoch 58/100 [Train]:  31%|███       | 1407/4538 [00:36<01:21, 38.33it/s]

Batch 1400/4538 | Loss: 82.457047


Epoch 58/100 [Train]:  35%|███▌      | 1607/4538 [00:42<01:16, 38.23it/s]

Batch 1600/4538 | Loss: 104.029305


Epoch 58/100 [Train]:  40%|███▉      | 1807/4538 [00:47<01:11, 38.37it/s]

Batch 1800/4538 | Loss: 75.260773


Epoch 58/100 [Train]:  44%|████▍     | 2007/4538 [00:52<01:05, 38.36it/s]

Batch 2000/4538 | Loss: 82.346672


Epoch 58/100 [Train]:  49%|████▊     | 2207/4538 [00:57<01:00, 38.30it/s]

Batch 2200/4538 | Loss: 100.935165


Epoch 58/100 [Train]:  53%|█████▎    | 2407/4538 [01:02<00:55, 38.32it/s]

Batch 2400/4538 | Loss: 110.277466


Epoch 58/100 [Train]:  57%|█████▋    | 2607/4538 [01:08<00:50, 38.37it/s]

Batch 2600/4538 | Loss: 70.813934


Epoch 58/100 [Train]:  62%|██████▏   | 2807/4538 [01:13<00:45, 38.38it/s]

Batch 2800/4538 | Loss: 80.502022


Epoch 58/100 [Train]:  66%|██████▋   | 3007/4538 [01:18<00:39, 38.32it/s]

Batch 3000/4538 | Loss: 78.887871


Epoch 58/100 [Train]:  71%|███████   | 3207/4538 [01:23<00:34, 38.27it/s]

Batch 3200/4538 | Loss: 103.571419


Epoch 58/100 [Train]:  75%|███████▌  | 3407/4538 [01:29<00:29, 38.46it/s]

Batch 3400/4538 | Loss: 97.250366


Epoch 58/100 [Train]:  79%|███████▉  | 3607/4538 [01:34<00:24, 38.29it/s]

Batch 3600/4538 | Loss: 103.966797


Epoch 58/100 [Train]:  84%|████████▍ | 3807/4538 [01:39<00:19, 38.42it/s]

Batch 3800/4538 | Loss: 104.070908


Epoch 58/100 [Train]:  88%|████████▊ | 4007/4538 [01:44<00:13, 38.26it/s]

Batch 4000/4538 | Loss: 81.691689


Epoch 58/100 [Train]:  93%|█████████▎| 4207/4538 [01:49<00:08, 38.28it/s]

Batch 4200/4538 | Loss: 74.328163


Epoch 58/100 [Train]:  97%|█████████▋| 4408/4538 [01:55<00:03, 40.78it/s]

Batch 4400/4538 | Loss: 88.375687


Epoch 58/100 [Train]: 100%|██████████| 4538/4538 [01:58<00:00, 38.39it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 130.26it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 101.06it/s]


Epoch 58/100 | Average Train Loss: 88.346375 | Average Validation Loss: 87.653794
Saved new best model with validation loss: 87.653794


Epoch 59/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 100.707718


Epoch 59/100 [Train]:   5%|▍         | 207/4538 [00:05<01:46, 40.65it/s]

Batch 200/4538 | Loss: 83.930275


Epoch 59/100 [Train]:   9%|▉         | 407/4538 [00:10<01:41, 40.86it/s]

Batch 400/4538 | Loss: 96.061920


Epoch 59/100 [Train]:  13%|█▎        | 607/4538 [00:14<01:36, 40.82it/s]

Batch 600/4538 | Loss: 88.556892


Epoch 59/100 [Train]:  18%|█▊        | 807/4538 [00:19<01:31, 40.57it/s]

Batch 800/4538 | Loss: 70.342453


Epoch 59/100 [Train]:  22%|██▏       | 1007/4538 [00:24<01:26, 40.75it/s]

Batch 1000/4538 | Loss: 54.756470


Epoch 59/100 [Train]:  27%|██▋       | 1207/4538 [00:29<01:21, 40.70it/s]

Batch 1200/4538 | Loss: 91.113480


Epoch 59/100 [Train]:  31%|███       | 1407/4538 [00:34<01:16, 40.84it/s]

Batch 1400/4538 | Loss: 98.714668


Epoch 59/100 [Train]:  35%|███▌      | 1607/4538 [00:39<01:12, 40.70it/s]

Batch 1600/4538 | Loss: 67.901115


Epoch 59/100 [Train]:  40%|███▉      | 1807/4538 [00:44<01:06, 40.90it/s]

Batch 1800/4538 | Loss: 90.454872


Epoch 59/100 [Train]:  44%|████▍     | 2007/4538 [00:49<01:01, 40.84it/s]

Batch 2000/4538 | Loss: 87.228210


Epoch 59/100 [Train]:  49%|████▊     | 2207/4538 [00:54<00:57, 40.63it/s]

Batch 2200/4538 | Loss: 82.872314


Epoch 59/100 [Train]:  53%|█████▎    | 2407/4538 [00:59<00:52, 40.82it/s]

Batch 2400/4538 | Loss: 77.379890


Epoch 59/100 [Train]:  57%|█████▋    | 2607/4538 [01:03<00:47, 40.78it/s]

Batch 2600/4538 | Loss: 60.703209


Epoch 59/100 [Train]:  62%|██████▏   | 2807/4538 [01:08<00:42, 40.68it/s]

Batch 2800/4538 | Loss: 91.649757


Epoch 59/100 [Train]:  66%|██████▋   | 3007/4538 [01:13<00:37, 40.67it/s]

Batch 3000/4538 | Loss: 87.676964


Epoch 59/100 [Train]:  71%|███████   | 3207/4538 [01:18<00:32, 40.92it/s]

Batch 3200/4538 | Loss: 77.644333


Epoch 59/100 [Train]:  75%|███████▌  | 3407/4538 [01:23<00:27, 40.88it/s]

Batch 3400/4538 | Loss: 85.043251


Epoch 59/100 [Train]:  79%|███████▉  | 3607/4538 [01:28<00:22, 40.75it/s]

Batch 3600/4538 | Loss: 79.353912


Epoch 59/100 [Train]:  84%|████████▍ | 3807/4538 [01:33<00:17, 40.81it/s]

Batch 3800/4538 | Loss: 79.741829


Epoch 59/100 [Train]:  88%|████████▊ | 4007/4538 [01:38<00:13, 40.54it/s]

Batch 4000/4538 | Loss: 101.901596


Epoch 59/100 [Train]:  93%|█████████▎| 4207/4538 [01:43<00:08, 41.22it/s]

Batch 4200/4538 | Loss: 67.023735


Epoch 59/100 [Train]:  97%|█████████▋| 4407/4538 [01:48<00:03, 40.79it/s]

Batch 4400/4538 | Loss: 76.655579


Epoch 59/100 [Train]: 100%|██████████| 4538/4538 [01:51<00:00, 40.80it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 129.91it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 100.38it/s]


Epoch 59/100 | Average Train Loss: 87.945824 | Average Validation Loss: 87.283244
Saved new best model with validation loss: 87.283244


Epoch 60/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 78.779770


Epoch 60/100 [Train]:   5%|▍         | 207/4538 [00:05<01:53, 38.07it/s]

Batch 200/4538 | Loss: 87.841232


Epoch 60/100 [Train]:   9%|▉         | 407/4538 [00:10<01:48, 38.25it/s]

Batch 400/4538 | Loss: 68.502815


Epoch 60/100 [Train]:  13%|█▎        | 607/4538 [00:15<01:42, 38.43it/s]

Batch 600/4538 | Loss: 108.076172


Epoch 60/100 [Train]:  18%|█▊        | 807/4538 [00:21<01:37, 38.37it/s]

Batch 800/4538 | Loss: 99.245529


Epoch 60/100 [Train]:  22%|██▏       | 1007/4538 [00:26<01:33, 37.59it/s]

Batch 1000/4538 | Loss: 103.317970


Epoch 60/100 [Train]:  27%|██▋       | 1207/4538 [00:31<01:26, 38.42it/s]

Batch 1200/4538 | Loss: 89.464966


Epoch 60/100 [Train]:  31%|███       | 1407/4538 [00:36<01:21, 38.37it/s]

Batch 1400/4538 | Loss: 100.623528


Epoch 60/100 [Train]:  35%|███▌      | 1607/4538 [00:41<01:16, 38.42it/s]

Batch 1600/4538 | Loss: 91.139473


Epoch 60/100 [Train]:  40%|███▉      | 1807/4538 [00:47<01:11, 38.40it/s]

Batch 1800/4538 | Loss: 76.386421


Epoch 60/100 [Train]:  44%|████▍     | 2007/4538 [00:52<01:05, 38.57it/s]

Batch 2000/4538 | Loss: 108.853561


Epoch 60/100 [Train]:  49%|████▊     | 2207/4538 [00:57<01:00, 38.51it/s]

Batch 2200/4538 | Loss: 96.096275


Epoch 60/100 [Train]:  53%|█████▎    | 2407/4538 [01:02<00:55, 38.37it/s]

Batch 2400/4538 | Loss: 84.909546


Epoch 60/100 [Train]:  57%|█████▋    | 2607/4538 [01:08<00:50, 38.37it/s]

Batch 2600/4538 | Loss: 111.241028


Epoch 60/100 [Train]:  62%|██████▏   | 2807/4538 [01:13<00:45, 38.26it/s]

Batch 2800/4538 | Loss: 83.675606


Epoch 60/100 [Train]:  66%|██████▋   | 3007/4538 [01:18<00:39, 38.30it/s]

Batch 3000/4538 | Loss: 86.443680


Epoch 60/100 [Train]:  71%|███████   | 3207/4538 [01:23<00:34, 38.35it/s]

Batch 3200/4538 | Loss: 85.311592


Epoch 60/100 [Train]:  75%|███████▌  | 3407/4538 [01:28<00:29, 38.42it/s]

Batch 3400/4538 | Loss: 83.118881


Epoch 60/100 [Train]:  79%|███████▉  | 3607/4538 [01:34<00:24, 38.31it/s]

Batch 3600/4538 | Loss: 83.992455


Epoch 60/100 [Train]:  84%|████████▍ | 3807/4538 [01:39<00:19, 38.34it/s]

Batch 3800/4538 | Loss: 79.592041


Epoch 60/100 [Train]:  88%|████████▊ | 4007/4538 [01:44<00:13, 38.32it/s]

Batch 4000/4538 | Loss: 99.215828


Epoch 60/100 [Train]:  93%|█████████▎| 4207/4538 [01:49<00:08, 38.61it/s]

Batch 4200/4538 | Loss: 101.414696


Epoch 60/100 [Train]:  97%|█████████▋| 4407/4538 [01:54<00:03, 38.60it/s]

Batch 4400/4538 | Loss: 92.297287


Epoch 60/100 [Train]: 100%|██████████| 4538/4538 [01:58<00:00, 38.36it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:33<00:00, 135.04it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 99.38it/s] 


Epoch 60/100 | Average Train Loss: 87.562141 | Average Validation Loss: 86.948507
Saved new best model with validation loss: 86.948507


Epoch 61/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 79.390945


Epoch 61/100 [Train]:   5%|▍         | 207/4538 [00:05<01:51, 38.69it/s]

Batch 200/4538 | Loss: 82.443970


Epoch 61/100 [Train]:   9%|▉         | 407/4538 [00:10<01:46, 38.62it/s]

Batch 400/4538 | Loss: 83.144577


Epoch 61/100 [Train]:  13%|█▎        | 607/4538 [00:15<01:45, 37.22it/s]

Batch 600/4538 | Loss: 77.292473


Epoch 61/100 [Train]:  18%|█▊        | 807/4538 [00:20<01:36, 38.62it/s]

Batch 800/4538 | Loss: 62.946014


Epoch 61/100 [Train]:  22%|██▏       | 1007/4538 [00:26<01:31, 38.66it/s]

Batch 1000/4538 | Loss: 86.064819


Epoch 61/100 [Train]:  27%|██▋       | 1207/4538 [00:31<01:28, 37.67it/s]

Batch 1200/4538 | Loss: 87.437187


Epoch 61/100 [Train]:  31%|███       | 1407/4538 [00:36<01:21, 38.55it/s]

Batch 1400/4538 | Loss: 92.533638


Epoch 61/100 [Train]:  35%|███▌      | 1607/4538 [00:41<01:16, 38.55it/s]

Batch 1600/4538 | Loss: 98.536034


Epoch 61/100 [Train]:  40%|███▉      | 1807/4538 [00:46<01:11, 38.46it/s]

Batch 1800/4538 | Loss: 97.478027


Epoch 61/100 [Train]:  44%|████▍     | 2007/4538 [00:52<01:05, 38.58it/s]

Batch 2000/4538 | Loss: 60.259968


Epoch 61/100 [Train]:  49%|████▊     | 2207/4538 [00:57<01:00, 38.51it/s]

Batch 2200/4538 | Loss: 104.829697


Epoch 61/100 [Train]:  53%|█████▎    | 2407/4538 [01:02<00:55, 38.56it/s]

Batch 2400/4538 | Loss: 78.863419


Epoch 61/100 [Train]:  57%|█████▋    | 2607/4538 [01:07<00:50, 38.47it/s]

Batch 2600/4538 | Loss: 77.416260


Epoch 61/100 [Train]:  62%|██████▏   | 2807/4538 [01:12<00:44, 38.47it/s]

Batch 2800/4538 | Loss: 83.417664


Epoch 61/100 [Train]:  66%|██████▋   | 3007/4538 [01:17<00:39, 38.56it/s]

Batch 3000/4538 | Loss: 83.184692


Epoch 61/100 [Train]:  71%|███████   | 3207/4538 [01:23<00:34, 38.43it/s]

Batch 3200/4538 | Loss: 86.233116


Epoch 61/100 [Train]:  75%|███████▌  | 3407/4538 [01:28<00:29, 38.46it/s]

Batch 3400/4538 | Loss: 77.167740


Epoch 61/100 [Train]:  79%|███████▉  | 3607/4538 [01:33<00:24, 38.56it/s]

Batch 3600/4538 | Loss: 88.439133


Epoch 61/100 [Train]:  84%|████████▍ | 3807/4538 [01:38<00:18, 38.54it/s]

Batch 3800/4538 | Loss: 98.355461


Epoch 61/100 [Train]:  88%|████████▊ | 4007/4538 [01:43<00:13, 38.50it/s]

Batch 4000/4538 | Loss: 92.154984


Epoch 61/100 [Train]:  93%|█████████▎| 4207/4538 [01:49<00:08, 38.66it/s]

Batch 4200/4538 | Loss: 84.981743


Epoch 61/100 [Train]:  97%|█████████▋| 4407/4538 [01:54<00:03, 38.73it/s]

Batch 4400/4538 | Loss: 111.589340


Epoch 61/100 [Train]: 100%|██████████| 4538/4538 [01:57<00:00, 38.58it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 133.39it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 101.48it/s]


Epoch 61/100 | Average Train Loss: 87.210838 | Average Validation Loss: 86.597295
Saved new best model with validation loss: 86.597295


Epoch 62/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 79.056915


Epoch 62/100 [Train]:   5%|▍         | 207/4538 [00:05<01:51, 38.73it/s]

Batch 200/4538 | Loss: 82.396072


Epoch 62/100 [Train]:   9%|▉         | 407/4538 [00:10<01:46, 38.74it/s]

Batch 400/4538 | Loss: 79.187599


Epoch 62/100 [Train]:  13%|█▎        | 607/4538 [00:15<01:41, 38.69it/s]

Batch 600/4538 | Loss: 85.817787


Epoch 62/100 [Train]:  18%|█▊        | 807/4538 [00:20<01:36, 38.70it/s]

Batch 800/4538 | Loss: 76.203194


Epoch 62/100 [Train]:  22%|██▏       | 1007/4538 [00:26<01:31, 38.61it/s]

Batch 1000/4538 | Loss: 97.114639


Epoch 62/100 [Train]:  27%|██▋       | 1207/4538 [00:31<01:26, 38.73it/s]

Batch 1200/4538 | Loss: 125.780418


Epoch 62/100 [Train]:  31%|███       | 1407/4538 [00:36<01:22, 37.94it/s]

Batch 1400/4538 | Loss: 99.154297


Epoch 62/100 [Train]:  35%|███▌      | 1607/4538 [00:41<01:15, 38.69it/s]

Batch 1600/4538 | Loss: 78.545639


Epoch 62/100 [Train]:  40%|███▉      | 1807/4538 [00:46<01:10, 38.57it/s]

Batch 1800/4538 | Loss: 96.762169


Epoch 62/100 [Train]:  44%|████▍     | 2007/4538 [00:52<01:05, 38.40it/s]

Batch 2000/4538 | Loss: 95.159370


Epoch 62/100 [Train]:  49%|████▊     | 2206/4538 [00:56<00:56, 41.31it/s]

Batch 2200/4538 | Loss: 90.951927


Epoch 62/100 [Train]:  53%|█████▎    | 2406/4538 [01:01<00:51, 41.07it/s]

Batch 2400/4538 | Loss: 82.380051


Epoch 62/100 [Train]:  57%|█████▋    | 2606/4538 [01:06<00:47, 40.27it/s]

Batch 2600/4538 | Loss: 78.231567


Epoch 62/100 [Train]:  62%|██████▏   | 2806/4538 [01:11<00:41, 41.31it/s]

Batch 2800/4538 | Loss: 90.881760


Epoch 62/100 [Train]:  66%|██████▌   | 3006/4538 [01:16<00:37, 41.10it/s]

Batch 3000/4538 | Loss: 86.157646


Epoch 62/100 [Train]:  71%|███████   | 3206/4538 [01:21<00:32, 41.14it/s]

Batch 3200/4538 | Loss: 91.936409


Epoch 62/100 [Train]:  75%|███████▌  | 3406/4538 [01:26<00:27, 41.04it/s]

Batch 3400/4538 | Loss: 101.387856


Epoch 62/100 [Train]:  79%|███████▉  | 3606/4538 [01:30<00:22, 41.27it/s]

Batch 3600/4538 | Loss: 100.119797


Epoch 62/100 [Train]:  84%|████████▍ | 3806/4538 [01:35<00:17, 41.15it/s]

Batch 3800/4538 | Loss: 93.341187


Epoch 62/100 [Train]:  88%|████████▊ | 4006/4538 [01:40<00:12, 41.39it/s]

Batch 4000/4538 | Loss: 58.156105


Epoch 62/100 [Train]:  93%|█████████▎| 4206/4538 [01:45<00:08, 41.15it/s]

Batch 4200/4538 | Loss: 91.442886


Epoch 62/100 [Train]:  97%|█████████▋| 4406/4538 [01:50<00:03, 41.20it/s]

Batch 4400/4538 | Loss: 103.342148


Epoch 62/100 [Train]: 100%|██████████| 4538/4538 [01:53<00:00, 39.97it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 131.15it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 100.25it/s]


Epoch 62/100 | Average Train Loss: 86.879158 | Average Validation Loss: 86.305121
Saved new best model with validation loss: 86.305121


Epoch 63/100 [Train]:   0%|          | 4/4538 [00:00<02:17, 32.92it/s]

Batch 0/4538 | Loss: 91.951454


Epoch 63/100 [Train]:   5%|▍         | 207/4538 [00:05<01:45, 40.94it/s]

Batch 200/4538 | Loss: 79.248672


Epoch 63/100 [Train]:   9%|▉         | 407/4538 [00:09<01:40, 40.91it/s]

Batch 400/4538 | Loss: 80.978027


Epoch 63/100 [Train]:  13%|█▎        | 607/4538 [00:14<01:35, 41.12it/s]

Batch 600/4538 | Loss: 88.796814


Epoch 63/100 [Train]:  18%|█▊        | 807/4538 [00:19<01:30, 41.24it/s]

Batch 800/4538 | Loss: 96.572601


Epoch 63/100 [Train]:  22%|██▏       | 1007/4538 [00:24<01:26, 41.01it/s]

Batch 1000/4538 | Loss: 119.427818


Epoch 63/100 [Train]:  27%|██▋       | 1207/4538 [00:29<01:21, 40.83it/s]

Batch 1200/4538 | Loss: 75.911125


Epoch 63/100 [Train]:  31%|███       | 1407/4538 [00:34<01:16, 41.09it/s]

Batch 1400/4538 | Loss: 95.676910


Epoch 63/100 [Train]:  35%|███▌      | 1607/4538 [00:39<01:11, 41.00it/s]

Batch 1600/4538 | Loss: 88.912346


Epoch 63/100 [Train]:  40%|███▉      | 1807/4538 [00:44<01:06, 41.04it/s]

Batch 1800/4538 | Loss: 86.369171


Epoch 63/100 [Train]:  44%|████▍     | 2007/4538 [00:48<01:01, 41.09it/s]

Batch 2000/4538 | Loss: 81.625374


Epoch 63/100 [Train]:  49%|████▊     | 2207/4538 [00:53<00:56, 41.55it/s]

Batch 2200/4538 | Loss: 98.786690


Epoch 63/100 [Train]:  53%|█████▎    | 2407/4538 [00:58<00:51, 41.27it/s]

Batch 2400/4538 | Loss: 82.324394


Epoch 63/100 [Train]:  57%|█████▋    | 2606/4538 [01:03<00:46, 41.14it/s]

Batch 2600/4538 | Loss: 100.035637


Epoch 63/100 [Train]:  62%|██████▏   | 2806/4538 [01:08<00:42, 41.09it/s]

Batch 2800/4538 | Loss: 77.414757


Epoch 63/100 [Train]:  66%|██████▌   | 3006/4538 [01:13<00:37, 41.02it/s]

Batch 3000/4538 | Loss: 88.252907


Epoch 63/100 [Train]:  71%|███████   | 3206/4538 [01:18<00:32, 40.97it/s]

Batch 3200/4538 | Loss: 90.588676


Epoch 63/100 [Train]:  75%|███████▌  | 3406/4538 [01:22<00:27, 40.96it/s]

Batch 3400/4538 | Loss: 86.440033


Epoch 63/100 [Train]:  79%|███████▉  | 3606/4538 [01:27<00:22, 41.07it/s]

Batch 3600/4538 | Loss: 89.867134


Epoch 63/100 [Train]:  84%|████████▍ | 3806/4538 [01:32<00:17, 41.53it/s]

Batch 3800/4538 | Loss: 90.890480


Epoch 63/100 [Train]:  88%|████████▊ | 4006/4538 [01:37<00:12, 41.54it/s]

Batch 4000/4538 | Loss: 89.289940


Epoch 63/100 [Train]:  93%|█████████▎| 4206/4538 [01:42<00:08, 41.01it/s]

Batch 4200/4538 | Loss: 79.751396


Epoch 63/100 [Train]:  97%|█████████▋| 4406/4538 [01:47<00:03, 41.16it/s]

Batch 4400/4538 | Loss: 73.795349


Epoch 63/100 [Train]: 100%|██████████| 4538/4538 [01:50<00:00, 41.09it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 133.45it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 98.81it/s] 


Epoch 63/100 | Average Train Loss: 86.571205 | Average Validation Loss: 86.014346
Saved new best model with validation loss: 86.014346


Epoch 64/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 78.164520


Epoch 64/100 [Train]:   4%|▍         | 204/4538 [00:04<01:44, 41.47it/s]

Batch 200/4538 | Loss: 65.593643


Epoch 64/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 41.27it/s]

Batch 400/4538 | Loss: 95.745377


Epoch 64/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:35, 41.13it/s]

Batch 600/4538 | Loss: 77.721382


Epoch 64/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:30, 41.24it/s]

Batch 800/4538 | Loss: 90.277977


Epoch 64/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:25, 41.34it/s]

Batch 1000/4538 | Loss: 83.428116


Epoch 64/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:21, 41.09it/s]

Batch 1200/4538 | Loss: 89.626549


Epoch 64/100 [Train]:  31%|███       | 1404/4538 [00:33<01:16, 41.09it/s]

Batch 1400/4538 | Loss: 99.108582


Epoch 64/100 [Train]:  35%|███▌      | 1604/4538 [00:38<01:10, 41.35it/s]

Batch 1600/4538 | Loss: 78.004997


Epoch 64/100 [Train]:  40%|███▉      | 1804/4538 [00:43<01:06, 41.34it/s]

Batch 1800/4538 | Loss: 82.424187


Epoch 64/100 [Train]:  44%|████▍     | 2004/4538 [00:48<01:01, 41.36it/s]

Batch 2000/4538 | Loss: 85.863823


Epoch 64/100 [Train]:  49%|████▊     | 2204/4538 [00:53<00:56, 41.12it/s]

Batch 2200/4538 | Loss: 81.548241


Epoch 64/100 [Train]:  53%|█████▎    | 2404/4538 [00:58<00:52, 40.62it/s]

Batch 2400/4538 | Loss: 91.939644


Epoch 64/100 [Train]:  57%|█████▋    | 2604/4538 [01:03<00:47, 41.10it/s]

Batch 2600/4538 | Loss: 76.454521


Epoch 64/100 [Train]:  62%|██████▏   | 2804/4538 [01:07<00:42, 41.23it/s]

Batch 2800/4538 | Loss: 76.707893


Epoch 64/100 [Train]:  66%|██████▌   | 3004/4538 [01:12<00:37, 40.67it/s]

Batch 3000/4538 | Loss: 80.143448


Epoch 64/100 [Train]:  71%|███████   | 3204/4538 [01:17<00:32, 41.45it/s]

Batch 3200/4538 | Loss: 77.591919


Epoch 64/100 [Train]:  75%|███████▌  | 3404/4538 [01:22<00:27, 41.44it/s]

Batch 3400/4538 | Loss: 76.478447


Epoch 64/100 [Train]:  79%|███████▉  | 3604/4538 [01:27<00:22, 41.32it/s]

Batch 3600/4538 | Loss: 93.321953


Epoch 64/100 [Train]:  84%|████████▍ | 3804/4538 [01:32<00:17, 41.00it/s]

Batch 3800/4538 | Loss: 88.295341


Epoch 64/100 [Train]:  88%|████████▊ | 4004/4538 [01:36<00:12, 41.10it/s]

Batch 4000/4538 | Loss: 82.633690


Epoch 64/100 [Train]:  93%|█████████▎| 4204/4538 [01:41<00:08, 41.40it/s]

Batch 4200/4538 | Loss: 89.574722


Epoch 64/100 [Train]:  97%|█████████▋| 4404/4538 [01:46<00:03, 41.24it/s]

Batch 4400/4538 | Loss: 88.547729


Epoch 64/100 [Train]: 100%|██████████| 4538/4538 [01:49<00:00, 41.31it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:33<00:00, 133.60it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 98.68it/s] 


Epoch 64/100 | Average Train Loss: 86.285988 | Average Validation Loss: 85.778519
Saved new best model with validation loss: 85.778519


Epoch 65/100 [Train]:   0%|          | 4/4538 [00:00<02:14, 33.67it/s]

Batch 0/4538 | Loss: 82.145721


Epoch 65/100 [Train]:   4%|▍         | 204/4538 [00:04<01:44, 41.32it/s]

Batch 200/4538 | Loss: 85.489594


Epoch 65/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 41.15it/s]

Batch 400/4538 | Loss: 73.026283


Epoch 65/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:35, 41.01it/s]

Batch 600/4538 | Loss: 94.098618


Epoch 65/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:30, 41.39it/s]

Batch 800/4538 | Loss: 80.998917


Epoch 65/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:26, 41.07it/s]

Batch 1000/4538 | Loss: 112.861885


Epoch 65/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:20, 41.21it/s]

Batch 1200/4538 | Loss: 76.750099


Epoch 65/100 [Train]:  31%|███       | 1404/4538 [00:34<01:15, 41.40it/s]

Batch 1400/4538 | Loss: 86.856087


Epoch 65/100 [Train]:  35%|███▌      | 1604/4538 [00:38<01:10, 41.43it/s]

Batch 1600/4538 | Loss: 87.532516


Epoch 65/100 [Train]:  40%|███▉      | 1804/4538 [00:43<01:06, 41.10it/s]

Batch 1800/4538 | Loss: 86.433876


Epoch 65/100 [Train]:  44%|████▍     | 2004/4538 [00:48<01:01, 41.18it/s]

Batch 2000/4538 | Loss: 67.939156


Epoch 65/100 [Train]:  49%|████▊     | 2204/4538 [00:53<00:56, 41.43it/s]

Batch 2200/4538 | Loss: 93.546471


Epoch 65/100 [Train]:  53%|█████▎    | 2404/4538 [00:58<00:51, 41.21it/s]

Batch 2400/4538 | Loss: 118.653442


Epoch 65/100 [Train]:  57%|█████▋    | 2604/4538 [01:03<00:47, 41.04it/s]

Batch 2600/4538 | Loss: 105.741341


Epoch 65/100 [Train]:  62%|██████▏   | 2804/4538 [01:07<00:42, 41.21it/s]

Batch 2800/4538 | Loss: 92.113770


Epoch 65/100 [Train]:  66%|██████▌   | 3004/4538 [01:12<00:37, 41.15it/s]

Batch 3000/4538 | Loss: 86.375389


Epoch 65/100 [Train]:  71%|███████   | 3204/4538 [01:17<00:32, 41.37it/s]

Batch 3200/4538 | Loss: 92.796669


Epoch 65/100 [Train]:  75%|███████▌  | 3404/4538 [01:22<00:27, 41.34it/s]

Batch 3400/4538 | Loss: 74.074753


Epoch 65/100 [Train]:  79%|███████▉  | 3604/4538 [01:27<00:22, 41.30it/s]

Batch 3600/4538 | Loss: 84.381142


Epoch 65/100 [Train]:  84%|████████▍ | 3804/4538 [01:32<00:17, 41.41it/s]

Batch 3800/4538 | Loss: 76.292030


Epoch 65/100 [Train]:  88%|████████▊ | 4004/4538 [01:37<00:12, 41.11it/s]

Batch 4000/4538 | Loss: 87.828629


Epoch 65/100 [Train]:  93%|█████████▎| 4204/4538 [01:41<00:08, 41.28it/s]

Batch 4200/4538 | Loss: 76.808884


Epoch 65/100 [Train]:  97%|█████████▋| 4404/4538 [01:46<00:03, 41.22it/s]

Batch 4400/4538 | Loss: 85.647736


Epoch 65/100 [Train]: 100%|██████████| 4538/4538 [01:49<00:00, 41.28it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 133.37it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 96.96it/s] 


Epoch 65/100 | Average Train Loss: 86.019211 | Average Validation Loss: 85.551857
Saved new best model with validation loss: 85.551857


Epoch 66/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 89.736626


Epoch 66/100 [Train]:   5%|▍         | 208/4538 [00:05<01:45, 41.02it/s]

Batch 200/4538 | Loss: 80.620720


Epoch 66/100 [Train]:   9%|▉         | 406/4538 [00:10<01:46, 38.91it/s]

Batch 400/4538 | Loss: 85.147346


Epoch 66/100 [Train]:  13%|█▎        | 606/4538 [00:15<01:41, 38.58it/s]

Batch 600/4538 | Loss: 81.292328


Epoch 66/100 [Train]:  18%|█▊        | 806/4538 [00:20<01:36, 38.59it/s]

Batch 800/4538 | Loss: 79.570343


Epoch 66/100 [Train]:  22%|██▏       | 1006/4538 [00:25<01:30, 38.87it/s]

Batch 1000/4538 | Loss: 92.701225


Epoch 66/100 [Train]:  27%|██▋       | 1206/4538 [00:30<01:25, 38.75it/s]

Batch 1200/4538 | Loss: 79.455315


Epoch 66/100 [Train]:  31%|███       | 1406/4538 [00:36<01:21, 38.65it/s]

Batch 1400/4538 | Loss: 78.441551


Epoch 66/100 [Train]:  35%|███▌      | 1606/4538 [00:41<01:11, 41.00it/s]

Batch 1600/4538 | Loss: 72.181221


Epoch 66/100 [Train]:  40%|███▉      | 1806/4538 [00:46<01:06, 41.25it/s]

Batch 1800/4538 | Loss: 72.748924


Epoch 66/100 [Train]:  44%|████▍     | 2006/4538 [00:50<01:01, 41.32it/s]

Batch 2000/4538 | Loss: 104.860985


Epoch 66/100 [Train]:  49%|████▊     | 2206/4538 [00:55<00:56, 41.60it/s]

Batch 2200/4538 | Loss: 85.917625


Epoch 66/100 [Train]:  53%|█████▎    | 2406/4538 [01:00<00:51, 41.02it/s]

Batch 2400/4538 | Loss: 86.022148


Epoch 66/100 [Train]:  57%|█████▋    | 2606/4538 [01:05<00:46, 41.11it/s]

Batch 2600/4538 | Loss: 94.097786


Epoch 66/100 [Train]:  62%|██████▏   | 2806/4538 [01:10<00:41, 41.27it/s]

Batch 2800/4538 | Loss: 93.671013


Epoch 66/100 [Train]:  66%|██████▌   | 3006/4538 [01:15<00:37, 41.15it/s]

Batch 3000/4538 | Loss: 78.480263


Epoch 66/100 [Train]:  71%|███████   | 3206/4538 [01:19<00:32, 41.16it/s]

Batch 3200/4538 | Loss: 81.983627


Epoch 66/100 [Train]:  75%|███████▌  | 3406/4538 [01:24<00:27, 41.28it/s]

Batch 3400/4538 | Loss: 79.935951


Epoch 66/100 [Train]:  79%|███████▉  | 3606/4538 [01:29<00:22, 41.25it/s]

Batch 3600/4538 | Loss: 72.266655


Epoch 66/100 [Train]:  84%|████████▍ | 3806/4538 [01:34<00:17, 41.48it/s]

Batch 3800/4538 | Loss: 88.623566


Epoch 66/100 [Train]:  88%|████████▊ | 4006/4538 [01:39<00:12, 41.15it/s]

Batch 4000/4538 | Loss: 72.819260


Epoch 66/100 [Train]:  93%|█████████▎| 4206/4538 [01:44<00:08, 41.15it/s]

Batch 4200/4538 | Loss: 98.860291


Epoch 66/100 [Train]:  97%|█████████▋| 4406/4538 [01:49<00:03, 41.13it/s]

Batch 4400/4538 | Loss: 86.201729


Epoch 66/100 [Train]: 100%|██████████| 4538/4538 [01:52<00:00, 40.42it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 132.28it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 99.71it/s] 


Epoch 66/100 | Average Train Loss: 85.771309 | Average Validation Loss: 85.325935
Saved new best model with validation loss: 85.325935


Epoch 67/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 71.326546


Epoch 67/100 [Train]:   5%|▍         | 207/4538 [00:05<01:52, 38.61it/s]

Batch 200/4538 | Loss: 79.158127


Epoch 67/100 [Train]:   9%|▉         | 407/4538 [00:10<01:47, 38.58it/s]

Batch 400/4538 | Loss: 65.648872


Epoch 67/100 [Train]:  13%|█▎        | 607/4538 [00:15<01:42, 38.49it/s]

Batch 600/4538 | Loss: 59.299290


Epoch 67/100 [Train]:  18%|█▊        | 807/4538 [00:20<01:36, 38.61it/s]

Batch 800/4538 | Loss: 95.729973


Epoch 67/100 [Train]:  22%|██▏       | 1007/4538 [00:26<01:31, 38.48it/s]

Batch 1000/4538 | Loss: 84.422081


Epoch 67/100 [Train]:  27%|██▋       | 1207/4538 [00:31<01:26, 38.68it/s]

Batch 1200/4538 | Loss: 99.173515


Epoch 67/100 [Train]:  31%|███       | 1407/4538 [00:36<01:21, 38.63it/s]

Batch 1400/4538 | Loss: 77.142395


Epoch 67/100 [Train]:  35%|███▌      | 1607/4538 [00:41<01:16, 38.53it/s]

Batch 1600/4538 | Loss: 85.532799


Epoch 67/100 [Train]:  40%|███▉      | 1807/4538 [00:46<01:10, 38.47it/s]

Batch 1800/4538 | Loss: 89.705963


Epoch 67/100 [Train]:  44%|████▍     | 2007/4538 [00:52<01:05, 38.54it/s]

Batch 2000/4538 | Loss: 81.108490


Epoch 67/100 [Train]:  49%|████▊     | 2207/4538 [00:57<01:00, 38.42it/s]

Batch 2200/4538 | Loss: 82.535446


Epoch 67/100 [Train]:  53%|█████▎    | 2407/4538 [01:02<00:55, 38.66it/s]

Batch 2400/4538 | Loss: 98.728607


Epoch 67/100 [Train]:  57%|█████▋    | 2607/4538 [01:07<00:50, 38.54it/s]

Batch 2600/4538 | Loss: 79.060829


Epoch 67/100 [Train]:  62%|██████▏   | 2807/4538 [01:12<00:44, 38.48it/s]

Batch 2800/4538 | Loss: 78.306137


Epoch 67/100 [Train]:  66%|██████▋   | 3007/4538 [01:18<00:39, 38.56it/s]

Batch 3000/4538 | Loss: 63.840637


Epoch 67/100 [Train]:  71%|███████   | 3207/4538 [01:23<00:34, 38.60it/s]

Batch 3200/4538 | Loss: 76.896332


Epoch 67/100 [Train]:  75%|███████▌  | 3407/4538 [01:28<00:29, 38.38it/s]

Batch 3400/4538 | Loss: 57.087479


Epoch 67/100 [Train]:  79%|███████▉  | 3607/4538 [01:33<00:24, 38.46it/s]

Batch 3600/4538 | Loss: 81.078972


Epoch 67/100 [Train]:  84%|████████▍ | 3807/4538 [01:38<00:18, 38.53it/s]

Batch 3800/4538 | Loss: 90.118965


Epoch 67/100 [Train]:  88%|████████▊ | 4007/4538 [01:44<00:13, 38.34it/s]

Batch 4000/4538 | Loss: 87.056503


Epoch 67/100 [Train]:  93%|█████████▎| 4207/4538 [01:49<00:08, 38.20it/s]

Batch 4200/4538 | Loss: 79.624809


Epoch 67/100 [Train]:  97%|█████████▋| 4407/4538 [01:54<00:03, 38.49it/s]

Batch 4400/4538 | Loss: 88.428368


Epoch 67/100 [Train]: 100%|██████████| 4538/4538 [01:57<00:00, 38.52it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 132.71it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 100.19it/s]


Epoch 67/100 | Average Train Loss: 85.534775 | Average Validation Loss: 85.097147
Saved new best model with validation loss: 85.097147


Epoch 68/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 83.002357


Epoch 68/100 [Train]:   5%|▍         | 208/4538 [00:05<01:52, 38.55it/s]

Batch 200/4538 | Loss: 102.689346


Epoch 68/100 [Train]:   9%|▉         | 408/4538 [00:10<01:47, 38.59it/s]

Batch 400/4538 | Loss: 110.480759


Epoch 68/100 [Train]:  13%|█▎        | 608/4538 [00:15<01:42, 38.49it/s]

Batch 600/4538 | Loss: 93.940926


Epoch 68/100 [Train]:  18%|█▊        | 808/4538 [00:20<01:37, 38.44it/s]

Batch 800/4538 | Loss: 100.044510


Epoch 68/100 [Train]:  22%|██▏       | 1008/4538 [00:26<01:31, 38.46it/s]

Batch 1000/4538 | Loss: 87.226334


Epoch 68/100 [Train]:  27%|██▋       | 1208/4538 [00:31<01:26, 38.52it/s]

Batch 1200/4538 | Loss: 92.144539


Epoch 68/100 [Train]:  31%|███       | 1408/4538 [00:36<01:21, 38.50it/s]

Batch 1400/4538 | Loss: 82.381462


Epoch 68/100 [Train]:  35%|███▌      | 1608/4538 [00:41<01:16, 38.41it/s]

Batch 1600/4538 | Loss: 57.063515


Epoch 68/100 [Train]:  40%|███▉      | 1808/4538 [00:46<01:10, 38.69it/s]

Batch 1800/4538 | Loss: 91.303856


Epoch 68/100 [Train]:  44%|████▍     | 2008/4538 [00:52<01:05, 38.41it/s]

Batch 2000/4538 | Loss: 82.168915


Epoch 68/100 [Train]:  49%|████▊     | 2208/4538 [00:57<01:00, 38.45it/s]

Batch 2200/4538 | Loss: 94.363281


Epoch 68/100 [Train]:  53%|█████▎    | 2408/4538 [01:02<00:55, 38.43it/s]

Batch 2400/4538 | Loss: 90.102371


Epoch 68/100 [Train]:  57%|█████▋    | 2608/4538 [01:07<00:50, 38.43it/s]

Batch 2600/4538 | Loss: 83.739929


Epoch 68/100 [Train]:  62%|██████▏   | 2808/4538 [01:12<00:44, 38.47it/s]

Batch 2800/4538 | Loss: 97.981842


Epoch 68/100 [Train]:  66%|██████▋   | 3008/4538 [01:18<00:39, 38.41it/s]

Batch 3000/4538 | Loss: 91.758545


Epoch 68/100 [Train]:  71%|███████   | 3208/4538 [01:23<00:34, 38.50it/s]

Batch 3200/4538 | Loss: 79.981750


Epoch 68/100 [Train]:  75%|███████▌  | 3408/4538 [01:28<00:29, 38.36it/s]

Batch 3400/4538 | Loss: 89.892365


Epoch 68/100 [Train]:  80%|███████▉  | 3608/4538 [01:33<00:24, 38.48it/s]

Batch 3600/4538 | Loss: 68.892784


Epoch 68/100 [Train]:  84%|████████▍ | 3804/4538 [01:38<00:19, 37.53it/s]

Batch 3800/4538 | Loss: 80.592781


Epoch 68/100 [Train]:  88%|████████▊ | 4008/4538 [01:44<00:13, 38.50it/s]

Batch 4000/4538 | Loss: 71.818802


Epoch 68/100 [Train]:  93%|█████████▎| 4208/4538 [01:49<00:08, 38.43it/s]

Batch 4200/4538 | Loss: 90.243286


Epoch 68/100 [Train]:  97%|█████████▋| 4408/4538 [01:54<00:03, 38.39it/s]

Batch 4400/4538 | Loss: 96.315491


Epoch 68/100 [Train]: 100%|██████████| 4538/4538 [01:57<00:00, 38.47it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 132.88it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 97.11it/s] 


Epoch 68/100 | Average Train Loss: 85.315720 | Average Validation Loss: 84.935861
Saved new best model with validation loss: 84.935861


Epoch 69/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 89.587509


Epoch 69/100 [Train]:   5%|▍         | 207/4538 [00:05<01:52, 38.36it/s]

Batch 200/4538 | Loss: 79.610512


Epoch 69/100 [Train]:   9%|▉         | 407/4538 [00:10<01:46, 38.71it/s]

Batch 400/4538 | Loss: 97.497116


Epoch 69/100 [Train]:  13%|█▎        | 607/4538 [00:15<01:42, 38.41it/s]

Batch 600/4538 | Loss: 76.046753


Epoch 69/100 [Train]:  18%|█▊        | 807/4538 [00:20<01:36, 38.72it/s]

Batch 800/4538 | Loss: 95.056908


Epoch 69/100 [Train]:  22%|██▏       | 1007/4538 [00:26<01:31, 38.57it/s]

Batch 1000/4538 | Loss: 78.843468


Epoch 69/100 [Train]:  27%|██▋       | 1207/4538 [00:31<01:26, 38.62it/s]

Batch 1200/4538 | Loss: 94.946587


Epoch 69/100 [Train]:  31%|███       | 1407/4538 [00:36<01:21, 38.49it/s]

Batch 1400/4538 | Loss: 92.002357


Epoch 69/100 [Train]:  35%|███▌      | 1607/4538 [00:41<01:15, 38.69it/s]

Batch 1600/4538 | Loss: 90.344162


Epoch 69/100 [Train]:  40%|███▉      | 1807/4538 [00:46<01:10, 38.60it/s]

Batch 1800/4538 | Loss: 76.618416


Epoch 69/100 [Train]:  44%|████▍     | 2007/4538 [00:52<01:05, 38.51it/s]

Batch 2000/4538 | Loss: 85.382484


Epoch 69/100 [Train]:  49%|████▊     | 2207/4538 [00:57<01:00, 38.42it/s]

Batch 2200/4538 | Loss: 57.540604


Epoch 69/100 [Train]:  53%|█████▎    | 2407/4538 [01:02<00:55, 38.38it/s]

Batch 2400/4538 | Loss: 73.460236


Epoch 69/100 [Train]:  57%|█████▋    | 2607/4538 [01:07<00:50, 38.48it/s]

Batch 2600/4538 | Loss: 78.959923


Epoch 69/100 [Train]:  62%|██████▏   | 2807/4538 [01:12<00:44, 38.59it/s]

Batch 2800/4538 | Loss: 78.214066


Epoch 69/100 [Train]:  66%|██████▋   | 3007/4538 [01:18<00:39, 38.60it/s]

Batch 3000/4538 | Loss: 81.161552


Epoch 69/100 [Train]:  71%|███████   | 3207/4538 [01:23<00:34, 38.71it/s]

Batch 3200/4538 | Loss: 96.086182


Epoch 69/100 [Train]:  75%|███████▌  | 3407/4538 [01:28<00:29, 38.28it/s]

Batch 3400/4538 | Loss: 95.940895


Epoch 69/100 [Train]:  79%|███████▉  | 3607/4538 [01:33<00:24, 38.65it/s]

Batch 3600/4538 | Loss: 102.464485


Epoch 69/100 [Train]:  84%|████████▍ | 3807/4538 [01:38<00:18, 38.48it/s]

Batch 3800/4538 | Loss: 105.243797


Epoch 69/100 [Train]:  88%|████████▊ | 4007/4538 [01:43<00:13, 38.28it/s]

Batch 4000/4538 | Loss: 84.256226


Epoch 69/100 [Train]:  93%|█████████▎| 4207/4538 [01:49<00:08, 38.61it/s]

Batch 4200/4538 | Loss: 105.514183


Epoch 69/100 [Train]:  97%|█████████▋| 4407/4538 [01:54<00:03, 38.55it/s]

Batch 4400/4538 | Loss: 79.528336


Epoch 69/100 [Train]: 100%|██████████| 4538/4538 [01:57<00:00, 38.55it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 133.09it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 99.08it/s] 


Epoch 69/100 | Average Train Loss: 85.111984 | Average Validation Loss: 84.729348
Saved new best model with validation loss: 84.729348


Epoch 70/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 78.096672


Epoch 70/100 [Train]:   5%|▍         | 208/4538 [00:05<01:51, 38.70it/s]

Batch 200/4538 | Loss: 97.829330


Epoch 70/100 [Train]:   9%|▉         | 408/4538 [00:10<01:47, 38.49it/s]

Batch 400/4538 | Loss: 88.057899


Epoch 70/100 [Train]:  13%|█▎        | 608/4538 [00:15<01:40, 38.92it/s]

Batch 600/4538 | Loss: 79.463058


Epoch 70/100 [Train]:  18%|█▊        | 808/4538 [00:20<01:36, 38.61it/s]

Batch 800/4538 | Loss: 74.327240


Epoch 70/100 [Train]:  22%|██▏       | 1008/4538 [00:26<01:31, 38.56it/s]

Batch 1000/4538 | Loss: 81.726463


Epoch 70/100 [Train]:  27%|██▋       | 1208/4538 [00:31<01:26, 38.57it/s]

Batch 1200/4538 | Loss: 57.373344


Epoch 70/100 [Train]:  31%|███       | 1408/4538 [00:36<01:21, 38.52it/s]

Batch 1400/4538 | Loss: 64.834282


Epoch 70/100 [Train]:  35%|███▌      | 1608/4538 [00:41<01:15, 38.57it/s]

Batch 1600/4538 | Loss: 88.286148


Epoch 70/100 [Train]:  40%|███▉      | 1808/4538 [00:46<01:10, 38.64it/s]

Batch 1800/4538 | Loss: 83.969376


Epoch 70/100 [Train]:  44%|████▍     | 2008/4538 [00:52<01:05, 38.47it/s]

Batch 2000/4538 | Loss: 75.788979


Epoch 70/100 [Train]:  49%|████▊     | 2208/4538 [00:57<01:00, 38.60it/s]

Batch 2200/4538 | Loss: 80.493202


Epoch 70/100 [Train]:  53%|█████▎    | 2408/4538 [01:02<00:55, 38.51it/s]

Batch 2400/4538 | Loss: 87.441780


Epoch 70/100 [Train]:  57%|█████▋    | 2608/4538 [01:07<00:50, 38.37it/s]

Batch 2600/4538 | Loss: 83.299881


Epoch 70/100 [Train]:  62%|██████▏   | 2808/4538 [01:12<00:44, 38.78it/s]

Batch 2800/4538 | Loss: 88.503212


Epoch 70/100 [Train]:  66%|██████▋   | 3008/4538 [01:18<00:39, 38.32it/s]

Batch 3000/4538 | Loss: 75.820549


Epoch 70/100 [Train]:  71%|███████   | 3208/4538 [01:23<00:34, 38.56it/s]

Batch 3200/4538 | Loss: 80.899307


Epoch 70/100 [Train]:  75%|███████▌  | 3408/4538 [01:28<00:27, 41.15it/s]

Batch 3400/4538 | Loss: 93.303825


Epoch 70/100 [Train]:  80%|███████▉  | 3608/4538 [01:32<00:22, 41.40it/s]

Batch 3600/4538 | Loss: 79.867661


Epoch 70/100 [Train]:  84%|████████▍ | 3808/4538 [01:37<00:17, 41.43it/s]

Batch 3800/4538 | Loss: 85.731445


Epoch 70/100 [Train]:  88%|████████▊ | 4008/4538 [01:42<00:12, 41.23it/s]

Batch 4000/4538 | Loss: 79.446892


Epoch 70/100 [Train]:  93%|█████████▎| 4208/4538 [01:47<00:08, 41.23it/s]

Batch 4200/4538 | Loss: 87.772324


Epoch 70/100 [Train]:  97%|█████████▋| 4408/4538 [01:52<00:03, 40.98it/s]

Batch 4400/4538 | Loss: 83.593643


Epoch 70/100 [Train]: 100%|██████████| 4538/4538 [01:55<00:00, 39.27it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 133.01it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 98.60it/s] 


Epoch 70/100 | Average Train Loss: 84.923965 | Average Validation Loss: 84.565834
Saved new best model with validation loss: 84.565834


Epoch 71/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 69.902100


Epoch 71/100 [Train]:   4%|▍         | 204/4538 [00:04<01:45, 41.00it/s]

Batch 200/4538 | Loss: 71.979843


Epoch 71/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 41.21it/s]

Batch 400/4538 | Loss: 80.313629


Epoch 71/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:35, 41.24it/s]

Batch 600/4538 | Loss: 83.783852


Epoch 71/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:30, 41.19it/s]

Batch 800/4538 | Loss: 79.096657


Epoch 71/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:25, 41.37it/s]

Batch 1000/4538 | Loss: 81.258766


Epoch 71/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:20, 41.21it/s]

Batch 1200/4538 | Loss: 64.866608


Epoch 71/100 [Train]:  31%|███       | 1404/4538 [00:34<01:15, 41.41it/s]

Batch 1400/4538 | Loss: 75.164215


Epoch 71/100 [Train]:  35%|███▌      | 1604/4538 [00:38<01:11, 41.17it/s]

Batch 1600/4538 | Loss: 94.503380


Epoch 71/100 [Train]:  40%|███▉      | 1804/4538 [00:43<01:06, 41.37it/s]

Batch 1800/4538 | Loss: 75.153625


Epoch 71/100 [Train]:  44%|████▍     | 2004/4538 [00:48<01:01, 41.34it/s]

Batch 2000/4538 | Loss: 90.631752


Epoch 71/100 [Train]:  49%|████▊     | 2204/4538 [00:53<00:56, 41.39it/s]

Batch 2200/4538 | Loss: 83.304955


Epoch 71/100 [Train]:  53%|█████▎    | 2404/4538 [00:58<00:51, 41.22it/s]

Batch 2400/4538 | Loss: 76.984947


Epoch 71/100 [Train]:  57%|█████▋    | 2604/4538 [01:03<00:46, 41.51it/s]

Batch 2600/4538 | Loss: 91.571121


Epoch 71/100 [Train]:  62%|██████▏   | 2804/4538 [01:07<00:41, 41.55it/s]

Batch 2800/4538 | Loss: 94.351006


Epoch 71/100 [Train]:  66%|██████▌   | 3004/4538 [01:12<00:37, 41.19it/s]

Batch 3000/4538 | Loss: 67.606102


Epoch 71/100 [Train]:  71%|███████   | 3204/4538 [01:17<00:32, 41.23it/s]

Batch 3200/4538 | Loss: 86.826118


Epoch 71/100 [Train]:  75%|███████▌  | 3404/4538 [01:22<00:27, 41.26it/s]

Batch 3400/4538 | Loss: 78.543030


Epoch 71/100 [Train]:  79%|███████▉  | 3604/4538 [01:27<00:22, 41.25it/s]

Batch 3600/4538 | Loss: 99.365440


Epoch 71/100 [Train]:  84%|████████▍ | 3804/4538 [01:32<00:17, 41.40it/s]

Batch 3800/4538 | Loss: 108.709625


Epoch 71/100 [Train]:  88%|████████▊ | 4004/4538 [01:37<00:12, 41.09it/s]

Batch 4000/4538 | Loss: 80.719215


Epoch 71/100 [Train]:  93%|█████████▎| 4204/4538 [01:41<00:08, 41.23it/s]

Batch 4200/4538 | Loss: 79.414215


Epoch 71/100 [Train]:  97%|█████████▋| 4404/4538 [01:46<00:03, 41.20it/s]

Batch 4400/4538 | Loss: 66.834801


Epoch 71/100 [Train]: 100%|██████████| 4538/4538 [01:50<00:00, 41.24it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:35<00:00, 128.98it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 90.83it/s]


Epoch 71/100 | Average Train Loss: 84.749352 | Average Validation Loss: 84.404359
Saved new best model with validation loss: 84.404359


Epoch 72/100 [Train]:   0%|          | 7/4538 [00:00<02:23, 31.58it/s]

Batch 0/4538 | Loss: 102.111740


Epoch 72/100 [Train]:   5%|▍         | 207/4538 [00:05<02:04, 34.81it/s]

Batch 200/4538 | Loss: 101.944427


Epoch 72/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.48it/s]

Batch 400/4538 | Loss: 76.359856


Epoch 72/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:54, 34.48it/s]

Batch 600/4538 | Loss: 86.406746


Epoch 72/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.49it/s]

Batch 800/4538 | Loss: 101.017754


Epoch 72/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.47it/s]

Batch 1000/4538 | Loss: 85.654404


Epoch 72/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.46it/s]

Batch 1200/4538 | Loss: 126.445076


Epoch 72/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.52it/s]

Batch 1400/4538 | Loss: 96.286293


Epoch 72/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.51it/s]

Batch 1600/4538 | Loss: 92.569954


Epoch 72/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.38it/s]

Batch 1800/4538 | Loss: 76.782700


Epoch 72/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.51it/s]

Batch 2000/4538 | Loss: 84.086472


Epoch 72/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.38it/s]

Batch 2200/4538 | Loss: 103.522400


Epoch 72/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.51it/s]

Batch 2400/4538 | Loss: 96.459442


Epoch 72/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.55it/s]

Batch 2600/4538 | Loss: 73.423096


Epoch 72/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.51it/s]

Batch 2800/4538 | Loss: 91.028191


Epoch 72/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.47it/s]

Batch 3000/4538 | Loss: 88.560623


Epoch 72/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.44it/s]

Batch 3200/4538 | Loss: 74.135063


Epoch 72/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.45it/s]

Batch 3400/4538 | Loss: 78.447258


Epoch 72/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:27, 34.42it/s]

Batch 3600/4538 | Loss: 73.857513


Epoch 72/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.46it/s]

Batch 3800/4538 | Loss: 81.805115


Epoch 72/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.54it/s]

Batch 4000/4538 | Loss: 79.238968


Epoch 72/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.49it/s]

Batch 4200/4538 | Loss: 65.030945


Epoch 72/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.37it/s]

Batch 4400/4538 | Loss: 82.283234


Epoch 72/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.49it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.30it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 93.77it/s]


Epoch 72/100 | Average Train Loss: 84.587303 | Average Validation Loss: 84.270522
Saved new best model with validation loss: 84.270522


Epoch 73/100 [Train]:   0%|          | 7/4538 [00:00<02:25, 31.06it/s]

Batch 0/4538 | Loss: 85.028160


Epoch 73/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.48it/s]

Batch 200/4538 | Loss: 94.646446


Epoch 73/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.48it/s]

Batch 400/4538 | Loss: 74.361870


Epoch 73/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:54, 34.46it/s]

Batch 600/4538 | Loss: 77.077965


Epoch 73/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.51it/s]

Batch 800/4538 | Loss: 101.409447


Epoch 73/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.61it/s]

Batch 1000/4538 | Loss: 85.450829


Epoch 73/100 [Train]:  27%|██▋       | 1207/4538 [00:35<01:36, 34.62it/s]

Batch 1200/4538 | Loss: 73.807922


Epoch 73/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.65it/s]

Batch 1400/4538 | Loss: 71.753830


Epoch 73/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.59it/s]

Batch 1600/4538 | Loss: 84.401062


Epoch 73/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.54it/s]

Batch 1800/4538 | Loss: 119.811241


Epoch 73/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.48it/s]

Batch 2000/4538 | Loss: 78.521919


Epoch 73/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.45it/s]

Batch 2200/4538 | Loss: 85.559654


Epoch 73/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.55it/s]

Batch 2400/4538 | Loss: 80.294083


Epoch 73/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:56, 34.42it/s]

Batch 2600/4538 | Loss: 79.614243


Epoch 73/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.53it/s]

Batch 2800/4538 | Loss: 81.457787


Epoch 73/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.47it/s]

Batch 3000/4538 | Loss: 77.099312


Epoch 73/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.48it/s]

Batch 3200/4538 | Loss: 77.794281


Epoch 73/100 [Train]:  75%|███████▍  | 3403/4538 [01:38<00:34, 32.92it/s]

Batch 3400/4538 | Loss: 75.654182


Epoch 73/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.49it/s]

Batch 3600/4538 | Loss: 87.612579


Epoch 73/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.53it/s]

Batch 3800/4538 | Loss: 91.837830


Epoch 73/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.42it/s]

Batch 4000/4538 | Loss: 85.493965


Epoch 73/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.59it/s]

Batch 4200/4538 | Loss: 113.680862


Epoch 73/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.42it/s]

Batch 4400/4538 | Loss: 84.818611


Epoch 73/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.49it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.80it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.79it/s]


Epoch 73/100 | Average Train Loss: 84.430984 | Average Validation Loss: 84.140217
Saved new best model with validation loss: 84.140217


Epoch 74/100 [Train]:   0%|          | 7/4538 [00:00<02:24, 31.34it/s]

Batch 0/4538 | Loss: 80.211914


Epoch 74/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.53it/s]

Batch 200/4538 | Loss: 90.700356


Epoch 74/100 [Train]:   9%|▉         | 407/4538 [00:11<02:01, 33.92it/s]

Batch 400/4538 | Loss: 71.961723


Epoch 74/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.50it/s]

Batch 600/4538 | Loss: 92.918701


Epoch 74/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.44it/s]

Batch 800/4538 | Loss: 78.676567


Epoch 74/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.53it/s]

Batch 1000/4538 | Loss: 80.187531


Epoch 74/100 [Train]:  27%|██▋       | 1207/4538 [00:35<01:36, 34.41it/s]

Batch 1200/4538 | Loss: 77.015991


Epoch 74/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.47it/s]

Batch 1400/4538 | Loss: 98.861641


Epoch 74/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:25, 34.47it/s]

Batch 1600/4538 | Loss: 81.949577


Epoch 74/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.48it/s]

Batch 1800/4538 | Loss: 70.124008


Epoch 74/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.46it/s]

Batch 2000/4538 | Loss: 76.653389


Epoch 74/100 [Train]:  49%|████▊     | 2207/4538 [01:04<01:07, 34.43it/s]

Batch 2200/4538 | Loss: 83.681313


Epoch 74/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.42it/s]

Batch 2400/4538 | Loss: 97.975601


Epoch 74/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.50it/s]

Batch 2600/4538 | Loss: 76.005524


Epoch 74/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.46it/s]

Batch 2800/4538 | Loss: 58.860184


Epoch 74/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.34it/s]

Batch 3000/4538 | Loss: 60.890049


Epoch 74/100 [Train]:  71%|███████   | 3207/4538 [01:33<00:38, 34.46it/s]

Batch 3200/4538 | Loss: 84.094757


Epoch 74/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.56it/s]

Batch 3400/4538 | Loss: 66.814034


Epoch 74/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.54it/s]

Batch 3600/4538 | Loss: 99.804771


Epoch 74/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.56it/s]

Batch 3800/4538 | Loss: 82.300934


Epoch 74/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.47it/s]

Batch 4000/4538 | Loss: 85.138069


Epoch 74/100 [Train]:  93%|█████████▎| 4207/4538 [02:02<00:09, 34.52it/s]

Batch 4200/4538 | Loss: 82.166161


Epoch 74/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.58it/s]

Batch 4400/4538 | Loss: 74.523361


Epoch 74/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.46it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.43it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.52it/s]


Epoch 74/100 | Average Train Loss: 84.291870 | Average Validation Loss: 84.024667
Saved new best model with validation loss: 84.024667


Epoch 75/100 [Train]:   0%|          | 7/4538 [00:00<02:24, 31.42it/s]

Batch 0/4538 | Loss: 75.711021


Epoch 75/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.53it/s]

Batch 200/4538 | Loss: 93.261261


Epoch 75/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.54it/s]

Batch 400/4538 | Loss: 101.275955


Epoch 75/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.57it/s]

Batch 600/4538 | Loss: 79.370911


Epoch 75/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.57it/s]

Batch 800/4538 | Loss: 116.745781


Epoch 75/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:43, 34.23it/s]

Batch 1000/4538 | Loss: 70.860680


Epoch 75/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.54it/s]

Batch 1200/4538 | Loss: 90.137131


Epoch 75/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.55it/s]

Batch 1400/4538 | Loss: 78.755371


Epoch 75/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:25, 34.48it/s]

Batch 1600/4538 | Loss: 79.851967


Epoch 75/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.62it/s]

Batch 1800/4538 | Loss: 94.318527


Epoch 75/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.49it/s]

Batch 2000/4538 | Loss: 84.742210


Epoch 75/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.53it/s]

Batch 2200/4538 | Loss: 75.658836


Epoch 75/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.58it/s]

Batch 2400/4538 | Loss: 72.307396


Epoch 75/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.53it/s]

Batch 2600/4538 | Loss: 89.487038


Epoch 75/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.54it/s]

Batch 2800/4538 | Loss: 73.716995


Epoch 75/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.58it/s]

Batch 3000/4538 | Loss: 89.013931


Epoch 75/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.52it/s]

Batch 3200/4538 | Loss: 86.550285


Epoch 75/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.46it/s]

Batch 3400/4538 | Loss: 75.143547


Epoch 75/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:27, 34.26it/s]

Batch 3600/4538 | Loss: 84.122314


Epoch 75/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.47it/s]

Batch 3800/4538 | Loss: 98.269928


Epoch 75/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.37it/s]

Batch 4000/4538 | Loss: 82.647682


Epoch 75/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.54it/s]

Batch 4200/4538 | Loss: 79.493126


Epoch 75/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.40it/s]

Batch 4400/4538 | Loss: 69.590508


Epoch 75/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.53it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 122.84it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 93.03it/s]


Epoch 75/100 | Average Train Loss: 84.160980 | Average Validation Loss: 83.897230
Saved new best model with validation loss: 83.897230


Epoch 76/100 [Train]:   0%|          | 7/4538 [00:00<02:24, 31.38it/s]

Batch 0/4538 | Loss: 84.408218


Epoch 76/100 [Train]:   5%|▍         | 207/4538 [00:05<02:04, 34.77it/s]

Batch 200/4538 | Loss: 104.084846


Epoch 76/100 [Train]:   9%|▉         | 407/4538 [00:11<01:58, 34.77it/s]

Batch 400/4538 | Loss: 86.619461


Epoch 76/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.53it/s]

Batch 600/4538 | Loss: 83.289276


Epoch 76/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.73it/s]

Batch 800/4538 | Loss: 65.354622


Epoch 76/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:41, 34.78it/s]

Batch 1000/4538 | Loss: 83.516045


Epoch 76/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:35, 34.81it/s]

Batch 1200/4538 | Loss: 86.493103


Epoch 76/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.75it/s]

Batch 1400/4538 | Loss: 84.613617


Epoch 76/100 [Train]:  35%|███▌      | 1603/4538 [00:46<01:24, 34.75it/s]

Batch 1600/4538 | Loss: 75.584671


Epoch 76/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.79it/s]

Batch 1800/4538 | Loss: 76.844086


Epoch 76/100 [Train]:  44%|████▍     | 2007/4538 [00:57<01:12, 34.73it/s]

Batch 2000/4538 | Loss: 89.527168


Epoch 76/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:06, 34.79it/s]

Batch 2200/4538 | Loss: 106.459770


Epoch 76/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.78it/s]

Batch 2400/4538 | Loss: 81.331505


Epoch 76/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.83it/s]

Batch 2600/4538 | Loss: 83.959030


Epoch 76/100 [Train]:  62%|██████▏   | 2807/4538 [01:20<00:49, 34.82it/s]

Batch 2800/4538 | Loss: 85.234718


Epoch 76/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:44, 34.78it/s]

Batch 3000/4538 | Loss: 88.885345


Epoch 76/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.56it/s]

Batch 3200/4538 | Loss: 79.129684


Epoch 76/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.83it/s]

Batch 3400/4538 | Loss: 89.072311


Epoch 76/100 [Train]:  79%|███████▉  | 3607/4538 [01:43<00:26, 34.83it/s]

Batch 3600/4538 | Loss: 68.655937


Epoch 76/100 [Train]:  84%|████████▍ | 3807/4538 [01:49<00:21, 34.74it/s]

Batch 3800/4538 | Loss: 80.947456


Epoch 76/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.72it/s]

Batch 4000/4538 | Loss: 103.371124


Epoch 76/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.68it/s]

Batch 4200/4538 | Loss: 88.850990


Epoch 76/100 [Train]:  97%|█████████▋| 4407/4538 [02:06<00:03, 34.72it/s]

Batch 4400/4538 | Loss: 98.107460


Epoch 76/100 [Train]: 100%|██████████| 4538/4538 [02:10<00:00, 34.73it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 123.78it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 92.43it/s]


Epoch 76/100 | Average Train Loss: 84.032265 | Average Validation Loss: 83.796026
Saved new best model with validation loss: 83.796026


Epoch 77/100 [Train]:   0%|          | 7/4538 [00:00<02:24, 31.32it/s]

Batch 0/4538 | Loss: 70.913567


Epoch 77/100 [Train]:   5%|▍         | 207/4538 [00:05<02:05, 34.55it/s]

Batch 200/4538 | Loss: 78.723213


Epoch 77/100 [Train]:   9%|▉         | 407/4538 [00:11<01:58, 34.73it/s]

Batch 400/4538 | Loss: 54.304462


Epoch 77/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.71it/s]

Batch 600/4538 | Loss: 77.967003


Epoch 77/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.78it/s]

Batch 800/4538 | Loss: 76.167610


Epoch 77/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:41, 34.76it/s]

Batch 1000/4538 | Loss: 92.726257


Epoch 77/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:35, 34.73it/s]

Batch 1200/4538 | Loss: 99.049103


Epoch 77/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.77it/s]

Batch 1400/4538 | Loss: 89.550972


Epoch 77/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.75it/s]

Batch 1600/4538 | Loss: 99.123642


Epoch 77/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.81it/s]

Batch 1800/4538 | Loss: 95.295929


Epoch 77/100 [Train]:  44%|████▍     | 2007/4538 [00:57<01:12, 34.72it/s]

Batch 2000/4538 | Loss: 85.741730


Epoch 77/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:06, 34.81it/s]

Batch 2200/4538 | Loss: 80.876686


Epoch 77/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.82it/s]

Batch 2400/4538 | Loss: 69.493019


Epoch 77/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.65it/s]

Batch 2600/4538 | Loss: 59.622150


Epoch 77/100 [Train]:  62%|██████▏   | 2807/4538 [01:20<00:51, 33.65it/s]

Batch 2800/4538 | Loss: 87.045776


Epoch 77/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:44, 34.65it/s]

Batch 3000/4538 | Loss: 78.981354


Epoch 77/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.62it/s]

Batch 3200/4538 | Loss: 101.015282


Epoch 77/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.70it/s]

Batch 3400/4538 | Loss: 77.411705


Epoch 77/100 [Train]:  79%|███████▉  | 3607/4538 [01:43<00:26, 34.66it/s]

Batch 3600/4538 | Loss: 92.651802


Epoch 77/100 [Train]:  84%|████████▍ | 3807/4538 [01:49<00:21, 34.72it/s]

Batch 3800/4538 | Loss: 97.002335


Epoch 77/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.73it/s]

Batch 4000/4538 | Loss: 77.779388


Epoch 77/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.71it/s]

Batch 4200/4538 | Loss: 98.609970


Epoch 77/100 [Train]:  97%|█████████▋| 4407/4538 [02:06<00:03, 34.64it/s]

Batch 4400/4538 | Loss: 70.077415


Epoch 77/100 [Train]: 100%|██████████| 4538/4538 [02:10<00:00, 34.72it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 124.39it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.39it/s]


Epoch 77/100 | Average Train Loss: 83.922348 | Average Validation Loss: 83.710774
Saved new best model with validation loss: 83.710774


Epoch 78/100 [Train]:   0%|          | 7/4538 [00:00<02:23, 31.59it/s]

Batch 0/4538 | Loss: 77.226379


Epoch 78/100 [Train]:   5%|▍         | 207/4538 [00:05<02:05, 34.60it/s]

Batch 200/4538 | Loss: 78.000122


Epoch 78/100 [Train]:   9%|▉         | 407/4538 [00:11<01:58, 34.72it/s]

Batch 400/4538 | Loss: 80.956032


Epoch 78/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.64it/s]

Batch 600/4538 | Loss: 83.923332


Epoch 78/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.60it/s]

Batch 800/4538 | Loss: 82.738693


Epoch 78/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.54it/s]

Batch 1000/4538 | Loss: 96.776825


Epoch 78/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.61it/s]

Batch 1200/4538 | Loss: 108.781082


Epoch 78/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.48it/s]

Batch 1400/4538 | Loss: 60.598495


Epoch 78/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.64it/s]

Batch 1600/4538 | Loss: 77.369576


Epoch 78/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.59it/s]

Batch 1800/4538 | Loss: 77.896263


Epoch 78/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.57it/s]

Batch 2000/4538 | Loss: 78.646812


Epoch 78/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.54it/s]

Batch 2200/4538 | Loss: 78.482933


Epoch 78/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.58it/s]

Batch 2400/4538 | Loss: 71.441719


Epoch 78/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.62it/s]

Batch 2600/4538 | Loss: 86.050079


Epoch 78/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:49, 34.64it/s]

Batch 2800/4538 | Loss: 84.722519


Epoch 78/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:44, 34.50it/s]

Batch 3000/4538 | Loss: 86.438095


Epoch 78/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.57it/s]

Batch 3200/4538 | Loss: 81.269707


Epoch 78/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.62it/s]

Batch 3400/4538 | Loss: 85.917557


Epoch 78/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.67it/s]

Batch 3600/4538 | Loss: 81.156967


Epoch 78/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.60it/s]

Batch 3800/4538 | Loss: 92.143776


Epoch 78/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.64it/s]

Batch 4000/4538 | Loss: 107.852333


Epoch 78/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.57it/s]

Batch 4200/4538 | Loss: 50.989311


Epoch 78/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.61it/s]

Batch 4400/4538 | Loss: 89.540604


Epoch 78/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.59it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.87it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 92.40it/s]


Epoch 78/100 | Average Train Loss: 83.822021 | Average Validation Loss: 83.608568
Saved new best model with validation loss: 83.608568


Epoch 79/100 [Train]:   0%|          | 7/4538 [00:00<02:23, 31.56it/s]

Batch 0/4538 | Loss: 68.684303


Epoch 79/100 [Train]:   5%|▍         | 207/4538 [00:05<02:04, 34.73it/s]

Batch 200/4538 | Loss: 79.586464


Epoch 79/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.66it/s]

Batch 400/4538 | Loss: 99.795654


Epoch 79/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.69it/s]

Batch 600/4538 | Loss: 81.279984


Epoch 79/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.71it/s]

Batch 800/4538 | Loss: 72.362312


Epoch 79/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:41, 34.67it/s]

Batch 1000/4538 | Loss: 81.664124


Epoch 79/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.63it/s]

Batch 1200/4538 | Loss: 79.543427


Epoch 79/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.65it/s]

Batch 1400/4538 | Loss: 69.992821


Epoch 79/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.64it/s]

Batch 1600/4538 | Loss: 83.489441


Epoch 79/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.58it/s]

Batch 1800/4538 | Loss: 83.704178


Epoch 79/100 [Train]:  44%|████▍     | 2007/4538 [00:57<01:14, 34.02it/s]

Batch 2000/4538 | Loss: 92.188553


Epoch 79/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.62it/s]

Batch 2200/4538 | Loss: 80.258751


Epoch 79/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.75it/s]

Batch 2400/4538 | Loss: 77.632492


Epoch 79/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.67it/s]

Batch 2600/4538 | Loss: 71.104309


Epoch 79/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:49, 34.69it/s]

Batch 2800/4538 | Loss: 90.321037


Epoch 79/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:44, 34.77it/s]

Batch 3000/4538 | Loss: 87.768311


Epoch 79/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.66it/s]

Batch 3200/4538 | Loss: 89.119881


Epoch 79/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.70it/s]

Batch 3400/4538 | Loss: 81.862587


Epoch 79/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.69it/s]

Batch 3600/4538 | Loss: 76.739098


Epoch 79/100 [Train]:  84%|████████▍ | 3807/4538 [01:49<00:21, 34.64it/s]

Batch 3800/4538 | Loss: 82.288971


Epoch 79/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.67it/s]

Batch 4000/4538 | Loss: 77.548149


Epoch 79/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.78it/s]

Batch 4200/4538 | Loss: 76.758415


Epoch 79/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.68it/s]

Batch 4400/4538 | Loss: 84.712204


Epoch 79/100 [Train]: 100%|██████████| 4538/4538 [02:10<00:00, 34.64it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.89it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 92.40it/s]


Epoch 79/100 | Average Train Loss: 83.725807 | Average Validation Loss: 83.533623
Saved new best model with validation loss: 83.533623


Epoch 80/100 [Train]:   0%|          | 3/4538 [00:00<02:51, 26.43it/s]

Batch 0/4538 | Loss: 83.469353


Epoch 80/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.63it/s]

Batch 200/4538 | Loss: 76.639908


Epoch 80/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.63it/s]

Batch 400/4538 | Loss: 83.281509


Epoch 80/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.59it/s]

Batch 600/4538 | Loss: 82.889977


Epoch 80/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.64it/s]

Batch 800/4538 | Loss: 83.134964


Epoch 80/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:41, 34.63it/s]

Batch 1000/4538 | Loss: 88.805763


Epoch 80/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.63it/s]

Batch 1200/4538 | Loss: 75.782669


Epoch 80/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.67it/s]

Batch 1400/4538 | Loss: 59.815056


Epoch 80/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.53it/s]

Batch 1600/4538 | Loss: 78.168159


Epoch 80/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.63it/s]

Batch 1800/4538 | Loss: 81.060097


Epoch 80/100 [Train]:  44%|████▍     | 2007/4538 [00:57<01:12, 34.79it/s]

Batch 2000/4538 | Loss: 96.040108


Epoch 80/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:07, 34.77it/s]

Batch 2200/4538 | Loss: 84.626579


Epoch 80/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.79it/s]

Batch 2400/4538 | Loss: 89.209541


Epoch 80/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.70it/s]

Batch 2600/4538 | Loss: 71.909424


Epoch 80/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:49, 34.77it/s]

Batch 2800/4538 | Loss: 80.930672


Epoch 80/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:43, 34.80it/s]

Batch 3000/4538 | Loss: 74.902939


Epoch 80/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.74it/s]

Batch 3200/4538 | Loss: 76.046570


Epoch 80/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.74it/s]

Batch 3400/4538 | Loss: 99.259651


Epoch 80/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.68it/s]

Batch 3600/4538 | Loss: 76.364342


Epoch 80/100 [Train]:  84%|████████▍ | 3807/4538 [01:49<00:21, 34.78it/s]

Batch 3800/4538 | Loss: 92.519592


Epoch 80/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.71it/s]

Batch 4000/4538 | Loss: 96.351624


Epoch 80/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.49it/s]

Batch 4200/4538 | Loss: 84.409103


Epoch 80/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.68it/s]

Batch 4400/4538 | Loss: 79.383270


Epoch 80/100 [Train]: 100%|██████████| 4538/4538 [02:10<00:00, 34.67it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:37<00:00, 121.37it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 92.67it/s]


Epoch 80/100 | Average Train Loss: 83.637894 | Average Validation Loss: 83.456755
Saved new best model with validation loss: 83.456755


Epoch 81/100 [Train]:   0%|          | 7/4538 [00:00<02:23, 31.57it/s]

Batch 0/4538 | Loss: 77.280228


Epoch 81/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.38it/s]

Batch 200/4538 | Loss: 76.102165


Epoch 81/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.54it/s]

Batch 400/4538 | Loss: 77.496872


Epoch 81/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.52it/s]

Batch 600/4538 | Loss: 74.980499


Epoch 81/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:48, 34.53it/s]

Batch 800/4538 | Loss: 92.716217


Epoch 81/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:42, 34.60it/s]

Batch 1000/4538 | Loss: 84.259087


Epoch 81/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:36, 34.52it/s]

Batch 1200/4538 | Loss: 80.984909


Epoch 81/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.49it/s]

Batch 1400/4538 | Loss: 91.107956


Epoch 81/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:25, 34.38it/s]

Batch 1600/4538 | Loss: 86.308472


Epoch 81/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:19, 34.53it/s]

Batch 1800/4538 | Loss: 91.518311


Epoch 81/100 [Train]:  44%|████▍     | 2007/4538 [00:58<01:13, 34.44it/s]

Batch 2000/4538 | Loss: 85.154915


Epoch 81/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:09, 33.71it/s]

Batch 2200/4538 | Loss: 92.639900


Epoch 81/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.57it/s]

Batch 2400/4538 | Loss: 89.820839


Epoch 81/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.60it/s]

Batch 2600/4538 | Loss: 63.038376


Epoch 81/100 [Train]:  62%|██████▏   | 2807/4538 [01:21<00:50, 34.24it/s]

Batch 2800/4538 | Loss: 74.385262


Epoch 81/100 [Train]:  66%|██████▋   | 3007/4538 [01:27<00:44, 34.38it/s]

Batch 3000/4538 | Loss: 78.242996


Epoch 81/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.75it/s]

Batch 3200/4538 | Loss: 93.442200


Epoch 81/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.44it/s]

Batch 3400/4538 | Loss: 76.615776


Epoch 81/100 [Train]:  79%|███████▉  | 3607/4538 [01:44<00:26, 34.54it/s]

Batch 3600/4538 | Loss: 80.047722


Epoch 81/100 [Train]:  84%|████████▍ | 3807/4538 [01:50<00:21, 34.54it/s]

Batch 3800/4538 | Loss: 94.007164


Epoch 81/100 [Train]:  88%|████████▊ | 4007/4538 [01:56<00:15, 34.62it/s]

Batch 4000/4538 | Loss: 97.554276


Epoch 81/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.59it/s]

Batch 4200/4538 | Loss: 98.921959


Epoch 81/100 [Train]:  97%|█████████▋| 4407/4538 [02:07<00:03, 34.62it/s]

Batch 4400/4538 | Loss: 78.452217


Epoch 81/100 [Train]: 100%|██████████| 4538/4538 [02:11<00:00, 34.54it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 123.65it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 92.38it/s]


Epoch 81/100 | Average Train Loss: 83.556366 | Average Validation Loss: 83.397866
Saved new best model with validation loss: 83.397866


Epoch 82/100 [Train]:   0%|          | 7/4538 [00:00<02:23, 31.51it/s]

Batch 0/4538 | Loss: 85.253822


Epoch 82/100 [Train]:   5%|▍         | 207/4538 [00:06<02:05, 34.55it/s]

Batch 200/4538 | Loss: 81.979561


Epoch 82/100 [Train]:   9%|▉         | 407/4538 [00:11<01:59, 34.70it/s]

Batch 400/4538 | Loss: 76.993393


Epoch 82/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:53, 34.79it/s]

Batch 600/4538 | Loss: 73.561775


Epoch 82/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.75it/s]

Batch 800/4538 | Loss: 76.199356


Epoch 82/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:41, 34.72it/s]

Batch 1000/4538 | Loss: 89.175713


Epoch 82/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:35, 34.76it/s]

Batch 1200/4538 | Loss: 73.016922


Epoch 82/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.74it/s]

Batch 1400/4538 | Loss: 88.133179


Epoch 82/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.65it/s]

Batch 1600/4538 | Loss: 55.778698


Epoch 82/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:20, 33.97it/s]

Batch 1800/4538 | Loss: 83.464264


Epoch 82/100 [Train]:  44%|████▍     | 2007/4538 [00:57<01:12, 34.71it/s]

Batch 2000/4538 | Loss: 65.254509


Epoch 82/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:06, 34.80it/s]

Batch 2200/4538 | Loss: 73.615372


Epoch 82/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.69it/s]

Batch 2400/4538 | Loss: 75.970383


Epoch 82/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.89it/s]

Batch 2600/4538 | Loss: 87.580124


Epoch 82/100 [Train]:  62%|██████▏   | 2807/4538 [01:20<00:49, 35.08it/s]

Batch 2800/4538 | Loss: 73.189972


Epoch 82/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:44, 34.78it/s]

Batch 3000/4538 | Loss: 81.453194


Epoch 82/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.84it/s]

Batch 3200/4538 | Loss: 80.628075


Epoch 82/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.72it/s]

Batch 3400/4538 | Loss: 90.488564


Epoch 82/100 [Train]:  79%|███████▉  | 3607/4538 [01:43<00:26, 34.82it/s]

Batch 3600/4538 | Loss: 70.927612


Epoch 82/100 [Train]:  84%|████████▍ | 3807/4538 [01:49<00:20, 34.88it/s]

Batch 3800/4538 | Loss: 77.473640


Epoch 82/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.74it/s]

Batch 4000/4538 | Loss: 90.230713


Epoch 82/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.69it/s]

Batch 4200/4538 | Loss: 67.668518


Epoch 82/100 [Train]:  97%|█████████▋| 4407/4538 [02:06<00:03, 34.64it/s]

Batch 4400/4538 | Loss: 87.103836


Epoch 82/100 [Train]: 100%|██████████| 4538/4538 [02:10<00:00, 34.74it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 125.18it/s]
Validation: 100%|██████████| 787/787 [00:08<00:00, 91.57it/s]


Epoch 82/100 | Average Train Loss: 83.493042 | Average Validation Loss: 83.335744
Saved new best model with validation loss: 83.335744


Epoch 83/100 [Train]:   0%|          | 7/4538 [00:00<02:21, 31.98it/s]

Batch 0/4538 | Loss: 73.545319


Epoch 83/100 [Train]:   5%|▍         | 207/4538 [00:05<02:04, 34.70it/s]

Batch 200/4538 | Loss: 109.655869


Epoch 83/100 [Train]:   9%|▉         | 407/4538 [00:11<02:00, 34.19it/s]

Batch 400/4538 | Loss: 98.829636


Epoch 83/100 [Train]:  13%|█▎        | 607/4538 [00:17<01:52, 34.82it/s]

Batch 600/4538 | Loss: 85.735451


Epoch 83/100 [Train]:  18%|█▊        | 807/4538 [00:23<01:47, 34.74it/s]

Batch 800/4538 | Loss: 72.291389


Epoch 83/100 [Train]:  22%|██▏       | 1007/4538 [00:29<01:41, 34.78it/s]

Batch 1000/4538 | Loss: 82.937775


Epoch 83/100 [Train]:  27%|██▋       | 1207/4538 [00:34<01:35, 34.76it/s]

Batch 1200/4538 | Loss: 86.647888


Epoch 83/100 [Train]:  31%|███       | 1407/4538 [00:40<01:30, 34.70it/s]

Batch 1400/4538 | Loss: 100.603600


Epoch 83/100 [Train]:  35%|███▌      | 1607/4538 [00:46<01:24, 34.69it/s]

Batch 1600/4538 | Loss: 92.309410


Epoch 83/100 [Train]:  40%|███▉      | 1807/4538 [00:52<01:18, 34.70it/s]

Batch 1800/4538 | Loss: 73.282234


Epoch 83/100 [Train]:  44%|████▍     | 2007/4538 [00:57<01:13, 34.67it/s]

Batch 2000/4538 | Loss: 84.740784


Epoch 83/100 [Train]:  49%|████▊     | 2207/4538 [01:03<01:06, 34.91it/s]

Batch 2200/4538 | Loss: 81.753540


Epoch 83/100 [Train]:  53%|█████▎    | 2407/4538 [01:09<01:01, 34.79it/s]

Batch 2400/4538 | Loss: 86.686890


Epoch 83/100 [Train]:  57%|█████▋    | 2607/4538 [01:15<00:55, 34.71it/s]

Batch 2600/4538 | Loss: 86.617249


Epoch 83/100 [Train]:  62%|██████▏   | 2807/4538 [01:20<00:49, 34.69it/s]

Batch 2800/4538 | Loss: 84.908279


Epoch 83/100 [Train]:  66%|██████▋   | 3007/4538 [01:26<00:44, 34.34it/s]

Batch 3000/4538 | Loss: 91.203056


Epoch 83/100 [Train]:  71%|███████   | 3207/4538 [01:32<00:38, 34.83it/s]

Batch 3200/4538 | Loss: 70.760551


Epoch 83/100 [Train]:  75%|███████▌  | 3407/4538 [01:38<00:32, 34.82it/s]

Batch 3400/4538 | Loss: 90.208366


Epoch 83/100 [Train]:  79%|███████▉  | 3607/4538 [01:43<00:26, 34.87it/s]

Batch 3600/4538 | Loss: 92.246162


Epoch 83/100 [Train]:  84%|████████▍ | 3807/4538 [01:49<00:20, 34.85it/s]

Batch 3800/4538 | Loss: 99.187447


Epoch 83/100 [Train]:  88%|████████▊ | 4007/4538 [01:55<00:15, 34.76it/s]

Batch 4000/4538 | Loss: 88.566139


Epoch 83/100 [Train]:  93%|█████████▎| 4207/4538 [02:01<00:09, 34.82it/s]

Batch 4200/4538 | Loss: 97.394218


Epoch 83/100 [Train]:  97%|█████████▋| 4407/4538 [02:06<00:03, 34.78it/s]

Batch 4400/4538 | Loss: 82.207443


Epoch 83/100 [Train]: 100%|██████████| 4538/4538 [02:10<00:00, 34.75it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:36<00:00, 125.76it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 103.19it/s]


Epoch 83/100 | Average Train Loss: 83.428719 | Average Validation Loss: 83.288739
Saved new best model with validation loss: 83.288739


Epoch 84/100 [Train]:   0%|          | 7/4538 [00:00<02:11, 34.50it/s]

Batch 0/4538 | Loss: 90.755875


Epoch 84/100 [Train]:   5%|▍         | 207/4538 [00:05<01:46, 40.81it/s]

Batch 200/4538 | Loss: 69.838928


Epoch 84/100 [Train]:   9%|▉         | 407/4538 [00:10<01:41, 40.87it/s]

Batch 400/4538 | Loss: 72.582497


Epoch 84/100 [Train]:  13%|█▎        | 607/4538 [00:15<01:36, 40.85it/s]

Batch 600/4538 | Loss: 87.402252


Epoch 84/100 [Train]:  18%|█▊        | 806/4538 [00:19<01:30, 41.05it/s]

Batch 800/4538 | Loss: 93.388191


Epoch 84/100 [Train]:  22%|██▏       | 1006/4538 [00:24<01:25, 41.22it/s]

Batch 1000/4538 | Loss: 88.985703


Epoch 84/100 [Train]:  27%|██▋       | 1206/4538 [00:29<01:21, 41.13it/s]

Batch 1200/4538 | Loss: 78.642967


Epoch 84/100 [Train]:  31%|███       | 1406/4538 [00:34<01:16, 41.02it/s]

Batch 1400/4538 | Loss: 86.057770


Epoch 84/100 [Train]:  35%|███▌      | 1606/4538 [00:39<01:10, 41.43it/s]

Batch 1600/4538 | Loss: 81.776566


Epoch 84/100 [Train]:  40%|███▉      | 1806/4538 [00:44<01:06, 41.27it/s]

Batch 1800/4538 | Loss: 88.956589


Epoch 84/100 [Train]:  44%|████▍     | 2006/4538 [00:49<01:01, 41.07it/s]

Batch 2000/4538 | Loss: 86.240677


Epoch 84/100 [Train]:  49%|████▊     | 2206/4538 [00:53<00:56, 41.11it/s]

Batch 2200/4538 | Loss: 104.032013


Epoch 84/100 [Train]:  53%|█████▎    | 2406/4538 [00:58<00:52, 40.90it/s]

Batch 2400/4538 | Loss: 85.383690


Epoch 84/100 [Train]:  57%|█████▋    | 2606/4538 [01:03<00:47, 40.67it/s]

Batch 2600/4538 | Loss: 84.992928


Epoch 84/100 [Train]:  62%|██████▏   | 2806/4538 [01:08<00:42, 40.71it/s]

Batch 2800/4538 | Loss: 89.434280


Epoch 84/100 [Train]:  66%|██████▌   | 3006/4538 [01:13<00:37, 41.07it/s]

Batch 3000/4538 | Loss: 90.998230


Epoch 84/100 [Train]:  71%|███████   | 3206/4538 [01:18<00:32, 40.80it/s]

Batch 3200/4538 | Loss: 63.015839


Epoch 84/100 [Train]:  75%|███████▌  | 3406/4538 [01:23<00:27, 41.11it/s]

Batch 3400/4538 | Loss: 68.909767


Epoch 84/100 [Train]:  79%|███████▉  | 3606/4538 [01:28<00:22, 41.06it/s]

Batch 3600/4538 | Loss: 72.452431


Epoch 84/100 [Train]:  84%|████████▍ | 3806/4538 [01:32<00:17, 41.05it/s]

Batch 3800/4538 | Loss: 54.656395


Epoch 84/100 [Train]:  88%|████████▊ | 4006/4538 [01:37<00:12, 40.95it/s]

Batch 4000/4538 | Loss: 67.235901


Epoch 84/100 [Train]:  93%|█████████▎| 4206/4538 [01:42<00:08, 41.26it/s]

Batch 4200/4538 | Loss: 84.512222


Epoch 84/100 [Train]:  97%|█████████▋| 4406/4538 [01:47<00:03, 41.03it/s]

Batch 4400/4538 | Loss: 82.388412


Epoch 84/100 [Train]: 100%|██████████| 4538/4538 [01:50<00:00, 40.94it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 132.07it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 103.94it/s]


Epoch 84/100 | Average Train Loss: 83.374451 | Average Validation Loss: 83.244620
Saved new best model with validation loss: 83.244620


Epoch 85/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 78.205566


Epoch 85/100 [Train]:   5%|▍         | 208/4538 [00:05<01:52, 38.40it/s]

Batch 200/4538 | Loss: 92.905464


Epoch 85/100 [Train]:   9%|▉         | 408/4538 [00:10<01:47, 38.32it/s]

Batch 400/4538 | Loss: 75.877090


Epoch 85/100 [Train]:  13%|█▎        | 608/4538 [00:15<01:42, 38.38it/s]

Batch 600/4538 | Loss: 73.613815


Epoch 85/100 [Train]:  18%|█▊        | 808/4538 [00:21<01:36, 38.46it/s]

Batch 800/4538 | Loss: 135.392303


Epoch 85/100 [Train]:  22%|██▏       | 1008/4538 [00:26<01:32, 38.34it/s]

Batch 1000/4538 | Loss: 78.087326


Epoch 85/100 [Train]:  27%|██▋       | 1207/4538 [00:31<01:20, 41.15it/s]

Batch 1200/4538 | Loss: 84.200577


Epoch 85/100 [Train]:  31%|███       | 1407/4538 [00:36<01:16, 40.70it/s]

Batch 1400/4538 | Loss: 88.983772


Epoch 85/100 [Train]:  35%|███▌      | 1606/4538 [00:41<01:15, 38.59it/s]

Batch 1600/4538 | Loss: 89.054840


Epoch 85/100 [Train]:  40%|███▉      | 1806/4538 [00:46<01:10, 38.52it/s]

Batch 1800/4538 | Loss: 95.231476


Epoch 85/100 [Train]:  44%|████▍     | 2006/4538 [00:51<01:05, 38.56it/s]

Batch 2000/4538 | Loss: 74.183235


Epoch 85/100 [Train]:  49%|████▊     | 2206/4538 [00:56<01:00, 38.45it/s]

Batch 2200/4538 | Loss: 93.059181


Epoch 85/100 [Train]:  53%|█████▎    | 2406/4538 [01:01<00:55, 38.51it/s]

Batch 2400/4538 | Loss: 76.855118


Epoch 85/100 [Train]:  57%|█████▋    | 2606/4538 [01:07<00:50, 38.55it/s]

Batch 2600/4538 | Loss: 84.049187


Epoch 85/100 [Train]:  62%|██████▏   | 2806/4538 [01:12<00:44, 38.59it/s]

Batch 2800/4538 | Loss: 73.588058


Epoch 85/100 [Train]:  66%|██████▌   | 3006/4538 [01:17<00:39, 38.55it/s]

Batch 3000/4538 | Loss: 82.448402


Epoch 85/100 [Train]:  71%|███████   | 3206/4538 [01:22<00:34, 38.75it/s]

Batch 3200/4538 | Loss: 77.596909


Epoch 85/100 [Train]:  75%|███████▌  | 3406/4538 [01:27<00:29, 38.53it/s]

Batch 3400/4538 | Loss: 87.038910


Epoch 85/100 [Train]:  79%|███████▉  | 3606/4538 [01:33<00:24, 38.68it/s]

Batch 3600/4538 | Loss: 76.916718


Epoch 85/100 [Train]:  84%|████████▍ | 3806/4538 [01:38<00:18, 38.61it/s]

Batch 3800/4538 | Loss: 80.703217


Epoch 85/100 [Train]:  88%|████████▊ | 4006/4538 [01:43<00:13, 38.67it/s]

Batch 4000/4538 | Loss: 80.114479


Epoch 85/100 [Train]:  93%|█████████▎| 4206/4538 [01:48<00:08, 38.50it/s]

Batch 4200/4538 | Loss: 102.323486


Epoch 85/100 [Train]:  97%|█████████▋| 4406/4538 [01:53<00:03, 38.60it/s]

Batch 4400/4538 | Loss: 78.394882


Epoch 85/100 [Train]: 100%|██████████| 4538/4538 [01:57<00:00, 38.71it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 131.20it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 101.81it/s]


Epoch 85/100 | Average Train Loss: 83.313545 | Average Validation Loss: 83.193286
Saved new best model with validation loss: 83.193286


Epoch 86/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 72.250435


Epoch 86/100 [Train]:   5%|▍         | 208/4538 [00:05<01:51, 38.75it/s]

Batch 200/4538 | Loss: 84.267448


Epoch 86/100 [Train]:   9%|▉         | 408/4538 [00:10<01:46, 38.78it/s]

Batch 400/4538 | Loss: 115.147598


Epoch 86/100 [Train]:  13%|█▎        | 608/4538 [00:15<01:41, 38.68it/s]

Batch 600/4538 | Loss: 71.557709


Epoch 86/100 [Train]:  18%|█▊        | 808/4538 [00:20<01:36, 38.61it/s]

Batch 800/4538 | Loss: 95.028442


Epoch 86/100 [Train]:  22%|██▏       | 1008/4538 [00:26<01:30, 38.83it/s]

Batch 1000/4538 | Loss: 82.573685


Epoch 86/100 [Train]:  27%|██▋       | 1208/4538 [00:31<01:25, 38.86it/s]

Batch 1200/4538 | Loss: 86.181252


Epoch 86/100 [Train]:  31%|███       | 1405/4538 [00:36<01:16, 40.79it/s]

Batch 1400/4538 | Loss: 86.070435


Epoch 86/100 [Train]:  35%|███▌      | 1605/4538 [00:41<01:11, 41.01it/s]

Batch 1600/4538 | Loss: 78.323723


Epoch 86/100 [Train]:  40%|███▉      | 1805/4538 [00:45<01:06, 40.88it/s]

Batch 1800/4538 | Loss: 93.471245


Epoch 86/100 [Train]:  44%|████▍     | 2007/4538 [00:51<01:05, 38.83it/s]

Batch 2000/4538 | Loss: 83.828659


Epoch 86/100 [Train]:  49%|████▊     | 2207/4538 [00:56<01:00, 38.81it/s]

Batch 2200/4538 | Loss: 73.394867


Epoch 86/100 [Train]:  53%|█████▎    | 2407/4538 [01:01<00:54, 38.79it/s]

Batch 2400/4538 | Loss: 86.249512


Epoch 86/100 [Train]:  57%|█████▋    | 2607/4538 [01:06<00:49, 38.74it/s]

Batch 2600/4538 | Loss: 74.618065


Epoch 86/100 [Train]:  62%|██████▏   | 2807/4538 [01:11<00:44, 38.76it/s]

Batch 2800/4538 | Loss: 105.735840


Epoch 86/100 [Train]:  66%|██████▋   | 3007/4538 [01:16<00:39, 38.62it/s]

Batch 3000/4538 | Loss: 76.946983


Epoch 86/100 [Train]:  71%|███████   | 3207/4538 [01:22<00:34, 38.62it/s]

Batch 3200/4538 | Loss: 81.278984


Epoch 86/100 [Train]:  75%|███████▌  | 3407/4538 [01:27<00:29, 38.71it/s]

Batch 3400/4538 | Loss: 96.529953


Epoch 86/100 [Train]:  79%|███████▉  | 3607/4538 [01:32<00:24, 38.56it/s]

Batch 3600/4538 | Loss: 87.441910


Epoch 86/100 [Train]:  84%|████████▍ | 3807/4538 [01:37<00:18, 38.60it/s]

Batch 3800/4538 | Loss: 112.046303


Epoch 86/100 [Train]:  88%|████████▊ | 4007/4538 [01:42<00:13, 38.73it/s]

Batch 4000/4538 | Loss: 81.356827


Epoch 86/100 [Train]:  93%|█████████▎| 4207/4538 [01:47<00:08, 38.64it/s]

Batch 4200/4538 | Loss: 63.632931


Epoch 86/100 [Train]:  97%|█████████▋| 4407/4538 [01:53<00:03, 38.47it/s]

Batch 4400/4538 | Loss: 86.035049


Epoch 86/100 [Train]: 100%|██████████| 4538/4538 [01:56<00:00, 38.95it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 132.89it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 102.40it/s]


Epoch 86/100 | Average Train Loss: 83.269638 | Average Validation Loss: 83.164210
Saved new best model with validation loss: 83.164210


Epoch 87/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 84.713211


Epoch 87/100 [Train]:   5%|▍         | 206/4538 [00:05<01:48, 40.03it/s]

Batch 200/4538 | Loss: 94.547249


Epoch 87/100 [Train]:   9%|▉         | 406/4538 [00:10<01:39, 41.33it/s]

Batch 400/4538 | Loss: 87.204292


Epoch 87/100 [Train]:  13%|█▎        | 606/4538 [00:14<01:35, 41.33it/s]

Batch 600/4538 | Loss: 76.611298


Epoch 87/100 [Train]:  18%|█▊        | 806/4538 [00:19<01:30, 41.46it/s]

Batch 800/4538 | Loss: 91.638023


Epoch 87/100 [Train]:  22%|██▏       | 1006/4538 [00:24<01:25, 41.18it/s]

Batch 1000/4538 | Loss: 99.163528


Epoch 87/100 [Train]:  27%|██▋       | 1206/4538 [00:29<01:20, 41.18it/s]

Batch 1200/4538 | Loss: 78.716301


Epoch 87/100 [Train]:  31%|███       | 1406/4538 [00:34<01:15, 41.30it/s]

Batch 1400/4538 | Loss: 77.175064


Epoch 87/100 [Train]:  35%|███▌      | 1606/4538 [00:39<01:11, 41.22it/s]

Batch 1600/4538 | Loss: 78.806885


Epoch 87/100 [Train]:  40%|███▉      | 1806/4538 [00:44<01:06, 41.32it/s]

Batch 1800/4538 | Loss: 88.534409


Epoch 87/100 [Train]:  44%|████▍     | 2006/4538 [00:48<01:01, 41.31it/s]

Batch 2000/4538 | Loss: 79.145599


Epoch 87/100 [Train]:  49%|████▊     | 2206/4538 [00:53<00:56, 41.45it/s]

Batch 2200/4538 | Loss: 83.266304


Epoch 87/100 [Train]:  53%|█████▎    | 2406/4538 [00:58<00:51, 41.49it/s]

Batch 2400/4538 | Loss: 98.232147


Epoch 87/100 [Train]:  57%|█████▋    | 2606/4538 [01:03<00:46, 41.26it/s]

Batch 2600/4538 | Loss: 85.447220


Epoch 87/100 [Train]:  62%|██████▏   | 2806/4538 [01:08<00:42, 41.04it/s]

Batch 2800/4538 | Loss: 81.704361


Epoch 87/100 [Train]:  66%|██████▌   | 3006/4538 [01:13<00:36, 41.41it/s]

Batch 3000/4538 | Loss: 88.314308


Epoch 87/100 [Train]:  71%|███████   | 3206/4538 [01:17<00:32, 41.29it/s]

Batch 3200/4538 | Loss: 71.291336


Epoch 87/100 [Train]:  75%|███████▌  | 3406/4538 [01:22<00:27, 41.29it/s]

Batch 3400/4538 | Loss: 88.684235


Epoch 87/100 [Train]:  79%|███████▉  | 3606/4538 [01:27<00:22, 41.22it/s]

Batch 3600/4538 | Loss: 65.950119


Epoch 87/100 [Train]:  84%|████████▍ | 3806/4538 [01:32<00:17, 41.04it/s]

Batch 3800/4538 | Loss: 80.371109


Epoch 87/100 [Train]:  88%|████████▊ | 4006/4538 [01:37<00:12, 41.29it/s]

Batch 4000/4538 | Loss: 59.501987


Epoch 87/100 [Train]:  93%|█████████▎| 4206/4538 [01:42<00:08, 41.07it/s]

Batch 4200/4538 | Loss: 87.441597


Epoch 87/100 [Train]:  97%|█████████▋| 4406/4538 [01:46<00:03, 41.17it/s]

Batch 4400/4538 | Loss: 79.900223


Epoch 87/100 [Train]: 100%|██████████| 4538/4538 [01:50<00:00, 41.20it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 133.20it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 102.53it/s]


Epoch 87/100 | Average Train Loss: 83.239563 | Average Validation Loss: 83.125956
Saved new best model with validation loss: 83.125956


Epoch 88/100 [Train]:   0%|          | 7/4538 [00:00<02:08, 35.20it/s]

Batch 0/4538 | Loss: 67.585777


Epoch 88/100 [Train]:   5%|▍         | 207/4538 [00:05<01:52, 38.67it/s]

Batch 200/4538 | Loss: 88.614441


Epoch 88/100 [Train]:   9%|▉         | 407/4538 [00:10<01:46, 38.74it/s]

Batch 400/4538 | Loss: 80.760963


Epoch 88/100 [Train]:  13%|█▎        | 607/4538 [00:15<01:41, 38.56it/s]

Batch 600/4538 | Loss: 87.448189


Epoch 88/100 [Train]:  18%|█▊        | 807/4538 [00:20<01:36, 38.61it/s]

Batch 800/4538 | Loss: 75.722588


Epoch 88/100 [Train]:  22%|██▏       | 1007/4538 [00:26<01:31, 38.66it/s]

Batch 1000/4538 | Loss: 79.166420


Epoch 88/100 [Train]:  27%|██▋       | 1207/4538 [00:31<01:26, 38.54it/s]

Batch 1200/4538 | Loss: 79.819382


Epoch 88/100 [Train]:  31%|███       | 1407/4538 [00:36<01:21, 38.39it/s]

Batch 1400/4538 | Loss: 82.751770


Epoch 88/100 [Train]:  35%|███▌      | 1607/4538 [00:41<01:15, 38.73it/s]

Batch 1600/4538 | Loss: 109.530869


Epoch 88/100 [Train]:  40%|███▉      | 1807/4538 [00:46<01:10, 38.58it/s]

Batch 1800/4538 | Loss: 85.574646


Epoch 88/100 [Train]:  44%|████▍     | 2007/4538 [00:52<01:05, 38.48it/s]

Batch 2000/4538 | Loss: 64.345161


Epoch 88/100 [Train]:  49%|████▊     | 2207/4538 [00:57<01:00, 38.54it/s]

Batch 2200/4538 | Loss: 82.259750


Epoch 88/100 [Train]:  53%|█████▎    | 2407/4538 [01:02<00:55, 38.62it/s]

Batch 2400/4538 | Loss: 89.995216


Epoch 88/100 [Train]:  57%|█████▋    | 2607/4538 [01:07<00:50, 38.52it/s]

Batch 2600/4538 | Loss: 83.245262


Epoch 88/100 [Train]:  62%|██████▏   | 2807/4538 [01:12<00:44, 38.52it/s]

Batch 2800/4538 | Loss: 87.705124


Epoch 88/100 [Train]:  66%|██████▋   | 3007/4538 [01:17<00:39, 38.50it/s]

Batch 3000/4538 | Loss: 61.189865


Epoch 88/100 [Train]:  71%|███████   | 3207/4538 [01:23<00:34, 38.65it/s]

Batch 3200/4538 | Loss: 82.887802


Epoch 88/100 [Train]:  75%|███████▌  | 3407/4538 [01:28<00:29, 38.61it/s]

Batch 3400/4538 | Loss: 87.611725


Epoch 88/100 [Train]:  79%|███████▉  | 3607/4538 [01:33<00:24, 38.57it/s]

Batch 3600/4538 | Loss: 88.776390


Epoch 88/100 [Train]:  84%|████████▍ | 3807/4538 [01:38<00:19, 37.62it/s]

Batch 3800/4538 | Loss: 85.164253


Epoch 88/100 [Train]:  88%|████████▊ | 4007/4538 [01:43<00:13, 38.76it/s]

Batch 4000/4538 | Loss: 82.485062


Epoch 88/100 [Train]:  93%|█████████▎| 4207/4538 [01:49<00:08, 38.81it/s]

Batch 4200/4538 | Loss: 83.324196


Epoch 88/100 [Train]:  97%|█████████▋| 4407/4538 [01:54<00:03, 38.62it/s]

Batch 4400/4538 | Loss: 76.921326


Epoch 88/100 [Train]: 100%|██████████| 4538/4538 [01:57<00:00, 38.57it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 132.63it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 100.77it/s]


Epoch 88/100 | Average Train Loss: 83.193771 | Average Validation Loss: 83.097258
Saved new best model with validation loss: 83.097258


Epoch 89/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 93.153534


Epoch 89/100 [Train]:   5%|▍         | 207/4538 [00:05<01:52, 38.50it/s]

Batch 200/4538 | Loss: 80.268578


Epoch 89/100 [Train]:   9%|▉         | 407/4538 [00:10<01:46, 38.65it/s]

Batch 400/4538 | Loss: 79.074333


Epoch 89/100 [Train]:  13%|█▎        | 607/4538 [00:15<01:42, 38.39it/s]

Batch 600/4538 | Loss: 58.337273


Epoch 89/100 [Train]:  18%|█▊        | 807/4538 [00:20<01:36, 38.55it/s]

Batch 800/4538 | Loss: 84.177780


Epoch 89/100 [Train]:  22%|██▏       | 1007/4538 [00:26<01:31, 38.51it/s]

Batch 1000/4538 | Loss: 86.859474


Epoch 89/100 [Train]:  27%|██▋       | 1207/4538 [00:31<01:26, 38.38it/s]

Batch 1200/4538 | Loss: 83.809227


Epoch 89/100 [Train]:  31%|███       | 1407/4538 [00:36<01:21, 38.35it/s]

Batch 1400/4538 | Loss: 54.525429


Epoch 89/100 [Train]:  35%|███▌      | 1607/4538 [00:41<01:16, 38.32it/s]

Batch 1600/4538 | Loss: 95.692093


Epoch 89/100 [Train]:  40%|███▉      | 1807/4538 [00:47<01:10, 38.47it/s]

Batch 1800/4538 | Loss: 89.942787


Epoch 89/100 [Train]:  44%|████▍     | 2007/4538 [00:52<01:05, 38.47it/s]

Batch 2000/4538 | Loss: 72.936768


Epoch 89/100 [Train]:  49%|████▊     | 2207/4538 [00:57<01:00, 38.39it/s]

Batch 2200/4538 | Loss: 85.664352


Epoch 89/100 [Train]:  53%|█████▎    | 2407/4538 [01:02<00:55, 38.53it/s]

Batch 2400/4538 | Loss: 71.960503


Epoch 89/100 [Train]:  57%|█████▋    | 2607/4538 [01:07<00:50, 38.46it/s]

Batch 2600/4538 | Loss: 84.748695


Epoch 89/100 [Train]:  62%|██████▏   | 2807/4538 [01:13<00:45, 38.37it/s]

Batch 2800/4538 | Loss: 79.490303


Epoch 89/100 [Train]:  66%|██████▋   | 3007/4538 [01:18<00:39, 38.45it/s]

Batch 3000/4538 | Loss: 86.933159


Epoch 89/100 [Train]:  71%|███████   | 3207/4538 [01:23<00:34, 38.59it/s]

Batch 3200/4538 | Loss: 97.223991


Epoch 89/100 [Train]:  75%|███████▌  | 3407/4538 [01:28<00:29, 38.19it/s]

Batch 3400/4538 | Loss: 84.505127


Epoch 89/100 [Train]:  79%|███████▉  | 3607/4538 [01:33<00:24, 38.37it/s]

Batch 3600/4538 | Loss: 60.179314


Epoch 89/100 [Train]:  84%|████████▍ | 3807/4538 [01:39<00:19, 38.39it/s]

Batch 3800/4538 | Loss: 76.363625


Epoch 89/100 [Train]:  88%|████████▊ | 4007/4538 [01:44<00:13, 38.22it/s]

Batch 4000/4538 | Loss: 88.759186


Epoch 89/100 [Train]:  93%|█████████▎| 4207/4538 [01:49<00:08, 38.60it/s]

Batch 4200/4538 | Loss: 84.428261


Epoch 89/100 [Train]:  97%|█████████▋| 4407/4538 [01:54<00:03, 38.59it/s]

Batch 4400/4538 | Loss: 86.771507


Epoch 89/100 [Train]: 100%|██████████| 4538/4538 [01:57<00:00, 38.49it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 130.66it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 102.90it/s]


Epoch 89/100 | Average Train Loss: 83.167480 | Average Validation Loss: 83.085844
Saved new best model with validation loss: 83.085844


Epoch 90/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 88.385010


Epoch 90/100 [Train]:   4%|▍         | 204/4538 [00:05<01:45, 40.94it/s]

Batch 200/4538 | Loss: 88.338852


Epoch 90/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 40.98it/s]

Batch 400/4538 | Loss: 101.033318


Epoch 90/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:35, 41.08it/s]

Batch 600/4538 | Loss: 82.054543


Epoch 90/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:30, 41.14it/s]

Batch 800/4538 | Loss: 92.907562


Epoch 90/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:26, 40.78it/s]

Batch 1000/4538 | Loss: 98.692818


Epoch 90/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:20, 41.21it/s]

Batch 1200/4538 | Loss: 77.818169


Epoch 90/100 [Train]:  31%|███       | 1404/4538 [00:34<01:16, 40.90it/s]

Batch 1400/4538 | Loss: 84.218681


Epoch 90/100 [Train]:  35%|███▌      | 1604/4538 [00:39<01:11, 40.80it/s]

Batch 1600/4538 | Loss: 83.580429


Epoch 90/100 [Train]:  40%|███▉      | 1804/4538 [00:44<01:06, 40.96it/s]

Batch 1800/4538 | Loss: 85.795677


Epoch 90/100 [Train]:  44%|████▍     | 2004/4538 [00:48<01:01, 41.22it/s]

Batch 2000/4538 | Loss: 72.864990


Epoch 90/100 [Train]:  49%|████▊     | 2204/4538 [00:53<00:56, 41.04it/s]

Batch 2200/4538 | Loss: 71.674301


Epoch 90/100 [Train]:  53%|█████▎    | 2404/4538 [00:58<00:52, 40.97it/s]

Batch 2400/4538 | Loss: 74.062477


Epoch 90/100 [Train]:  57%|█████▋    | 2604/4538 [01:03<00:48, 40.06it/s]

Batch 2600/4538 | Loss: 67.308243


Epoch 90/100 [Train]:  62%|██████▏   | 2804/4538 [01:08<00:42, 40.88it/s]

Batch 2800/4538 | Loss: 72.060722


Epoch 90/100 [Train]:  66%|██████▌   | 3004/4538 [01:13<00:37, 40.88it/s]

Batch 3000/4538 | Loss: 90.416840


Epoch 90/100 [Train]:  71%|███████   | 3204/4538 [01:18<00:32, 40.77it/s]

Batch 3200/4538 | Loss: 66.456581


Epoch 90/100 [Train]:  75%|███████▌  | 3404/4538 [01:23<00:27, 40.90it/s]

Batch 3400/4538 | Loss: 73.543877


Epoch 90/100 [Train]:  79%|███████▉  | 3604/4538 [01:27<00:22, 40.97it/s]

Batch 3600/4538 | Loss: 81.867310


Epoch 90/100 [Train]:  84%|████████▍ | 3804/4538 [01:32<00:18, 39.07it/s]

Batch 3800/4538 | Loss: 83.751488


Epoch 90/100 [Train]:  88%|████████▊ | 4008/4538 [01:37<00:12, 41.10it/s]

Batch 4000/4538 | Loss: 92.973991


Epoch 90/100 [Train]:  93%|█████████▎| 4208/4538 [01:42<00:07, 41.32it/s]

Batch 4200/4538 | Loss: 84.717224


Epoch 90/100 [Train]:  97%|█████████▋| 4408/4538 [01:47<00:03, 41.18it/s]

Batch 4400/4538 | Loss: 74.854225


Epoch 90/100 [Train]: 100%|██████████| 4538/4538 [01:50<00:00, 40.98it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 130.82it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 103.18it/s]


Epoch 90/100 | Average Train Loss: 83.141037 | Average Validation Loss: 83.066352
Saved new best model with validation loss: 83.066352


Epoch 91/100 [Train]:   0%|          | 8/4538 [00:00<02:08, 35.39it/s]

Batch 0/4538 | Loss: 94.968613


Epoch 91/100 [Train]:   5%|▍         | 208/4538 [00:05<01:45, 41.02it/s]

Batch 200/4538 | Loss: 75.168968


Epoch 91/100 [Train]:   9%|▉         | 408/4538 [00:09<01:40, 41.18it/s]

Batch 400/4538 | Loss: 100.140877


Epoch 91/100 [Train]:  13%|█▎        | 608/4538 [00:14<01:35, 40.95it/s]

Batch 600/4538 | Loss: 72.102654


Epoch 91/100 [Train]:  18%|█▊        | 808/4538 [00:19<01:31, 40.81it/s]

Batch 800/4538 | Loss: 77.860992


Epoch 91/100 [Train]:  22%|██▏       | 1008/4538 [00:24<01:26, 41.04it/s]

Batch 1000/4538 | Loss: 84.687088


Epoch 91/100 [Train]:  27%|██▋       | 1208/4538 [00:29<01:21, 40.88it/s]

Batch 1200/4538 | Loss: 77.222862


Epoch 91/100 [Train]:  31%|███       | 1408/4538 [00:34<01:16, 41.00it/s]

Batch 1400/4538 | Loss: 84.455742


Epoch 91/100 [Train]:  35%|███▌      | 1608/4538 [00:39<01:11, 40.77it/s]

Batch 1600/4538 | Loss: 92.680962


Epoch 91/100 [Train]:  40%|███▉      | 1808/4538 [00:44<01:06, 40.94it/s]

Batch 1800/4538 | Loss: 80.613609


Epoch 91/100 [Train]:  44%|████▍     | 2008/4538 [00:49<01:01, 41.14it/s]

Batch 2000/4538 | Loss: 82.908737


Epoch 91/100 [Train]:  49%|████▊     | 2208/4538 [00:53<00:56, 41.21it/s]

Batch 2200/4538 | Loss: 94.152870


Epoch 91/100 [Train]:  53%|█████▎    | 2408/4538 [00:58<00:52, 40.76it/s]

Batch 2400/4538 | Loss: 83.108208


Epoch 91/100 [Train]:  57%|█████▋    | 2607/4538 [01:03<00:46, 41.25it/s]

Batch 2600/4538 | Loss: 82.160080


Epoch 91/100 [Train]:  62%|██████▏   | 2807/4538 [01:08<00:42, 41.10it/s]

Batch 2800/4538 | Loss: 80.635780


Epoch 91/100 [Train]:  66%|██████▋   | 3007/4538 [01:13<00:37, 41.10it/s]

Batch 3000/4538 | Loss: 83.941589


Epoch 91/100 [Train]:  71%|███████   | 3207/4538 [01:18<00:32, 40.91it/s]

Batch 3200/4538 | Loss: 69.289383


Epoch 91/100 [Train]:  75%|███████▌  | 3407/4538 [01:23<00:27, 41.08it/s]

Batch 3400/4538 | Loss: 64.335670


Epoch 91/100 [Train]:  79%|███████▉  | 3607/4538 [01:28<00:22, 40.87it/s]

Batch 3600/4538 | Loss: 75.319641


Epoch 91/100 [Train]:  84%|████████▍ | 3806/4538 [01:32<00:17, 41.20it/s]

Batch 3800/4538 | Loss: 81.820778


Epoch 91/100 [Train]:  88%|████████▊ | 4006/4538 [01:37<00:13, 40.84it/s]

Batch 4000/4538 | Loss: 70.598167


Epoch 91/100 [Train]:  93%|█████████▎| 4206/4538 [01:42<00:08, 40.69it/s]

Batch 4200/4538 | Loss: 75.895767


Epoch 91/100 [Train]:  97%|█████████▋| 4406/4538 [01:47<00:03, 41.12it/s]

Batch 4400/4538 | Loss: 72.871269


Epoch 91/100 [Train]: 100%|██████████| 4538/4538 [01:50<00:00, 40.97it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 131.71it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 101.34it/s]


Epoch 91/100 | Average Train Loss: 83.118073 | Average Validation Loss: 83.051097
Saved new best model with validation loss: 83.051097


Epoch 92/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 88.870598


Epoch 92/100 [Train]:   4%|▍         | 204/4538 [00:05<01:46, 40.83it/s]

Batch 200/4538 | Loss: 81.229630


Epoch 92/100 [Train]:   9%|▉         | 404/4538 [00:09<01:41, 40.91it/s]

Batch 400/4538 | Loss: 81.499962


Epoch 92/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:36, 40.74it/s]

Batch 600/4538 | Loss: 92.330124


Epoch 92/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:31, 41.01it/s]

Batch 800/4538 | Loss: 79.286819


Epoch 92/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:26, 40.79it/s]

Batch 1000/4538 | Loss: 73.439743


Epoch 92/100 [Train]:  27%|██▋       | 1207/4538 [00:29<01:21, 41.01it/s]

Batch 1200/4538 | Loss: 92.796616


Epoch 92/100 [Train]:  31%|███       | 1407/4538 [00:34<01:16, 40.97it/s]

Batch 1400/4538 | Loss: 91.703583


Epoch 92/100 [Train]:  35%|███▌      | 1607/4538 [00:39<01:11, 40.73it/s]

Batch 1600/4538 | Loss: 87.637207


Epoch 92/100 [Train]:  40%|███▉      | 1807/4538 [00:44<01:07, 40.68it/s]

Batch 1800/4538 | Loss: 81.774437


Epoch 92/100 [Train]:  44%|████▍     | 2007/4538 [00:49<01:01, 40.86it/s]

Batch 2000/4538 | Loss: 89.209007


Epoch 92/100 [Train]:  49%|████▊     | 2207/4538 [00:54<00:56, 41.01it/s]

Batch 2200/4538 | Loss: 85.766586


Epoch 92/100 [Train]:  53%|█████▎    | 2405/4538 [00:59<00:52, 40.75it/s]

Batch 2400/4538 | Loss: 80.638580


Epoch 92/100 [Train]:  57%|█████▋    | 2605/4538 [01:03<00:47, 40.91it/s]

Batch 2600/4538 | Loss: 67.315437


Epoch 92/100 [Train]:  62%|██████▏   | 2805/4538 [01:08<00:42, 40.85it/s]

Batch 2800/4538 | Loss: 82.008247


Epoch 92/100 [Train]:  66%|██████▌   | 3005/4538 [01:13<00:37, 40.92it/s]

Batch 3000/4538 | Loss: 66.758987


Epoch 92/100 [Train]:  71%|███████   | 3205/4538 [01:18<00:32, 40.79it/s]

Batch 3200/4538 | Loss: 83.590469


Epoch 92/100 [Train]:  75%|███████▌  | 3405/4538 [01:23<00:27, 40.52it/s]

Batch 3400/4538 | Loss: 61.724499


Epoch 92/100 [Train]:  80%|███████▉  | 3608/4538 [01:28<00:22, 40.71it/s]

Batch 3600/4538 | Loss: 74.670326


Epoch 92/100 [Train]:  84%|████████▍ | 3808/4538 [01:33<00:17, 40.90it/s]

Batch 3800/4538 | Loss: 97.506157


Epoch 92/100 [Train]:  88%|████████▊ | 4008/4538 [01:38<00:12, 40.86it/s]

Batch 4000/4538 | Loss: 95.997925


Epoch 92/100 [Train]:  93%|█████████▎| 4208/4538 [01:43<00:08, 40.76it/s]

Batch 4200/4538 | Loss: 97.791328


Epoch 92/100 [Train]:  97%|█████████▋| 4408/4538 [01:48<00:03, 40.66it/s]

Batch 4400/4538 | Loss: 83.332130


Epoch 92/100 [Train]: 100%|██████████| 4538/4538 [01:51<00:00, 40.75it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 130.02it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 102.70it/s]


Epoch 92/100 | Average Train Loss: 83.109718 | Average Validation Loss: 83.043878
Saved new best model with validation loss: 83.043878


Epoch 93/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 85.485687


Epoch 93/100 [Train]:   4%|▍         | 204/4538 [00:04<01:45, 41.06it/s]

Batch 200/4538 | Loss: 85.963707


Epoch 93/100 [Train]:   9%|▉         | 408/4538 [00:09<01:40, 40.98it/s]

Batch 400/4538 | Loss: 85.386368


Epoch 93/100 [Train]:  13%|█▎        | 608/4538 [00:14<01:35, 40.98it/s]

Batch 600/4538 | Loss: 87.192017


Epoch 93/100 [Train]:  18%|█▊        | 808/4538 [00:19<01:31, 40.89it/s]

Batch 800/4538 | Loss: 86.009453


Epoch 93/100 [Train]:  22%|██▏       | 1008/4538 [00:24<01:26, 40.82it/s]

Batch 1000/4538 | Loss: 79.190002


Epoch 93/100 [Train]:  27%|██▋       | 1208/4538 [00:29<01:21, 40.76it/s]

Batch 1200/4538 | Loss: 80.628036


Epoch 93/100 [Train]:  31%|███       | 1408/4538 [00:34<01:16, 40.72it/s]

Batch 1400/4538 | Loss: 79.529358


Epoch 93/100 [Train]:  35%|███▌      | 1608/4538 [00:39<01:11, 40.79it/s]

Batch 1600/4538 | Loss: 75.536697


Epoch 93/100 [Train]:  40%|███▉      | 1808/4538 [00:44<01:06, 40.91it/s]

Batch 1800/4538 | Loss: 87.362595


Epoch 93/100 [Train]:  44%|████▍     | 2008/4538 [00:49<01:01, 40.81it/s]

Batch 2000/4538 | Loss: 80.522614


Epoch 93/100 [Train]:  49%|████▊     | 2207/4538 [00:54<00:56, 41.21it/s]

Batch 2200/4538 | Loss: 100.029259


Epoch 93/100 [Train]:  53%|█████▎    | 2407/4538 [00:58<00:52, 40.72it/s]

Batch 2400/4538 | Loss: 80.202408


Epoch 93/100 [Train]:  57%|█████▋    | 2607/4538 [01:03<00:47, 40.80it/s]

Batch 2600/4538 | Loss: 99.573784


Epoch 93/100 [Train]:  62%|██████▏   | 2807/4538 [01:08<00:42, 40.80it/s]

Batch 2800/4538 | Loss: 98.227493


Epoch 93/100 [Train]:  66%|██████▌   | 2997/4538 [01:13<00:37, 40.94it/s]

Batch 3000/4538 | Loss: 88.448067


Epoch 93/100 [Train]:  71%|███████   | 3207/4538 [01:18<00:32, 41.04it/s]

Batch 3200/4538 | Loss: 90.957451


Epoch 93/100 [Train]:  75%|███████▌  | 3407/4538 [01:23<00:27, 40.52it/s]

Batch 3400/4538 | Loss: 80.933823


Epoch 93/100 [Train]:  79%|███████▉  | 3607/4538 [01:28<00:22, 40.67it/s]

Batch 3600/4538 | Loss: 65.765205


Epoch 93/100 [Train]:  84%|████████▍ | 3807/4538 [01:33<00:17, 40.65it/s]

Batch 3800/4538 | Loss: 96.301277


Epoch 93/100 [Train]:  88%|████████▊ | 4007/4538 [01:38<00:13, 39.96it/s]

Batch 4000/4538 | Loss: 77.934639


Epoch 93/100 [Train]:  93%|█████████▎| 4207/4538 [01:43<00:08, 40.69it/s]

Batch 4200/4538 | Loss: 82.293114


Epoch 93/100 [Train]:  97%|█████████▋| 4407/4538 [01:47<00:03, 40.94it/s]

Batch 4400/4538 | Loss: 57.362068


Epoch 93/100 [Train]: 100%|██████████| 4538/4538 [01:51<00:00, 40.85it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 132.51it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 103.14it/s]


Epoch 93/100 | Average Train Loss: 83.092796 | Average Validation Loss: 83.033327
Saved new best model with validation loss: 83.033327


Epoch 94/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 79.218033


Epoch 94/100 [Train]:   5%|▍         | 207/4538 [00:05<01:52, 38.55it/s]

Batch 200/4538 | Loss: 83.987282


Epoch 94/100 [Train]:   9%|▉         | 407/4538 [00:10<01:46, 38.68it/s]

Batch 400/4538 | Loss: 71.629692


Epoch 94/100 [Train]:  13%|█▎        | 607/4538 [00:15<01:41, 38.67it/s]

Batch 600/4538 | Loss: 82.295151


Epoch 94/100 [Train]:  18%|█▊        | 807/4538 [00:20<01:37, 38.36it/s]

Batch 800/4538 | Loss: 112.505890


Epoch 94/100 [Train]:  22%|██▏       | 1007/4538 [00:26<01:32, 38.32it/s]

Batch 1000/4538 | Loss: 75.873123


Epoch 94/100 [Train]:  27%|██▋       | 1207/4538 [00:31<01:26, 38.54it/s]

Batch 1200/4538 | Loss: 84.756187


Epoch 94/100 [Train]:  31%|███       | 1407/4538 [00:36<01:21, 38.43it/s]

Batch 1400/4538 | Loss: 73.729034


Epoch 94/100 [Train]:  35%|███▌      | 1607/4538 [00:41<01:16, 38.46it/s]

Batch 1600/4538 | Loss: 76.833382


Epoch 94/100 [Train]:  40%|███▉      | 1807/4538 [00:46<01:11, 38.27it/s]

Batch 1800/4538 | Loss: 73.850792


Epoch 94/100 [Train]:  44%|████▍     | 2007/4538 [00:52<01:05, 38.70it/s]

Batch 2000/4538 | Loss: 86.066162


Epoch 94/100 [Train]:  49%|████▊     | 2207/4538 [00:57<01:00, 38.48it/s]

Batch 2200/4538 | Loss: 82.892876


Epoch 94/100 [Train]:  53%|█████▎    | 2407/4538 [01:02<00:55, 38.52it/s]

Batch 2400/4538 | Loss: 90.256943


Epoch 94/100 [Train]:  57%|█████▋    | 2607/4538 [01:07<00:50, 38.49it/s]

Batch 2600/4538 | Loss: 67.123810


Epoch 94/100 [Train]:  62%|██████▏   | 2807/4538 [01:12<00:44, 38.51it/s]

Batch 2800/4538 | Loss: 102.455101


Epoch 94/100 [Train]:  66%|██████▋   | 3007/4538 [01:18<00:39, 38.53it/s]

Batch 3000/4538 | Loss: 80.692795


Epoch 94/100 [Train]:  71%|███████   | 3207/4538 [01:23<00:34, 38.44it/s]

Batch 3200/4538 | Loss: 97.897758


Epoch 94/100 [Train]:  75%|███████▌  | 3407/4538 [01:28<00:29, 38.44it/s]

Batch 3400/4538 | Loss: 81.828560


Epoch 94/100 [Train]:  79%|███████▉  | 3607/4538 [01:33<00:24, 38.49it/s]

Batch 3600/4538 | Loss: 84.725479


Epoch 94/100 [Train]:  84%|████████▍ | 3807/4538 [01:39<00:18, 38.58it/s]

Batch 3800/4538 | Loss: 111.928535


Epoch 94/100 [Train]:  88%|████████▊ | 4007/4538 [01:44<00:13, 38.63it/s]

Batch 4000/4538 | Loss: 77.327423


Epoch 94/100 [Train]:  93%|█████████▎| 4207/4538 [01:49<00:08, 37.83it/s]

Batch 4200/4538 | Loss: 81.234612


Epoch 94/100 [Train]:  97%|█████████▋| 4407/4538 [01:54<00:03, 38.56it/s]

Batch 4400/4538 | Loss: 83.632942


Epoch 94/100 [Train]: 100%|██████████| 4538/4538 [01:57<00:00, 38.48it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:33<00:00, 133.97it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 101.22it/s]


Epoch 94/100 | Average Train Loss: 83.080315 | Average Validation Loss: 83.022683
Saved new best model with validation loss: 83.022683


Epoch 95/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 80.781982


Epoch 95/100 [Train]:   5%|▍         | 208/4538 [00:05<01:52, 38.59it/s]

Batch 200/4538 | Loss: 92.012009


Epoch 95/100 [Train]:   9%|▉         | 408/4538 [00:10<01:47, 38.36it/s]

Batch 400/4538 | Loss: 88.762329


Epoch 95/100 [Train]:  13%|█▎        | 608/4538 [00:15<01:42, 38.48it/s]

Batch 600/4538 | Loss: 84.123314


Epoch 95/100 [Train]:  18%|█▊        | 808/4538 [00:20<01:36, 38.66it/s]

Batch 800/4538 | Loss: 99.887398


Epoch 95/100 [Train]:  22%|██▏       | 1008/4538 [00:26<01:31, 38.63it/s]

Batch 1000/4538 | Loss: 92.938034


Epoch 95/100 [Train]:  27%|██▋       | 1208/4538 [00:31<01:26, 38.60it/s]

Batch 1200/4538 | Loss: 85.436432


Epoch 95/100 [Train]:  31%|███       | 1408/4538 [00:36<01:20, 38.72it/s]

Batch 1400/4538 | Loss: 86.608437


Epoch 95/100 [Train]:  35%|███▌      | 1608/4538 [00:41<01:15, 38.67it/s]

Batch 1600/4538 | Loss: 81.125702


Epoch 95/100 [Train]:  40%|███▉      | 1808/4538 [00:46<01:10, 38.86it/s]

Batch 1800/4538 | Loss: 91.728485


Epoch 95/100 [Train]:  44%|████▍     | 2008/4538 [00:52<01:05, 38.85it/s]

Batch 2000/4538 | Loss: 77.526665


Epoch 95/100 [Train]:  49%|████▊     | 2208/4538 [00:57<01:00, 38.80it/s]

Batch 2200/4538 | Loss: 92.558937


Epoch 95/100 [Train]:  53%|█████▎    | 2408/4538 [01:02<00:54, 38.79it/s]

Batch 2400/4538 | Loss: 89.975540


Epoch 95/100 [Train]:  57%|█████▋    | 2608/4538 [01:07<00:49, 38.72it/s]

Batch 2600/4538 | Loss: 71.935143


Epoch 95/100 [Train]:  62%|██████▏   | 2808/4538 [01:12<00:44, 38.69it/s]

Batch 2800/4538 | Loss: 88.787552


Epoch 95/100 [Train]:  66%|██████▋   | 3008/4538 [01:17<00:39, 38.80it/s]

Batch 3000/4538 | Loss: 74.816963


Epoch 95/100 [Train]:  71%|███████   | 3208/4538 [01:22<00:34, 38.68it/s]

Batch 3200/4538 | Loss: 62.175900


Epoch 95/100 [Train]:  75%|███████▌  | 3408/4538 [01:28<00:29, 38.65it/s]

Batch 3400/4538 | Loss: 71.531349


Epoch 95/100 [Train]:  80%|███████▉  | 3608/4538 [01:33<00:23, 38.75it/s]

Batch 3600/4538 | Loss: 62.268620


Epoch 95/100 [Train]:  84%|████████▍ | 3808/4538 [01:38<00:18, 39.04it/s]

Batch 3800/4538 | Loss: 87.749001


Epoch 95/100 [Train]:  88%|████████▊ | 4008/4538 [01:43<00:13, 38.57it/s]

Batch 4000/4538 | Loss: 103.491554


Epoch 95/100 [Train]:  93%|█████████▎| 4208/4538 [01:48<00:08, 38.65it/s]

Batch 4200/4538 | Loss: 80.257339


Epoch 95/100 [Train]:  97%|█████████▋| 4408/4538 [01:54<00:03, 38.61it/s]

Batch 4400/4538 | Loss: 86.754501


Epoch 95/100 [Train]: 100%|██████████| 4538/4538 [01:57<00:00, 38.66it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:33<00:00, 134.23it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 102.88it/s]


Epoch 95/100 | Average Train Loss: 83.076202 | Average Validation Loss: 83.014819
Saved new best model with validation loss: 83.014819


Epoch 96/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 91.992645


Epoch 96/100 [Train]:   5%|▍         | 206/4538 [00:05<01:44, 41.28it/s]

Batch 200/4538 | Loss: 76.357552


Epoch 96/100 [Train]:   9%|▉         | 406/4538 [00:10<01:39, 41.51it/s]

Batch 400/4538 | Loss: 76.128922


Epoch 96/100 [Train]:  13%|█▎        | 606/4538 [00:14<01:35, 41.33it/s]

Batch 600/4538 | Loss: 82.385521


Epoch 96/100 [Train]:  18%|█▊        | 807/4538 [00:20<01:36, 38.73it/s]

Batch 800/4538 | Loss: 93.731064


Epoch 96/100 [Train]:  22%|██▏       | 1007/4538 [00:25<01:31, 38.73it/s]

Batch 1000/4538 | Loss: 104.776131


Epoch 96/100 [Train]:  27%|██▋       | 1207/4538 [00:30<01:26, 38.49it/s]

Batch 1200/4538 | Loss: 81.381264


Epoch 96/100 [Train]:  31%|███       | 1407/4538 [00:35<01:20, 38.89it/s]

Batch 1400/4538 | Loss: 90.651070


Epoch 96/100 [Train]:  35%|███▌      | 1607/4538 [00:40<01:15, 38.64it/s]

Batch 1600/4538 | Loss: 86.814003


Epoch 96/100 [Train]:  40%|███▉      | 1807/4538 [00:45<01:12, 37.78it/s]

Batch 1800/4538 | Loss: 77.633255


Epoch 96/100 [Train]:  44%|████▍     | 2007/4538 [00:51<01:05, 38.63it/s]

Batch 2000/4538 | Loss: 101.344421


Epoch 96/100 [Train]:  49%|████▊     | 2207/4538 [00:56<01:00, 38.63it/s]

Batch 2200/4538 | Loss: 78.098305


Epoch 96/100 [Train]:  53%|█████▎    | 2407/4538 [01:01<00:55, 38.24it/s]

Batch 2400/4538 | Loss: 71.288269


Epoch 96/100 [Train]:  57%|█████▋    | 2607/4538 [01:06<00:49, 38.79it/s]

Batch 2600/4538 | Loss: 83.082916


Epoch 96/100 [Train]:  62%|██████▏   | 2807/4538 [01:11<00:44, 38.88it/s]

Batch 2800/4538 | Loss: 77.872940


Epoch 96/100 [Train]:  66%|██████▋   | 3007/4538 [01:16<00:39, 38.73it/s]

Batch 3000/4538 | Loss: 81.109322


Epoch 96/100 [Train]:  71%|███████   | 3207/4538 [01:22<00:34, 38.43it/s]

Batch 3200/4538 | Loss: 85.688438


Epoch 96/100 [Train]:  75%|███████▌  | 3406/4538 [01:26<00:27, 41.65it/s]

Batch 3400/4538 | Loss: 87.680092


Epoch 96/100 [Train]:  79%|███████▉  | 3605/4538 [01:31<00:22, 41.23it/s]

Batch 3600/4538 | Loss: 80.624840


Epoch 96/100 [Train]:  84%|████████▍ | 3804/4538 [01:36<00:17, 41.26it/s]

Batch 3800/4538 | Loss: 90.953537


Epoch 96/100 [Train]:  88%|████████▊ | 4004/4538 [01:41<00:12, 41.28it/s]

Batch 4000/4538 | Loss: 73.074623


Epoch 96/100 [Train]:  93%|█████████▎| 4204/4538 [01:46<00:08, 40.77it/s]

Batch 4200/4538 | Loss: 67.694130


Epoch 96/100 [Train]:  97%|█████████▋| 4404/4538 [01:51<00:03, 41.51it/s]

Batch 4400/4538 | Loss: 72.380066


Epoch 96/100 [Train]: 100%|██████████| 4538/4538 [01:54<00:00, 39.69it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:33<00:00, 134.21it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 102.16it/s]


Epoch 96/100 | Average Train Loss: 83.058922 | Average Validation Loss: 83.010817
Saved new best model with validation loss: 83.010817


Epoch 97/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 78.503403


Epoch 97/100 [Train]:   4%|▍         | 204/4538 [00:04<01:45, 41.07it/s]

Batch 200/4538 | Loss: 109.080971


Epoch 97/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 41.06it/s]

Batch 400/4538 | Loss: 98.522354


Epoch 97/100 [Train]:  13%|█▎        | 605/4538 [00:14<01:41, 38.71it/s]

Batch 600/4538 | Loss: 82.289597


Epoch 97/100 [Train]:  18%|█▊        | 805/4538 [00:19<01:36, 38.68it/s]

Batch 800/4538 | Loss: 80.331963


Epoch 97/100 [Train]:  22%|██▏       | 1005/4538 [00:25<01:33, 37.96it/s]

Batch 1000/4538 | Loss: 105.842712


Epoch 97/100 [Train]:  27%|██▋       | 1205/4538 [00:30<01:26, 38.65it/s]

Batch 1200/4538 | Loss: 91.823318


Epoch 97/100 [Train]:  31%|███       | 1406/4538 [00:35<01:16, 41.07it/s]

Batch 1400/4538 | Loss: 80.718445


Epoch 97/100 [Train]:  35%|███▌      | 1606/4538 [00:40<01:11, 40.84it/s]

Batch 1600/4538 | Loss: 88.693321


Epoch 97/100 [Train]:  40%|███▉      | 1806/4538 [00:45<01:06, 41.23it/s]

Batch 1800/4538 | Loss: 66.314667


Epoch 97/100 [Train]:  44%|████▍     | 2006/4538 [00:50<01:01, 41.12it/s]

Batch 2000/4538 | Loss: 77.970398


Epoch 97/100 [Train]:  49%|████▊     | 2206/4538 [00:54<00:58, 39.94it/s]

Batch 2200/4538 | Loss: 89.406143


Epoch 97/100 [Train]:  53%|█████▎    | 2406/4538 [00:59<00:51, 41.27it/s]

Batch 2400/4538 | Loss: 87.132751


Epoch 97/100 [Train]:  57%|█████▋    | 2606/4538 [01:04<00:46, 41.62it/s]

Batch 2600/4538 | Loss: 96.559082


Epoch 97/100 [Train]:  62%|██████▏   | 2806/4538 [01:09<00:41, 41.44it/s]

Batch 2800/4538 | Loss: 92.986725


Epoch 97/100 [Train]:  66%|██████▌   | 3006/4538 [01:14<00:36, 41.79it/s]

Batch 3000/4538 | Loss: 116.157501


Epoch 97/100 [Train]:  71%|███████   | 3206/4538 [01:18<00:32, 41.29it/s]

Batch 3200/4538 | Loss: 78.577232


Epoch 97/100 [Train]:  75%|███████▌  | 3406/4538 [01:23<00:27, 41.63it/s]

Batch 3400/4538 | Loss: 77.531273


Epoch 97/100 [Train]:  79%|███████▉  | 3605/4538 [01:28<00:22, 41.09it/s]

Batch 3600/4538 | Loss: 86.591843


Epoch 97/100 [Train]:  84%|████████▍ | 3805/4538 [01:33<00:17, 41.25it/s]

Batch 3800/4538 | Loss: 76.223755


Epoch 97/100 [Train]:  88%|████████▊ | 4005/4538 [01:38<00:12, 41.19it/s]

Batch 4000/4538 | Loss: 78.036148


Epoch 97/100 [Train]:  93%|█████████▎| 4205/4538 [01:43<00:08, 41.18it/s]

Batch 4200/4538 | Loss: 93.173805


Epoch 97/100 [Train]:  97%|█████████▋| 4405/4538 [01:48<00:03, 41.32it/s]

Batch 4400/4538 | Loss: 78.486969


Epoch 97/100 [Train]: 100%|██████████| 4538/4538 [01:51<00:00, 40.79it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 133.24it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 102.20it/s]


Epoch 97/100 | Average Train Loss: 83.059685 | Average Validation Loss: 83.008241
Saved new best model with validation loss: 83.008241


Epoch 98/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 90.538780


Epoch 98/100 [Train]:   4%|▍         | 204/4538 [00:04<01:44, 41.30it/s]

Batch 200/4538 | Loss: 94.913193


Epoch 98/100 [Train]:   9%|▉         | 404/4538 [00:09<01:40, 41.31it/s]

Batch 400/4538 | Loss: 94.542892


Epoch 98/100 [Train]:  13%|█▎        | 604/4538 [00:14<01:35, 41.31it/s]

Batch 600/4538 | Loss: 74.920876


Epoch 98/100 [Train]:  18%|█▊        | 804/4538 [00:19<01:30, 41.28it/s]

Batch 800/4538 | Loss: 80.434280


Epoch 98/100 [Train]:  22%|██▏       | 1004/4538 [00:24<01:25, 41.30it/s]

Batch 1000/4538 | Loss: 91.467842


Epoch 98/100 [Train]:  27%|██▋       | 1204/4538 [00:29<01:20, 41.35it/s]

Batch 1200/4538 | Loss: 74.667999


Epoch 98/100 [Train]:  31%|███       | 1404/4538 [00:33<01:15, 41.36it/s]

Batch 1400/4538 | Loss: 75.510323


Epoch 98/100 [Train]:  35%|███▌      | 1604/4538 [00:38<01:11, 40.99it/s]

Batch 1600/4538 | Loss: 105.687340


Epoch 98/100 [Train]:  40%|███▉      | 1804/4538 [00:43<01:06, 41.42it/s]

Batch 1800/4538 | Loss: 80.000313


Epoch 98/100 [Train]:  44%|████▍     | 2004/4538 [00:48<01:01, 41.35it/s]

Batch 2000/4538 | Loss: 76.505730


Epoch 98/100 [Train]:  49%|████▊     | 2204/4538 [00:53<00:56, 41.22it/s]

Batch 2200/4538 | Loss: 79.462418


Epoch 98/100 [Train]:  53%|█████▎    | 2404/4538 [00:58<00:51, 41.22it/s]

Batch 2400/4538 | Loss: 85.133453


Epoch 98/100 [Train]:  57%|█████▋    | 2604/4538 [01:03<00:46, 41.33it/s]

Batch 2600/4538 | Loss: 94.872749


Epoch 98/100 [Train]:  62%|██████▏   | 2804/4538 [01:07<00:41, 41.29it/s]

Batch 2800/4538 | Loss: 84.286377


Epoch 98/100 [Train]:  66%|██████▌   | 3004/4538 [01:12<00:37, 41.27it/s]

Batch 3000/4538 | Loss: 80.901382


Epoch 98/100 [Train]:  71%|███████   | 3204/4538 [01:17<00:32, 41.40it/s]

Batch 3200/4538 | Loss: 86.993767


Epoch 98/100 [Train]:  75%|███████▌  | 3408/4538 [01:22<00:27, 41.12it/s]

Batch 3400/4538 | Loss: 89.868843


Epoch 98/100 [Train]:  80%|███████▉  | 3608/4538 [01:27<00:22, 41.39it/s]

Batch 3600/4538 | Loss: 84.612213


Epoch 98/100 [Train]:  84%|████████▍ | 3808/4538 [01:32<00:17, 41.45it/s]

Batch 3800/4538 | Loss: 86.780678


Epoch 98/100 [Train]:  88%|████████▊ | 4008/4538 [01:37<00:13, 40.73it/s]

Batch 4000/4538 | Loss: 62.799061


Epoch 98/100 [Train]:  93%|█████████▎| 4208/4538 [01:41<00:08, 41.21it/s]

Batch 4200/4538 | Loss: 76.338943


Epoch 98/100 [Train]:  97%|█████████▋| 4408/4538 [01:46<00:03, 41.84it/s]

Batch 4400/4538 | Loss: 77.399521


Epoch 98/100 [Train]: 100%|██████████| 4538/4538 [01:49<00:00, 41.32it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:33<00:00, 133.86it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 103.08it/s]


Epoch 98/100 | Average Train Loss: 83.052742 | Average Validation Loss: 83.007029
Saved new best model with validation loss: 83.007029


Epoch 99/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 81.096573


Epoch 99/100 [Train]:   5%|▍         | 208/4538 [00:05<01:53, 38.03it/s]

Batch 200/4538 | Loss: 62.224258


Epoch 99/100 [Train]:   9%|▉         | 408/4538 [00:10<01:47, 38.57it/s]

Batch 400/4538 | Loss: 83.609695


Epoch 99/100 [Train]:  13%|█▎        | 608/4538 [00:15<01:42, 38.47it/s]

Batch 600/4538 | Loss: 59.572762


Epoch 99/100 [Train]:  18%|█▊        | 808/4538 [00:20<01:36, 38.55it/s]

Batch 800/4538 | Loss: 92.796730


Epoch 99/100 [Train]:  22%|██▏       | 1008/4538 [00:26<01:31, 38.59it/s]

Batch 1000/4538 | Loss: 84.979805


Epoch 99/100 [Train]:  27%|██▋       | 1208/4538 [00:31<01:26, 38.63it/s]

Batch 1200/4538 | Loss: 57.403156


Epoch 99/100 [Train]:  31%|███       | 1408/4538 [00:36<01:21, 38.62it/s]

Batch 1400/4538 | Loss: 67.397728


Epoch 99/100 [Train]:  35%|███▌      | 1608/4538 [00:41<01:15, 38.86it/s]

Batch 1600/4538 | Loss: 81.149139


Epoch 99/100 [Train]:  40%|███▉      | 1808/4538 [00:46<01:09, 39.01it/s]

Batch 1800/4538 | Loss: 123.269554


Epoch 99/100 [Train]:  44%|████▍     | 2008/4538 [00:51<01:05, 38.75it/s]

Batch 2000/4538 | Loss: 86.091515


Epoch 99/100 [Train]:  49%|████▊     | 2206/4538 [00:56<00:56, 41.18it/s]

Batch 2200/4538 | Loss: 81.670219


Epoch 99/100 [Train]:  53%|█████▎    | 2406/4538 [01:01<00:51, 41.31it/s]

Batch 2400/4538 | Loss: 87.551392


Epoch 99/100 [Train]:  57%|█████▋    | 2606/4538 [01:06<00:46, 41.30it/s]

Batch 2600/4538 | Loss: 78.949348


Epoch 99/100 [Train]:  62%|██████▏   | 2806/4538 [01:11<00:41, 41.33it/s]

Batch 2800/4538 | Loss: 109.232048


Epoch 99/100 [Train]:  66%|██████▌   | 3006/4538 [01:16<00:37, 41.38it/s]

Batch 3000/4538 | Loss: 102.994781


Epoch 99/100 [Train]:  71%|███████   | 3205/4538 [01:21<00:32, 41.52it/s]

Batch 3200/4538 | Loss: 76.715759


Epoch 99/100 [Train]:  75%|███████▌  | 3405/4538 [01:25<00:27, 41.51it/s]

Batch 3400/4538 | Loss: 85.288200


Epoch 99/100 [Train]:  79%|███████▉  | 3605/4538 [01:30<00:22, 41.57it/s]

Batch 3600/4538 | Loss: 78.183121


Epoch 99/100 [Train]:  84%|████████▍ | 3805/4538 [01:35<00:17, 41.01it/s]

Batch 3800/4538 | Loss: 79.191628


Epoch 99/100 [Train]:  88%|████████▊ | 4005/4538 [01:40<00:12, 41.31it/s]

Batch 4000/4538 | Loss: 97.188164


Epoch 99/100 [Train]:  93%|█████████▎| 4205/4538 [01:45<00:08, 41.02it/s]

Batch 4200/4538 | Loss: 85.022942


Epoch 99/100 [Train]:  97%|█████████▋| 4405/4538 [01:50<00:03, 39.27it/s]

Batch 4400/4538 | Loss: 78.184731


Epoch 99/100 [Train]: 100%|██████████| 4538/4538 [01:53<00:00, 40.02it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:34<00:00, 133.28it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 101.20it/s]


Epoch 99/100 | Average Train Loss: 83.049301 | Average Validation Loss: 83.006657
Saved new best model with validation loss: 83.006657


Epoch 100/100 [Train]:   0%|          | 0/4538 [00:00<?, ?it/s]

Batch 0/4538 | Loss: 78.322754


Epoch 100/100 [Train]:   5%|▍         | 207/4538 [00:05<01:52, 38.64it/s]

Batch 200/4538 | Loss: 100.581909


Epoch 100/100 [Train]:   9%|▉         | 407/4538 [00:10<01:39, 41.53it/s]

Batch 400/4538 | Loss: 83.444046


Epoch 100/100 [Train]:  13%|█▎        | 607/4538 [00:15<01:36, 40.70it/s]

Batch 600/4538 | Loss: 95.579216


Epoch 100/100 [Train]:  18%|█▊        | 808/4538 [00:20<01:30, 41.27it/s]

Batch 800/4538 | Loss: 95.973099


Epoch 100/100 [Train]:  22%|██▏       | 1008/4538 [00:25<01:25, 41.26it/s]

Batch 1000/4538 | Loss: 90.958496


Epoch 100/100 [Train]:  27%|██▋       | 1208/4538 [00:29<01:22, 40.41it/s]

Batch 1200/4538 | Loss: 80.737961


Epoch 100/100 [Train]:  31%|███       | 1408/4538 [00:34<01:15, 41.64it/s]

Batch 1400/4538 | Loss: 90.324234


Epoch 100/100 [Train]:  35%|███▌      | 1608/4538 [00:39<01:10, 41.63it/s]

Batch 1600/4538 | Loss: 80.948326


Epoch 100/100 [Train]:  40%|███▉      | 1807/4538 [00:44<01:11, 38.09it/s]

Batch 1800/4538 | Loss: 81.724533


Epoch 100/100 [Train]:  44%|████▍     | 2007/4538 [00:49<01:05, 38.70it/s]

Batch 2000/4538 | Loss: 65.106903


Epoch 100/100 [Train]:  49%|████▊     | 2207/4538 [00:54<01:00, 38.64it/s]

Batch 2200/4538 | Loss: 74.586411


Epoch 100/100 [Train]:  53%|█████▎    | 2407/4538 [01:00<00:55, 38.34it/s]

Batch 2400/4538 | Loss: 88.586159


Epoch 100/100 [Train]:  57%|█████▋    | 2608/4538 [01:05<00:46, 41.38it/s]

Batch 2600/4538 | Loss: 89.047714


Epoch 100/100 [Train]:  62%|██████▏   | 2808/4538 [01:10<00:41, 41.28it/s]

Batch 2800/4538 | Loss: 78.074669


Epoch 100/100 [Train]:  66%|██████▋   | 3008/4538 [01:14<00:38, 39.97it/s]

Batch 3000/4538 | Loss: 77.283775


Epoch 100/100 [Train]:  71%|███████   | 3208/4538 [01:19<00:32, 41.53it/s]

Batch 3200/4538 | Loss: 104.027161


Epoch 100/100 [Train]:  75%|███████▌  | 3408/4538 [01:24<00:27, 41.07it/s]

Batch 3400/4538 | Loss: 84.276184


Epoch 100/100 [Train]:  80%|███████▉  | 3608/4538 [01:29<00:22, 41.23it/s]

Batch 3600/4538 | Loss: 86.819206


Epoch 100/100 [Train]:  84%|████████▍ | 3808/4538 [01:34<00:17, 41.45it/s]

Batch 3800/4538 | Loss: 105.585396


Epoch 100/100 [Train]:  88%|████████▊ | 4008/4538 [01:39<00:12, 41.31it/s]

Batch 4000/4538 | Loss: 67.024620


Epoch 100/100 [Train]:  93%|█████████▎| 4208/4538 [01:43<00:07, 41.59it/s]

Batch 4200/4538 | Loss: 85.913246


Epoch 100/100 [Train]:  97%|█████████▋| 4408/4538 [01:48<00:03, 41.28it/s]

Batch 4400/4538 | Loss: 96.678246


Epoch 100/100 [Train]: 100%|██████████| 4538/4538 [01:51<00:00, 40.52it/s]
Evaluating Training Set: 100%|██████████| 4538/4538 [00:33<00:00, 133.84it/s]
Validation: 100%|██████████| 787/787 [00:07<00:00, 103.49it/s]


Epoch 100/100 | Average Train Loss: 83.049263 | Average Validation Loss: 83.006645
Saved new best model with validation loss: 83.006645
Loaded best model from epoch 100 with validation loss: 83.006645
