In [1]:
# PYTHON IMPORTS
import os
import copy
from tqdm.notebook import trange, tqdm

# IMAGE IMPORTS 
from PIL import Image
import cv2

# DATA IMPORTS 
import random
import h5py
import numpy as np

# PLOTTING
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# NEURAL NETWORK
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets
from torchvision.transforms import ToPILImage, GaussianBlur
from torchvision.transforms import Compose, RandomCrop, ToTensor, Normalize
import torch.optim.lr_scheduler as lr_scheduler
import torchvision.models as models
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# MY OWN CLASSES
from TileLocator import *
Image.MAX_IMAGE_PIXELS = 933120000

In [2]:
base_dir = r"C:\Users\fhacesga\OneDrive - University Of Houston\AAA_RECTDNN\data"

input_folder = f"{base_dir}/RoadLocator/in"
val_folder = f"{base_dir}/RoadLocator/in"
target_folder = f"{base_dir}/RoadLocator/out"
batch_size = 4
verbose_level = 1

transform = transforms.Compose([
    transforms.RandomRotation(degrees=180),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
])

tensor = transforms.Compose([
    transforms.ToTensor(),
])

train_dataset = SegmentationDataset_Multiclass(input_folder, target_folder, transform=transform, crop=True)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

loaders = {'train' : train_loader}

In [3]:
len(train_dataset)

4

In [4]:
def notify(mess, level=4):
    if verbose_level >= level:
        print(mess)

def saveImages(prob_img_or, outputs, filenames, minputs, outputs_folder, rep_id, labels):
    # IF FOR SOME REASON OUTPUT IS UNBATCHED, BATCH IT
    if prob_img_or.ndim == 3:
        prob_img_or = prob_img_or.unsqueeze(0)
    prob_img_or = prob_img_or.numpy()
    
    # FOR EACH BATCHED OUTPUT
    for i in range(len(outputs)):
        try:
            filename = filenames[i]

            # FOR EACH CLASS
            for ii in range(prob_img_or.shape[1]):

                # GET CURRENT PROBABILITY IMAGE AND SAVE
                prob_img = prob_img_or[i, ii, :, :]
                prob_img = (prob_img * 255).astype(np.uint8) 
                prob_img = Image.fromarray(np.squeeze(prob_img))
                prob_img.save(os.path.join(outputs_folder, f"{rep_id}_{ii}_{filename}"))

            # SAVE INPUTS
            myinp = Image.fromarray(np.uint8(minputs[i, 0, :, :] * 255))
            myinp.save(os.path.join(outputs_folder, f"{rep_id}_{filename[:-4]}_inp.png"))

            # SAVE OUTPUTS
            mylab = Image.fromarray(np.uint8(labels[i, :, :] * 255))
            myinp.save(os.path.join(outputs_folder, f"{rep_id}_{filename[:-4]}_lab.png"))
        except:
            continue

def train(model, dataloaders, num_epochs=50, 
          output_dir=f'{base_dir}/RoadLocator/intermediate_outputs', 
          continue_from=None,
          learning_rate=5e-4,
          device = torch.device("cuda:0"),
          weights=[10, 1]):
    
    # TRAINING PARAMETERS
    weights = torch.tensor(weights).float().to(device)
    criterion = nn.CrossEntropyLoss(weight=weights, reduction="mean")
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    learning_rate_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.99)
        
    if continue_from is not None:
        checkpoint = torch.load(continue_from)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.cuda()
        epoch_start = checkpoint['epoch']
    else:
        epoch_start = 0
        
    # SEND MODEL TO GPU
    model = model.to(device)
    
    # MAKE SURE DIRS FOR TEMP OUTPUTS EXIST
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        
    # LOOP THROUGH EPOCHS
    for epoch in range(epoch_start, epoch_start+num_epochs):
        notify('Epoch {}/{}'.format(epoch+1, num_epochs), level=1)
        notify('-' * 10, level=1)
        epoch_loss = 0
        
        # FOR BOTH PHASES
        for phase in ['train']: 
            if phase == 'train':
                model.train()
                repeats = range(8)
            else:
                model.eval()
                repeats = range(1)
            
            # MAKE DIR FOR CURRENT PHASE IF IT DOES NOT EXIST
            outputs_folder = os.path.join(output_dir, phase)
            if not os.path.exists(outputs_folder):
                os.makedirs(outputs_folder)
                
            # BASELINE MEMORY USAGE                
            notify(f"Prior to Iterations\t {torch.cuda.memory_allocated() / 1e6}")
            
            # ITERATE OVER REPEATS
            for rep_id in tqdm(repeats):
                for inputs, labels, filenames in dataloaders[phase]:
                                        
                    # SEND TO GPU
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    notify(f"Datasets Moved\t\t {torch.cuda.memory_allocated()/ 1e6}")
                    
                    # ZERO GRADIENTS AND THROUGH MODEL
                    optimizer.zero_grad()
                    outputs = model(inputs, resize=False)
                    
                    notify(f"Outputs Processed\t {torch.cuda.memory_allocated() / 1e6}")
                    
                    # CALCULATE LOSS AND KEEP TRACK OF IT FOR CURRENT EPOCH
                    loss = criterion(outputs, labels) 
                    epoch_loss += loss
                    notify(f"Loss Calculated\t\t {torch.cuda.memory_allocated() / 1e6}")
                    
                    # OPTIMIZE IF PHASE IS TRAINING
                    if phase is 'train':
                        notify("Optimizing")
                        loss.backward()
                        optimizer.step()
                        notify(f"Backwards and optimized\t {torch.cuda.memory_allocated() / 1e6}")
                    
                    # SAVE TRAINING IMAGES IF CURRENT STEP REQUIRES IT
                    if rep_id % 2 == 0 and rep_id != 0:
                        prob_img_or = outputs.detach().cpu()
                        minputs     = inputs.detach().cpu().numpy()
                        mlabels     = labels.detach().cpu().numpy()
                        saveImages(prob_img_or, outputs, filenames, minputs, outputs_folder, rep_id, mlabels)
                    
                    # EMPTY CACHE
                    torch.cuda.empty_cache()
                    notify(f"Cache Emptied\t\t {torch.cuda.memory_allocated() / 1e6}")
                    
            notify('{} Loss: {:.4f}'.format(phase, epoch_loss), level=1)
            
        # UPDATE LEARNING RATE SCHEDULER
        learning_rate_scheduler.step()
        notify(f"Learning Rate: {optimizer.param_groups[0]['lr']}", level=1)
        
        # SAVE MODEL EVERY TEN EPOCHS
        if epoch % 10 == 0:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
            }, f'{base_dir}/RoadLocator/checkpoint_080223.pth')
            
            torch.save(model, f"{base_dir}/RoadLocator/080223.pth")
        
    return model

In [5]:
model = RectangleClass(num_classes=2, finalpadding=1)
model = train(model, loaders, 
              num_epochs=2500, 
              learning_rate=1e-3,
              continue_from=f'{base_dir}/RoadLocator/checkpoint_080123.pth'
             )

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Epoch 2491/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

  output = self.softmax(output)


train Loss: 4.3360
Learning Rate: 8.18772890527084e-05
Epoch 2492/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 4.8616
Learning Rate: 8.18772890527084e-05
Epoch 2493/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 4.1749
Learning Rate: 8.18772890527084e-05
Epoch 2494/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 4.0224
Learning Rate: 8.18772890527084e-05
Epoch 2495/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.6800
Learning Rate: 8.18772890527084e-05
Epoch 2496/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.2956
Learning Rate: 8.18772890527084e-05
Epoch 2497/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.2008
Learning Rate: 8.18772890527084e-05
Epoch 2498/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.3637
Learning Rate: 8.18772890527084e-05
Epoch 2499/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1543
Learning Rate: 8.18772890527084e-05
Epoch 2500/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0259
Learning Rate: 8.105851616218132e-05
Epoch 2501/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1342
Learning Rate: 8.105851616218132e-05
Epoch 2502/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9309
Learning Rate: 8.105851616218132e-05
Epoch 2503/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1065
Learning Rate: 8.105851616218132e-05
Epoch 2504/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1286
Learning Rate: 8.105851616218132e-05
Epoch 2505/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.2147
Learning Rate: 8.105851616218132e-05
Epoch 2506/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.2594
Learning Rate: 8.105851616218132e-05
Epoch 2507/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.2716
Learning Rate: 8.105851616218132e-05
Epoch 2508/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1127
Learning Rate: 8.105851616218132e-05
Epoch 2509/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.2078
Learning Rate: 8.105851616218132e-05
Epoch 2510/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1801
Learning Rate: 8.024793100055952e-05
Epoch 2511/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.2028
Learning Rate: 8.024793100055952e-05
Epoch 2512/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0170
Learning Rate: 8.024793100055952e-05
Epoch 2513/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1674
Learning Rate: 8.024793100055952e-05
Epoch 2514/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0408
Learning Rate: 8.024793100055952e-05
Epoch 2515/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1595
Learning Rate: 8.024793100055952e-05
Epoch 2516/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1566
Learning Rate: 8.024793100055952e-05
Epoch 2517/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0998
Learning Rate: 8.024793100055952e-05
Epoch 2518/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8974
Learning Rate: 8.024793100055952e-05
Epoch 2519/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9207
Learning Rate: 8.024793100055952e-05
Epoch 2520/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1413
Learning Rate: 7.944545169055392e-05
Epoch 2521/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9600
Learning Rate: 7.944545169055392e-05
Epoch 2522/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9332
Learning Rate: 7.944545169055392e-05
Epoch 2523/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.2366
Learning Rate: 7.944545169055392e-05
Epoch 2524/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9398
Learning Rate: 7.944545169055392e-05
Epoch 2525/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0666
Learning Rate: 7.944545169055392e-05
Epoch 2526/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.2123
Learning Rate: 7.944545169055392e-05
Epoch 2527/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9656
Learning Rate: 7.944545169055392e-05
Epoch 2528/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0303
Learning Rate: 7.944545169055392e-05
Epoch 2529/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0615
Learning Rate: 7.944545169055392e-05
Epoch 2530/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1019
Learning Rate: 7.865099717364838e-05
Epoch 2531/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0493
Learning Rate: 7.865099717364838e-05
Epoch 2532/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1453
Learning Rate: 7.865099717364838e-05
Epoch 2533/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1032
Learning Rate: 7.865099717364838e-05
Epoch 2534/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0039
Learning Rate: 7.865099717364838e-05
Epoch 2535/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9640
Learning Rate: 7.865099717364838e-05
Epoch 2536/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.2368
Learning Rate: 7.865099717364838e-05
Epoch 2537/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8307
Learning Rate: 7.865099717364838e-05
Epoch 2538/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9564
Learning Rate: 7.865099717364838e-05
Epoch 2539/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9768
Learning Rate: 7.865099717364838e-05
Epoch 2540/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0277
Learning Rate: 7.78644872019119e-05
Epoch 2541/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9663
Learning Rate: 7.78644872019119e-05
Epoch 2542/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9191
Learning Rate: 7.78644872019119e-05
Epoch 2543/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0196
Learning Rate: 7.78644872019119e-05
Epoch 2544/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1497
Learning Rate: 7.78644872019119e-05
Epoch 2545/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1175
Learning Rate: 7.78644872019119e-05
Epoch 2546/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9863
Learning Rate: 7.78644872019119e-05
Epoch 2547/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1605
Learning Rate: 7.78644872019119e-05
Epoch 2548/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8786
Learning Rate: 7.78644872019119e-05
Epoch 2549/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0442
Learning Rate: 7.78644872019119e-05
Epoch 2550/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8457
Learning Rate: 7.708584232989278e-05
Epoch 2551/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9580
Learning Rate: 7.708584232989278e-05
Epoch 2552/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0549
Learning Rate: 7.708584232989278e-05
Epoch 2553/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0107
Learning Rate: 7.708584232989278e-05
Epoch 2554/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9875
Learning Rate: 7.708584232989278e-05
Epoch 2555/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9792
Learning Rate: 7.708584232989278e-05
Epoch 2556/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9433
Learning Rate: 7.708584232989278e-05
Epoch 2557/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0803
Learning Rate: 7.708584232989278e-05
Epoch 2558/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8956
Learning Rate: 7.708584232989278e-05
Epoch 2559/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1570
Learning Rate: 7.708584232989278e-05
Epoch 2560/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0123
Learning Rate: 7.631498390659385e-05
Epoch 2561/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8699
Learning Rate: 7.631498390659385e-05
Epoch 2562/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.2540
Learning Rate: 7.631498390659385e-05
Epoch 2563/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0656
Learning Rate: 7.631498390659385e-05
Epoch 2564/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.2395
Learning Rate: 7.631498390659385e-05
Epoch 2565/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0450
Learning Rate: 7.631498390659385e-05
Epoch 2566/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0976
Learning Rate: 7.631498390659385e-05
Epoch 2567/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9905
Learning Rate: 7.631498390659385e-05
Epoch 2568/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0267
Learning Rate: 7.631498390659385e-05
Epoch 2569/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8317
Learning Rate: 7.631498390659385e-05
Epoch 2570/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.2201
Learning Rate: 7.555183406752792e-05
Epoch 2571/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0025
Learning Rate: 7.555183406752792e-05
Epoch 2572/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1402
Learning Rate: 7.555183406752792e-05
Epoch 2573/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8704
Learning Rate: 7.555183406752792e-05
Epoch 2574/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7614
Learning Rate: 7.555183406752792e-05
Epoch 2575/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9587
Learning Rate: 7.555183406752792e-05
Epoch 2576/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8697
Learning Rate: 7.555183406752792e-05
Epoch 2577/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8602
Learning Rate: 7.555183406752792e-05
Epoch 2578/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9726
Learning Rate: 7.555183406752792e-05
Epoch 2579/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0266
Learning Rate: 7.555183406752792e-05
Epoch 2580/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9629
Learning Rate: 7.479631572685264e-05
Epoch 2581/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9283
Learning Rate: 7.479631572685264e-05
Epoch 2582/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9625
Learning Rate: 7.479631572685264e-05
Epoch 2583/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8264
Learning Rate: 7.479631572685264e-05
Epoch 2584/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9080
Learning Rate: 7.479631572685264e-05
Epoch 2585/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1262
Learning Rate: 7.479631572685264e-05
Epoch 2586/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8453
Learning Rate: 7.479631572685264e-05
Epoch 2587/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0259
Learning Rate: 7.479631572685264e-05
Epoch 2588/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0750
Learning Rate: 7.479631572685264e-05
Epoch 2589/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9831
Learning Rate: 7.479631572685264e-05
Epoch 2590/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0150
Learning Rate: 7.404835256958412e-05
Epoch 2591/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8705
Learning Rate: 7.404835256958412e-05
Epoch 2592/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0002
Learning Rate: 7.404835256958412e-05
Epoch 2593/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9973
Learning Rate: 7.404835256958412e-05
Epoch 2594/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0607
Learning Rate: 7.404835256958412e-05
Epoch 2595/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0389
Learning Rate: 7.404835256958412e-05
Epoch 2596/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8561
Learning Rate: 7.404835256958412e-05
Epoch 2597/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0045
Learning Rate: 7.404835256958412e-05
Epoch 2598/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9059
Learning Rate: 7.404835256958412e-05
Epoch 2599/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0461
Learning Rate: 7.404835256958412e-05
Epoch 2600/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8404
Learning Rate: 7.330786904388828e-05
Epoch 2601/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8465
Learning Rate: 7.330786904388828e-05
Epoch 2602/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8617
Learning Rate: 7.330786904388828e-05
Epoch 2603/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8141
Learning Rate: 7.330786904388828e-05
Epoch 2604/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9075
Learning Rate: 7.330786904388828e-05
Epoch 2605/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1337
Learning Rate: 7.330786904388828e-05
Epoch 2606/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8661
Learning Rate: 7.330786904388828e-05
Epoch 2607/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8892
Learning Rate: 7.330786904388828e-05
Epoch 2608/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8248
Learning Rate: 7.330786904388828e-05
Epoch 2609/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8199
Learning Rate: 7.330786904388828e-05
Epoch 2610/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0186
Learning Rate: 7.25747903534494e-05
Epoch 2611/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0492
Learning Rate: 7.25747903534494e-05
Epoch 2612/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1650
Learning Rate: 7.25747903534494e-05
Epoch 2613/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1191
Learning Rate: 7.25747903534494e-05
Epoch 2614/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0331
Learning Rate: 7.25747903534494e-05
Epoch 2615/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0045
Learning Rate: 7.25747903534494e-05
Epoch 2616/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9577
Learning Rate: 7.25747903534494e-05
Epoch 2617/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8685
Learning Rate: 7.25747903534494e-05
Epoch 2618/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7405
Learning Rate: 7.25747903534494e-05
Epoch 2619/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1049
Learning Rate: 7.25747903534494e-05
Epoch 2620/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9702
Learning Rate: 7.18490424499149e-05
Epoch 2621/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9700
Learning Rate: 7.18490424499149e-05
Epoch 2622/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9730
Learning Rate: 7.18490424499149e-05
Epoch 2623/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9343
Learning Rate: 7.18490424499149e-05
Epoch 2624/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9472
Learning Rate: 7.18490424499149e-05
Epoch 2625/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1017
Learning Rate: 7.18490424499149e-05
Epoch 2626/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8103
Learning Rate: 7.18490424499149e-05
Epoch 2627/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9795
Learning Rate: 7.18490424499149e-05
Epoch 2628/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7663
Learning Rate: 7.18490424499149e-05
Epoch 2629/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8183
Learning Rate: 7.18490424499149e-05
Epoch 2630/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9738
Learning Rate: 7.113055202541575e-05
Epoch 2631/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9931
Learning Rate: 7.113055202541575e-05
Epoch 2632/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8651
Learning Rate: 7.113055202541575e-05
Epoch 2633/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8338
Learning Rate: 7.113055202541575e-05
Epoch 2634/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9366
Learning Rate: 7.113055202541575e-05
Epoch 2635/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8695
Learning Rate: 7.113055202541575e-05
Epoch 2636/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9352
Learning Rate: 7.113055202541575e-05
Epoch 2637/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.2380
Learning Rate: 7.113055202541575e-05
Epoch 2638/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9460
Learning Rate: 7.113055202541575e-05
Epoch 2639/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8534
Learning Rate: 7.113055202541575e-05
Epoch 2640/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0249
Learning Rate: 7.04192465051616e-05
Epoch 2641/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9953
Learning Rate: 7.04192465051616e-05
Epoch 2642/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1901
Learning Rate: 7.04192465051616e-05
Epoch 2643/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8338
Learning Rate: 7.04192465051616e-05
Epoch 2644/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8462
Learning Rate: 7.04192465051616e-05
Epoch 2645/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8559
Learning Rate: 7.04192465051616e-05
Epoch 2646/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9217
Learning Rate: 7.04192465051616e-05
Epoch 2647/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0463
Learning Rate: 7.04192465051616e-05
Epoch 2648/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8635
Learning Rate: 7.04192465051616e-05
Epoch 2649/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9308
Learning Rate: 7.04192465051616e-05
Epoch 2650/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.2944
Learning Rate: 6.971505404010999e-05
Epoch 2651/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8110
Learning Rate: 6.971505404010999e-05
Epoch 2652/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8264
Learning Rate: 6.971505404010999e-05
Epoch 2653/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8148
Learning Rate: 6.971505404010999e-05
Epoch 2654/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9713
Learning Rate: 6.971505404010999e-05
Epoch 2655/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9359
Learning Rate: 6.971505404010999e-05
Epoch 2656/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8958
Learning Rate: 6.971505404010999e-05
Epoch 2657/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9298
Learning Rate: 6.971505404010999e-05
Epoch 2658/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8278
Learning Rate: 6.971505404010999e-05
Epoch 2659/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9826
Learning Rate: 6.971505404010999e-05
Epoch 2660/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8150
Learning Rate: 6.901790349970888e-05
Epoch 2661/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9711
Learning Rate: 6.901790349970888e-05
Epoch 2662/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9160
Learning Rate: 6.901790349970888e-05
Epoch 2663/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8902
Learning Rate: 6.901790349970888e-05
Epoch 2664/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0841
Learning Rate: 6.901790349970888e-05
Epoch 2665/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8577
Learning Rate: 6.901790349970888e-05
Epoch 2666/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9042
Learning Rate: 6.901790349970888e-05
Epoch 2667/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8590
Learning Rate: 6.901790349970888e-05
Epoch 2668/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0267
Learning Rate: 6.901790349970888e-05
Epoch 2669/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0007
Learning Rate: 6.901790349970888e-05
Epoch 2670/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9268
Learning Rate: 6.832772446471178e-05
Epoch 2671/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9010
Learning Rate: 6.832772446471178e-05
Epoch 2672/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8678
Learning Rate: 6.832772446471178e-05
Epoch 2673/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9612
Learning Rate: 6.832772446471178e-05
Epoch 2674/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9363
Learning Rate: 6.832772446471178e-05
Epoch 2675/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9929
Learning Rate: 6.832772446471178e-05
Epoch 2676/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8212
Learning Rate: 6.832772446471178e-05
Epoch 2677/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8971
Learning Rate: 6.832772446471178e-05
Epoch 2678/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8440
Learning Rate: 6.832772446471178e-05
Epoch 2679/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0352
Learning Rate: 6.832772446471178e-05
Epoch 2680/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9982
Learning Rate: 6.764444722006467e-05
Epoch 2681/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7694
Learning Rate: 6.764444722006467e-05
Epoch 2682/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9035
Learning Rate: 6.764444722006467e-05
Epoch 2683/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8989
Learning Rate: 6.764444722006467e-05
Epoch 2684/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8721
Learning Rate: 6.764444722006467e-05
Epoch 2685/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8740
Learning Rate: 6.764444722006467e-05
Epoch 2686/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9402
Learning Rate: 6.764444722006467e-05
Epoch 2687/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8604
Learning Rate: 6.764444722006467e-05
Epoch 2688/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9262
Learning Rate: 6.764444722006467e-05
Epoch 2689/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8977
Learning Rate: 6.764444722006467e-05
Epoch 2690/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9479
Learning Rate: 6.696800274786402e-05
Epoch 2691/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1869
Learning Rate: 6.696800274786402e-05
Epoch 2692/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8130
Learning Rate: 6.696800274786402e-05
Epoch 2693/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9085
Learning Rate: 6.696800274786402e-05
Epoch 2694/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9077
Learning Rate: 6.696800274786402e-05
Epoch 2695/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8952
Learning Rate: 6.696800274786402e-05
Epoch 2696/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8464
Learning Rate: 6.696800274786402e-05
Epoch 2697/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8602
Learning Rate: 6.696800274786402e-05
Epoch 2698/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9180
Learning Rate: 6.696800274786402e-05
Epoch 2699/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8620
Learning Rate: 6.696800274786402e-05
Epoch 2700/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9335
Learning Rate: 6.629832272038538e-05
Epoch 2701/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9717
Learning Rate: 6.629832272038538e-05
Epoch 2702/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9428
Learning Rate: 6.629832272038538e-05
Epoch 2703/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9506
Learning Rate: 6.629832272038538e-05
Epoch 2704/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7873
Learning Rate: 6.629832272038538e-05
Epoch 2705/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8341
Learning Rate: 6.629832272038538e-05
Epoch 2706/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9021
Learning Rate: 6.629832272038538e-05
Epoch 2707/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9260
Learning Rate: 6.629832272038538e-05
Epoch 2708/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8567
Learning Rate: 6.629832272038538e-05
Epoch 2709/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8916
Learning Rate: 6.629832272038538e-05
Epoch 2710/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8437
Learning Rate: 6.563533949318152e-05
Epoch 2711/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9239
Learning Rate: 6.563533949318152e-05
Epoch 2712/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8840
Learning Rate: 6.563533949318152e-05
Epoch 2713/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8709
Learning Rate: 6.563533949318152e-05
Epoch 2714/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8694
Learning Rate: 6.563533949318152e-05
Epoch 2715/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7721
Learning Rate: 6.563533949318152e-05
Epoch 2716/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9224
Learning Rate: 6.563533949318152e-05
Epoch 2717/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7994
Learning Rate: 6.563533949318152e-05
Epoch 2718/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8849
Learning Rate: 6.563533949318152e-05
Epoch 2719/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8202
Learning Rate: 6.563533949318152e-05
Epoch 2720/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8726
Learning Rate: 6.497898609824971e-05
Epoch 2721/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9475
Learning Rate: 6.497898609824971e-05
Epoch 2722/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9595
Learning Rate: 6.497898609824971e-05
Epoch 2723/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9313
Learning Rate: 6.497898609824971e-05
Epoch 2724/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0154
Learning Rate: 6.497898609824971e-05
Epoch 2725/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9783
Learning Rate: 6.497898609824971e-05
Epoch 2726/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8074
Learning Rate: 6.497898609824971e-05
Epoch 2727/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8541
Learning Rate: 6.497898609824971e-05
Epoch 2728/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8506
Learning Rate: 6.497898609824971e-05
Epoch 2729/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9630
Learning Rate: 6.497898609824971e-05
Epoch 2730/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9150
Learning Rate: 6.432919623726721e-05
Epoch 2731/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8875
Learning Rate: 6.432919623726721e-05
Epoch 2732/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9637
Learning Rate: 6.432919623726721e-05
Epoch 2733/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8217
Learning Rate: 6.432919623726721e-05
Epoch 2734/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7565
Learning Rate: 6.432919623726721e-05
Epoch 2735/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8809
Learning Rate: 6.432919623726721e-05
Epoch 2736/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1826
Learning Rate: 6.432919623726721e-05
Epoch 2737/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7918
Learning Rate: 6.432919623726721e-05
Epoch 2738/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7110
Learning Rate: 6.432919623726721e-05
Epoch 2739/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9654
Learning Rate: 6.432919623726721e-05
Epoch 2740/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8497
Learning Rate: 6.368590427489455e-05
Epoch 2741/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9972
Learning Rate: 6.368590427489455e-05
Epoch 2742/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9559
Learning Rate: 6.368590427489455e-05
Epoch 2743/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8468
Learning Rate: 6.368590427489455e-05
Epoch 2744/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9196
Learning Rate: 6.368590427489455e-05
Epoch 2745/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8812
Learning Rate: 6.368590427489455e-05
Epoch 2746/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8317
Learning Rate: 6.368590427489455e-05
Epoch 2747/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0280
Learning Rate: 6.368590427489455e-05
Epoch 2748/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1265
Learning Rate: 6.368590427489455e-05
Epoch 2749/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8143
Learning Rate: 6.368590427489455e-05
Epoch 2750/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7947
Learning Rate: 6.30490452321456e-05
Epoch 2751/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8030
Learning Rate: 6.30490452321456e-05
Epoch 2752/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0443
Learning Rate: 6.30490452321456e-05
Epoch 2753/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1513
Learning Rate: 6.30490452321456e-05
Epoch 2754/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9645
Learning Rate: 6.30490452321456e-05
Epoch 2755/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8031
Learning Rate: 6.30490452321456e-05
Epoch 2756/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7357
Learning Rate: 6.30490452321456e-05
Epoch 2757/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9203
Learning Rate: 6.30490452321456e-05
Epoch 2758/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0752
Learning Rate: 6.30490452321456e-05
Epoch 2759/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9866
Learning Rate: 6.30490452321456e-05
Epoch 2760/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9018
Learning Rate: 6.241855477982414e-05
Epoch 2761/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8607
Learning Rate: 6.241855477982414e-05
Epoch 2762/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8805
Learning Rate: 6.241855477982414e-05
Epoch 2763/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8765
Learning Rate: 6.241855477982414e-05
Epoch 2764/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8713
Learning Rate: 6.241855477982414e-05
Epoch 2765/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8216
Learning Rate: 6.241855477982414e-05
Epoch 2766/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9073
Learning Rate: 6.241855477982414e-05
Epoch 2767/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9604
Learning Rate: 6.241855477982414e-05
Epoch 2768/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8565
Learning Rate: 6.241855477982414e-05
Epoch 2769/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8819
Learning Rate: 6.241855477982414e-05
Epoch 2770/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9167
Learning Rate: 6.17943692320259e-05
Epoch 2771/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9226
Learning Rate: 6.17943692320259e-05
Epoch 2772/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0749
Learning Rate: 6.17943692320259e-05
Epoch 2773/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9745
Learning Rate: 6.17943692320259e-05
Epoch 2774/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9523
Learning Rate: 6.17943692320259e-05
Epoch 2775/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9054
Learning Rate: 6.17943692320259e-05
Epoch 2776/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8923
Learning Rate: 6.17943692320259e-05
Epoch 2777/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8171
Learning Rate: 6.17943692320259e-05
Epoch 2778/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9556
Learning Rate: 6.17943692320259e-05
Epoch 2779/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7881
Learning Rate: 6.17943692320259e-05
Epoch 2780/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0249
Learning Rate: 6.117642553970564e-05
Epoch 2781/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9637
Learning Rate: 6.117642553970564e-05
Epoch 2782/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0041
Learning Rate: 6.117642553970564e-05
Epoch 2783/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7684
Learning Rate: 6.117642553970564e-05
Epoch 2784/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0436
Learning Rate: 6.117642553970564e-05
Epoch 2785/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0664
Learning Rate: 6.117642553970564e-05
Epoch 2786/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9036
Learning Rate: 6.117642553970564e-05
Epoch 2787/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0249
Learning Rate: 6.117642553970564e-05
Epoch 2788/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9455
Learning Rate: 6.117642553970564e-05
Epoch 2789/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8213
Learning Rate: 6.117642553970564e-05
Epoch 2790/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8055
Learning Rate: 6.056466128430859e-05
Epoch 2791/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8597
Learning Rate: 6.056466128430859e-05
Epoch 2792/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8264
Learning Rate: 6.056466128430859e-05
Epoch 2793/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9156
Learning Rate: 6.056466128430859e-05
Epoch 2794/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1531
Learning Rate: 6.056466128430859e-05
Epoch 2795/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8505
Learning Rate: 6.056466128430859e-05
Epoch 2796/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8909
Learning Rate: 6.056466128430859e-05
Epoch 2797/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8453
Learning Rate: 6.056466128430859e-05
Epoch 2798/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7856
Learning Rate: 6.056466128430859e-05
Epoch 2799/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8376
Learning Rate: 6.056466128430859e-05
Epoch 2800/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8567
Learning Rate: 5.99590146714655e-05
Epoch 2801/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8050
Learning Rate: 5.99590146714655e-05
Epoch 2802/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8020
Learning Rate: 5.99590146714655e-05
Epoch 2803/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8999
Learning Rate: 5.99590146714655e-05
Epoch 2804/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9634
Learning Rate: 5.99590146714655e-05
Epoch 2805/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8008
Learning Rate: 5.99590146714655e-05
Epoch 2806/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8385
Learning Rate: 5.99590146714655e-05
Epoch 2807/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8836
Learning Rate: 5.99590146714655e-05
Epoch 2808/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8364
Learning Rate: 5.99590146714655e-05
Epoch 2809/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7483
Learning Rate: 5.99590146714655e-05
Epoch 2810/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9475
Learning Rate: 5.935942452475084e-05
Epoch 2811/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8319
Learning Rate: 5.935942452475084e-05
Epoch 2812/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1091
Learning Rate: 5.935942452475084e-05
Epoch 2813/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9541
Learning Rate: 5.935942452475084e-05
Epoch 2814/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8360
Learning Rate: 5.935942452475084e-05
Epoch 2815/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7517
Learning Rate: 5.935942452475084e-05
Epoch 2816/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7016
Learning Rate: 5.935942452475084e-05
Epoch 2817/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8351
Learning Rate: 5.935942452475084e-05
Epoch 2818/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9647
Learning Rate: 5.935942452475084e-05
Epoch 2819/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8118
Learning Rate: 5.935942452475084e-05
Epoch 2820/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8548
Learning Rate: 5.876583027950333e-05
Epoch 2821/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8976
Learning Rate: 5.876583027950333e-05
Epoch 2822/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8813
Learning Rate: 5.876583027950333e-05
Epoch 2823/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8326
Learning Rate: 5.876583027950333e-05
Epoch 2824/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7959
Learning Rate: 5.876583027950333e-05
Epoch 2825/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1353
Learning Rate: 5.876583027950333e-05
Epoch 2826/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9007
Learning Rate: 5.876583027950333e-05
Epoch 2827/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9240
Learning Rate: 5.876583027950333e-05
Epoch 2828/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8261
Learning Rate: 5.876583027950333e-05
Epoch 2829/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7845
Learning Rate: 5.876583027950333e-05
Epoch 2830/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8536
Learning Rate: 5.8178171976708296e-05
Epoch 2831/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9329
Learning Rate: 5.8178171976708296e-05
Epoch 2832/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8517
Learning Rate: 5.8178171976708296e-05
Epoch 2833/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7332
Learning Rate: 5.8178171976708296e-05
Epoch 2834/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7798
Learning Rate: 5.8178171976708296e-05
Epoch 2835/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7375
Learning Rate: 5.8178171976708296e-05
Epoch 2836/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8293
Learning Rate: 5.8178171976708296e-05
Epoch 2837/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1169
Learning Rate: 5.8178171976708296e-05
Epoch 2838/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9597
Learning Rate: 5.8178171976708296e-05
Epoch 2839/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9198
Learning Rate: 5.8178171976708296e-05
Epoch 2840/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8447
Learning Rate: 5.759639025694121e-05
Epoch 2841/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9574
Learning Rate: 5.759639025694121e-05
Epoch 2842/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8862
Learning Rate: 5.759639025694121e-05
Epoch 2843/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9861
Learning Rate: 5.759639025694121e-05
Epoch 2844/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0479
Learning Rate: 5.759639025694121e-05
Epoch 2845/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8606
Learning Rate: 5.759639025694121e-05
Epoch 2846/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8209
Learning Rate: 5.759639025694121e-05
Epoch 2847/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8820
Learning Rate: 5.759639025694121e-05
Epoch 2848/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9225
Learning Rate: 5.759639025694121e-05
Epoch 2849/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7792
Learning Rate: 5.759639025694121e-05
Epoch 2850/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9683
Learning Rate: 5.70204263543718e-05
Epoch 2851/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8263
Learning Rate: 5.70204263543718e-05
Epoch 2852/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9431
Learning Rate: 5.70204263543718e-05
Epoch 2853/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8385
Learning Rate: 5.70204263543718e-05
Epoch 2854/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9068
Learning Rate: 5.70204263543718e-05
Epoch 2855/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7992
Learning Rate: 5.70204263543718e-05
Epoch 2856/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8746
Learning Rate: 5.70204263543718e-05
Epoch 2857/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8760
Learning Rate: 5.70204263543718e-05
Epoch 2858/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9509
Learning Rate: 5.70204263543718e-05
Epoch 2859/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9570
Learning Rate: 5.70204263543718e-05
Epoch 2860/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9119
Learning Rate: 5.645022209082808e-05
Epoch 2861/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0099
Learning Rate: 5.645022209082808e-05
Epoch 2862/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8356
Learning Rate: 5.645022209082808e-05
Epoch 2863/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8684
Learning Rate: 5.645022209082808e-05
Epoch 2864/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8210
Learning Rate: 5.645022209082808e-05
Epoch 2865/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8617
Learning Rate: 5.645022209082808e-05
Epoch 2866/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7978
Learning Rate: 5.645022209082808e-05
Epoch 2867/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9562
Learning Rate: 5.645022209082808e-05
Epoch 2868/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7435
Learning Rate: 5.645022209082808e-05
Epoch 2869/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9384
Learning Rate: 5.645022209082808e-05
Epoch 2870/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9000
Learning Rate: 5.5885719869919796e-05
Epoch 2871/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9476
Learning Rate: 5.5885719869919796e-05
Epoch 2872/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8600
Learning Rate: 5.5885719869919796e-05
Epoch 2873/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7962
Learning Rate: 5.5885719869919796e-05
Epoch 2874/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8001
Learning Rate: 5.5885719869919796e-05
Epoch 2875/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8471
Learning Rate: 5.5885719869919796e-05
Epoch 2876/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8673
Learning Rate: 5.5885719869919796e-05
Epoch 2877/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8873
Learning Rate: 5.5885719869919796e-05
Epoch 2878/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7600
Learning Rate: 5.5885719869919796e-05
Epoch 2879/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7766
Learning Rate: 5.5885719869919796e-05
Epoch 2880/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9222
Learning Rate: 5.5326862671220596e-05
Epoch 2881/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8687
Learning Rate: 5.5326862671220596e-05
Epoch 2882/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8213
Learning Rate: 5.5326862671220596e-05
Epoch 2883/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0414
Learning Rate: 5.5326862671220596e-05
Epoch 2884/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8453
Learning Rate: 5.5326862671220596e-05
Epoch 2885/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9412
Learning Rate: 5.5326862671220596e-05
Epoch 2886/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1093
Learning Rate: 5.5326862671220596e-05
Epoch 2887/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8558
Learning Rate: 5.5326862671220596e-05
Epoch 2888/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7106
Learning Rate: 5.5326862671220596e-05
Epoch 2889/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7085
Learning Rate: 5.5326862671220596e-05
Epoch 2890/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0784
Learning Rate: 5.477359404450839e-05
Epoch 2891/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8342
Learning Rate: 5.477359404450839e-05
Epoch 2892/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7471
Learning Rate: 5.477359404450839e-05
Epoch 2893/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9341
Learning Rate: 5.477359404450839e-05
Epoch 2894/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7858
Learning Rate: 5.477359404450839e-05
Epoch 2895/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8629
Learning Rate: 5.477359404450839e-05
Epoch 2896/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8757
Learning Rate: 5.477359404450839e-05
Epoch 2897/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8588
Learning Rate: 5.477359404450839e-05
Epoch 2898/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9059
Learning Rate: 5.477359404450839e-05
Epoch 2899/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8955
Learning Rate: 5.477359404450839e-05
Epoch 2900/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9227
Learning Rate: 5.42258581040633e-05
Epoch 2901/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8897
Learning Rate: 5.42258581040633e-05
Epoch 2902/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7958
Learning Rate: 5.42258581040633e-05
Epoch 2903/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8680
Learning Rate: 5.42258581040633e-05
Epoch 2904/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7721
Learning Rate: 5.42258581040633e-05
Epoch 2905/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9602
Learning Rate: 5.42258581040633e-05
Epoch 2906/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8163
Learning Rate: 5.42258581040633e-05
Epoch 2907/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8873
Learning Rate: 5.42258581040633e-05
Epoch 2908/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9556
Learning Rate: 5.42258581040633e-05
Epoch 2909/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8677
Learning Rate: 5.42258581040633e-05
Epoch 2910/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6452
Learning Rate: 5.3683599523022665e-05
Epoch 2911/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7912
Learning Rate: 5.3683599523022665e-05
Epoch 2912/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9551
Learning Rate: 5.3683599523022665e-05
Epoch 2913/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0112
Learning Rate: 5.3683599523022665e-05
Epoch 2914/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8032
Learning Rate: 5.3683599523022665e-05
Epoch 2915/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8156
Learning Rate: 5.3683599523022665e-05
Epoch 2916/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9210
Learning Rate: 5.3683599523022665e-05
Epoch 2917/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0408
Learning Rate: 5.3683599523022665e-05
Epoch 2918/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8074
Learning Rate: 5.3683599523022665e-05
Epoch 2919/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9355
Learning Rate: 5.3683599523022665e-05
Epoch 2920/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7762
Learning Rate: 5.314676352779244e-05
Epoch 2921/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7998
Learning Rate: 5.314676352779244e-05
Epoch 2922/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8827
Learning Rate: 5.314676352779244e-05
Epoch 2923/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9738
Learning Rate: 5.314676352779244e-05
Epoch 2924/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9234
Learning Rate: 5.314676352779244e-05
Epoch 2925/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7467
Learning Rate: 5.314676352779244e-05
Epoch 2926/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7845
Learning Rate: 5.314676352779244e-05
Epoch 2927/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8409
Learning Rate: 5.314676352779244e-05
Epoch 2928/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9866
Learning Rate: 5.314676352779244e-05
Epoch 2929/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7758
Learning Rate: 5.314676352779244e-05
Epoch 2930/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9150
Learning Rate: 5.2615295892514514e-05
Epoch 2931/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8091
Learning Rate: 5.2615295892514514e-05
Epoch 2932/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7627
Learning Rate: 5.2615295892514514e-05
Epoch 2933/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9452
Learning Rate: 5.2615295892514514e-05
Epoch 2934/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8994
Learning Rate: 5.2615295892514514e-05
Epoch 2935/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8269
Learning Rate: 5.2615295892514514e-05
Epoch 2936/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8374
Learning Rate: 5.2615295892514514e-05
Epoch 2937/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8215
Learning Rate: 5.2615295892514514e-05
Epoch 2938/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8439
Learning Rate: 5.2615295892514514e-05
Epoch 2939/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7691
Learning Rate: 5.2615295892514514e-05
Epoch 2940/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8351
Learning Rate: 5.208914293358937e-05
Epoch 2941/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8597
Learning Rate: 5.208914293358937e-05
Epoch 2942/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8523
Learning Rate: 5.208914293358937e-05
Epoch 2943/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8352
Learning Rate: 5.208914293358937e-05
Epoch 2944/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7630
Learning Rate: 5.208914293358937e-05
Epoch 2945/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7786
Learning Rate: 5.208914293358937e-05
Epoch 2946/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9333
Learning Rate: 5.208914293358937e-05
Epoch 2947/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7793
Learning Rate: 5.208914293358937e-05
Epoch 2948/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7834
Learning Rate: 5.208914293358937e-05
Epoch 2949/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8556
Learning Rate: 5.208914293358937e-05
Epoch 2950/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9262
Learning Rate: 5.156825150425348e-05
Epoch 2951/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9469
Learning Rate: 5.156825150425348e-05
Epoch 2952/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8982
Learning Rate: 5.156825150425348e-05
Epoch 2953/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8405
Learning Rate: 5.156825150425348e-05
Epoch 2954/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8417
Learning Rate: 5.156825150425348e-05
Epoch 2955/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8027
Learning Rate: 5.156825150425348e-05
Epoch 2956/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7603
Learning Rate: 5.156825150425348e-05
Epoch 2957/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9520
Learning Rate: 5.156825150425348e-05
Epoch 2958/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9190
Learning Rate: 5.156825150425348e-05
Epoch 2959/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7925
Learning Rate: 5.156825150425348e-05
Epoch 2960/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8378
Learning Rate: 5.105256898921094e-05
Epoch 2961/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8936
Learning Rate: 5.105256898921094e-05
Epoch 2962/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8370
Learning Rate: 5.105256898921094e-05
Epoch 2963/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7974
Learning Rate: 5.105256898921094e-05
Epoch 2964/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9093
Learning Rate: 5.105256898921094e-05
Epoch 2965/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8940
Learning Rate: 5.105256898921094e-05
Epoch 2966/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7482
Learning Rate: 5.105256898921094e-05
Epoch 2967/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7869
Learning Rate: 5.105256898921094e-05
Epoch 2968/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7924
Learning Rate: 5.105256898921094e-05
Epoch 2969/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7049
Learning Rate: 5.105256898921094e-05
Epoch 2970/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8145
Learning Rate: 5.0542043299318834e-05
Epoch 2971/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9494
Learning Rate: 5.0542043299318834e-05
Epoch 2972/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8828
Learning Rate: 5.0542043299318834e-05
Epoch 2973/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7867
Learning Rate: 5.0542043299318834e-05
Epoch 2974/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9352
Learning Rate: 5.0542043299318834e-05
Epoch 2975/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8294
Learning Rate: 5.0542043299318834e-05
Epoch 2976/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8241
Learning Rate: 5.0542043299318834e-05
Epoch 2977/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7443
Learning Rate: 5.0542043299318834e-05
Epoch 2978/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0623
Learning Rate: 5.0542043299318834e-05
Epoch 2979/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8825
Learning Rate: 5.0542043299318834e-05
Epoch 2980/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7779
Learning Rate: 5.003662286632564e-05
Epoch 2981/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7500
Learning Rate: 5.003662286632564e-05
Epoch 2982/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8146
Learning Rate: 5.003662286632564e-05
Epoch 2983/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8297
Learning Rate: 5.003662286632564e-05
Epoch 2984/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8068
Learning Rate: 5.003662286632564e-05
Epoch 2985/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8061
Learning Rate: 5.003662286632564e-05
Epoch 2986/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6934
Learning Rate: 5.003662286632564e-05
Epoch 2987/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8264
Learning Rate: 5.003662286632564e-05
Epoch 2988/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0259
Learning Rate: 5.003662286632564e-05
Epoch 2989/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7971
Learning Rate: 5.003662286632564e-05
Epoch 2990/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8323
Learning Rate: 4.953625663766238e-05
Epoch 2991/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8797
Learning Rate: 4.953625663766238e-05
Epoch 2992/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8709
Learning Rate: 4.953625663766238e-05
Epoch 2993/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8972
Learning Rate: 4.953625663766238e-05
Epoch 2994/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9060
Learning Rate: 4.953625663766238e-05
Epoch 2995/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0213
Learning Rate: 4.953625663766238e-05
Epoch 2996/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8429
Learning Rate: 4.953625663766238e-05
Epoch 2997/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8946
Learning Rate: 4.953625663766238e-05
Epoch 2998/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8130
Learning Rate: 4.953625663766238e-05
Epoch 2999/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8559
Learning Rate: 4.953625663766238e-05
Epoch 3000/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8614
Learning Rate: 4.904089407128576e-05
Epoch 3001/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8749
Learning Rate: 4.904089407128576e-05
Epoch 3002/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8577
Learning Rate: 4.904089407128576e-05
Epoch 3003/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8558
Learning Rate: 4.904089407128576e-05
Epoch 3004/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7541
Learning Rate: 4.904089407128576e-05
Epoch 3005/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7898
Learning Rate: 4.904089407128576e-05
Epoch 3006/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8547
Learning Rate: 4.904089407128576e-05
Epoch 3007/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8460
Learning Rate: 4.904089407128576e-05
Epoch 3008/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7768
Learning Rate: 4.904089407128576e-05
Epoch 3009/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7542
Learning Rate: 4.904089407128576e-05
Epoch 3010/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9424
Learning Rate: 4.85504851305729e-05
Epoch 3011/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8543
Learning Rate: 4.85504851305729e-05
Epoch 3012/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8566
Learning Rate: 4.85504851305729e-05
Epoch 3013/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9536
Learning Rate: 4.85504851305729e-05
Epoch 3014/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8269
Learning Rate: 4.85504851305729e-05
Epoch 3015/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7814
Learning Rate: 4.85504851305729e-05
Epoch 3016/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8034
Learning Rate: 4.85504851305729e-05
Epoch 3017/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8670
Learning Rate: 4.85504851305729e-05
Epoch 3018/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8006
Learning Rate: 4.85504851305729e-05
Epoch 3019/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9389
Learning Rate: 4.85504851305729e-05
Epoch 3020/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8757
Learning Rate: 4.806498027926717e-05
Epoch 3021/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8337
Learning Rate: 4.806498027926717e-05
Epoch 3022/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8025
Learning Rate: 4.806498027926717e-05
Epoch 3023/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9480
Learning Rate: 4.806498027926717e-05
Epoch 3024/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8898
Learning Rate: 4.806498027926717e-05
Epoch 3025/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8456
Learning Rate: 4.806498027926717e-05
Epoch 3026/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7487
Learning Rate: 4.806498027926717e-05
Epoch 3027/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8544
Learning Rate: 4.806498027926717e-05
Epoch 3028/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9205
Learning Rate: 4.806498027926717e-05
Epoch 3029/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7945
Learning Rate: 4.806498027926717e-05
Epoch 3030/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8038
Learning Rate: 4.75843304764745e-05
Epoch 3031/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7830
Learning Rate: 4.75843304764745e-05
Epoch 3032/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9399
Learning Rate: 4.75843304764745e-05
Epoch 3033/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9941
Learning Rate: 4.75843304764745e-05
Epoch 3034/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9397
Learning Rate: 4.75843304764745e-05
Epoch 3035/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8962
Learning Rate: 4.75843304764745e-05
Epoch 3036/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8551
Learning Rate: 4.75843304764745e-05
Epoch 3037/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8771
Learning Rate: 4.75843304764745e-05
Epoch 3038/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7654
Learning Rate: 4.75843304764745e-05
Epoch 3039/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7894
Learning Rate: 4.75843304764745e-05
Epoch 3040/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8848
Learning Rate: 4.7108487171709756e-05
Epoch 3041/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8428
Learning Rate: 4.7108487171709756e-05
Epoch 3042/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8103
Learning Rate: 4.7108487171709756e-05
Epoch 3043/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7447
Learning Rate: 4.7108487171709756e-05
Epoch 3044/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8262
Learning Rate: 4.7108487171709756e-05
Epoch 3045/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9410
Learning Rate: 4.7108487171709756e-05
Epoch 3046/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9251
Learning Rate: 4.7108487171709756e-05
Epoch 3047/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8192
Learning Rate: 4.7108487171709756e-05
Epoch 3048/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8093
Learning Rate: 4.7108487171709756e-05
Epoch 3049/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8065
Learning Rate: 4.7108487171709756e-05
Epoch 3050/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7866
Learning Rate: 4.6637402299992657e-05
Epoch 3051/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8225
Learning Rate: 4.6637402299992657e-05
Epoch 3052/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7980
Learning Rate: 4.6637402299992657e-05
Epoch 3053/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7610
Learning Rate: 4.6637402299992657e-05
Epoch 3054/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8841
Learning Rate: 4.6637402299992657e-05
Epoch 3055/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7848
Learning Rate: 4.6637402299992657e-05
Epoch 3056/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9541
Learning Rate: 4.6637402299992657e-05
Epoch 3057/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9546
Learning Rate: 4.6637402299992657e-05
Epoch 3058/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7295
Learning Rate: 4.6637402299992657e-05
Epoch 3059/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7236
Learning Rate: 4.6637402299992657e-05
Epoch 3060/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9346
Learning Rate: 4.617102827699273e-05
Epoch 3061/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0153
Learning Rate: 4.617102827699273e-05
Epoch 3062/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9921
Learning Rate: 4.617102827699273e-05
Epoch 3063/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8271
Learning Rate: 4.617102827699273e-05
Epoch 3064/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7800
Learning Rate: 4.617102827699273e-05
Epoch 3065/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9430
Learning Rate: 4.617102827699273e-05
Epoch 3066/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9452
Learning Rate: 4.617102827699273e-05
Epoch 3067/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8899
Learning Rate: 4.617102827699273e-05
Epoch 3068/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8657
Learning Rate: 4.617102827699273e-05
Epoch 3069/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8641
Learning Rate: 4.617102827699273e-05
Epoch 3070/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8117
Learning Rate: 4.5709317994222804e-05
Epoch 3071/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8876
Learning Rate: 4.5709317994222804e-05
Epoch 3072/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7383
Learning Rate: 4.5709317994222804e-05
Epoch 3073/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8052
Learning Rate: 4.5709317994222804e-05
Epoch 3074/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7918
Learning Rate: 4.5709317994222804e-05
Epoch 3075/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9233
Learning Rate: 4.5709317994222804e-05
Epoch 3076/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7889
Learning Rate: 4.5709317994222804e-05
Epoch 3077/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7342
Learning Rate: 4.5709317994222804e-05
Epoch 3078/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8039
Learning Rate: 4.5709317994222804e-05
Epoch 3079/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8383
Learning Rate: 4.5709317994222804e-05
Epoch 3080/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7661
Learning Rate: 4.5252224814280574e-05
Epoch 3081/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8541
Learning Rate: 4.5252224814280574e-05
Epoch 3082/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7613
Learning Rate: 4.5252224814280574e-05
Epoch 3083/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8333
Learning Rate: 4.5252224814280574e-05
Epoch 3084/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9266
Learning Rate: 4.5252224814280574e-05
Epoch 3085/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8264
Learning Rate: 4.5252224814280574e-05
Epoch 3086/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7902
Learning Rate: 4.5252224814280574e-05
Epoch 3087/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7203
Learning Rate: 4.5252224814280574e-05
Epoch 3088/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7474
Learning Rate: 4.5252224814280574e-05
Epoch 3089/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9458
Learning Rate: 4.5252224814280574e-05
Epoch 3090/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7823
Learning Rate: 4.479970256613777e-05
Epoch 3091/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7213
Learning Rate: 4.479970256613777e-05
Epoch 3092/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8684
Learning Rate: 4.479970256613777e-05
Epoch 3093/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7495
Learning Rate: 4.479970256613777e-05
Epoch 3094/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7553
Learning Rate: 4.479970256613777e-05
Epoch 3095/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7646
Learning Rate: 4.479970256613777e-05
Epoch 3096/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8805
Learning Rate: 4.479970256613777e-05
Epoch 3097/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8692
Learning Rate: 4.479970256613777e-05
Epoch 3098/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8219
Learning Rate: 4.479970256613777e-05
Epoch 3099/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8607
Learning Rate: 4.479970256613777e-05
Epoch 3100/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9023
Learning Rate: 4.4351705540476394e-05
Epoch 3101/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8369
Learning Rate: 4.4351705540476394e-05
Epoch 3102/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8242
Learning Rate: 4.4351705540476394e-05
Epoch 3103/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8949
Learning Rate: 4.4351705540476394e-05
Epoch 3104/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8398
Learning Rate: 4.4351705540476394e-05
Epoch 3105/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7916
Learning Rate: 4.4351705540476394e-05
Epoch 3106/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7990
Learning Rate: 4.4351705540476394e-05
Epoch 3107/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8014
Learning Rate: 4.4351705540476394e-05
Epoch 3108/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7077
Learning Rate: 4.4351705540476394e-05
Epoch 3109/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7957
Learning Rate: 4.4351705540476394e-05
Epoch 3110/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8842
Learning Rate: 4.3908188485071626e-05
Epoch 3111/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8123
Learning Rate: 4.3908188485071626e-05
Epoch 3112/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9758
Learning Rate: 4.3908188485071626e-05
Epoch 3113/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7502
Learning Rate: 4.3908188485071626e-05
Epoch 3114/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6740
Learning Rate: 4.3908188485071626e-05
Epoch 3115/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7684
Learning Rate: 4.3908188485071626e-05
Epoch 3116/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8011
Learning Rate: 4.3908188485071626e-05
Epoch 3117/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8739
Learning Rate: 4.3908188485071626e-05
Epoch 3118/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9845
Learning Rate: 4.3908188485071626e-05
Epoch 3119/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8709
Learning Rate: 4.3908188485071626e-05
Epoch 3120/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7841
Learning Rate: 4.346910660022091e-05
Epoch 3121/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7284
Learning Rate: 4.346910660022091e-05
Epoch 3122/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8406
Learning Rate: 4.346910660022091e-05
Epoch 3123/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7789
Learning Rate: 4.346910660022091e-05
Epoch 3124/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8532
Learning Rate: 4.346910660022091e-05
Epoch 3125/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8596
Learning Rate: 4.346910660022091e-05
Epoch 3126/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8556
Learning Rate: 4.346910660022091e-05
Epoch 3127/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8558
Learning Rate: 4.346910660022091e-05
Epoch 3128/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9488
Learning Rate: 4.346910660022091e-05
Epoch 3129/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8449
Learning Rate: 4.346910660022091e-05
Epoch 3130/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7907
Learning Rate: 4.30344155342187e-05
Epoch 3131/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8607
Learning Rate: 4.30344155342187e-05
Epoch 3132/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8495
Learning Rate: 4.30344155342187e-05
Epoch 3133/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8544
Learning Rate: 4.30344155342187e-05
Epoch 3134/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8483
Learning Rate: 4.30344155342187e-05
Epoch 3135/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8225
Learning Rate: 4.30344155342187e-05
Epoch 3136/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8309
Learning Rate: 4.30344155342187e-05
Epoch 3137/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8270
Learning Rate: 4.30344155342187e-05
Epoch 3138/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7267
Learning Rate: 4.30344155342187e-05
Epoch 3139/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8173
Learning Rate: 4.30344155342187e-05
Epoch 3140/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6999
Learning Rate: 4.260407137887652e-05
Epoch 3141/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8260
Learning Rate: 4.260407137887652e-05
Epoch 3142/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8896
Learning Rate: 4.260407137887652e-05
Epoch 3143/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8496
Learning Rate: 4.260407137887652e-05
Epoch 3144/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7145
Learning Rate: 4.260407137887652e-05
Epoch 3145/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8430
Learning Rate: 4.260407137887652e-05
Epoch 3146/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7776
Learning Rate: 4.260407137887652e-05
Epoch 3147/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7511
Learning Rate: 4.260407137887652e-05
Epoch 3148/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8279
Learning Rate: 4.260407137887652e-05
Epoch 3149/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7530
Learning Rate: 4.260407137887652e-05
Epoch 3150/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8567
Learning Rate: 4.217803066508775e-05
Epoch 3151/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7152
Learning Rate: 4.217803066508775e-05
Epoch 3152/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7031
Learning Rate: 4.217803066508775e-05
Epoch 3153/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7747
Learning Rate: 4.217803066508775e-05
Epoch 3154/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8457
Learning Rate: 4.217803066508775e-05
Epoch 3155/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9366
Learning Rate: 4.217803066508775e-05
Epoch 3156/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9262
Learning Rate: 4.217803066508775e-05
Epoch 3157/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7419
Learning Rate: 4.217803066508775e-05
Epoch 3158/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6739
Learning Rate: 4.217803066508775e-05
Epoch 3159/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7670
Learning Rate: 4.217803066508775e-05
Epoch 3160/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9365
Learning Rate: 4.1756250358436874e-05
Epoch 3161/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8168
Learning Rate: 4.1756250358436874e-05
Epoch 3162/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8437
Learning Rate: 4.1756250358436874e-05
Epoch 3163/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6951
Learning Rate: 4.1756250358436874e-05
Epoch 3164/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7018
Learning Rate: 4.1756250358436874e-05
Epoch 3165/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8770
Learning Rate: 4.1756250358436874e-05
Epoch 3166/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8370
Learning Rate: 4.1756250358436874e-05
Epoch 3167/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7973
Learning Rate: 4.1756250358436874e-05
Epoch 3168/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8222
Learning Rate: 4.1756250358436874e-05
Epoch 3169/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9378
Learning Rate: 4.1756250358436874e-05
Epoch 3170/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8046
Learning Rate: 4.133868785485251e-05
Epoch 3171/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9285
Learning Rate: 4.133868785485251e-05
Epoch 3172/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8194
Learning Rate: 4.133868785485251e-05
Epoch 3173/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8537
Learning Rate: 4.133868785485251e-05
Epoch 3174/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7952
Learning Rate: 4.133868785485251e-05
Epoch 3175/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7709
Learning Rate: 4.133868785485251e-05
Epoch 3176/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8591
Learning Rate: 4.133868785485251e-05
Epoch 3177/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0958
Learning Rate: 4.133868785485251e-05
Epoch 3178/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8127
Learning Rate: 4.133868785485251e-05
Epoch 3179/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9261
Learning Rate: 4.133868785485251e-05
Epoch 3180/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7893
Learning Rate: 4.0925300976303984e-05
Epoch 3181/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1034
Learning Rate: 4.0925300976303984e-05
Epoch 3182/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7987
Learning Rate: 4.0925300976303984e-05
Epoch 3183/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7735
Learning Rate: 4.0925300976303984e-05
Epoch 3184/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7275
Learning Rate: 4.0925300976303984e-05
Epoch 3185/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8361
Learning Rate: 4.0925300976303984e-05
Epoch 3186/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8974
Learning Rate: 4.0925300976303984e-05
Epoch 3187/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8009
Learning Rate: 4.0925300976303984e-05
Epoch 3188/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8984
Learning Rate: 4.0925300976303984e-05
Epoch 3189/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8618
Learning Rate: 4.0925300976303984e-05
Epoch 3190/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7843
Learning Rate: 4.0516047966540946e-05
Epoch 3191/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9182
Learning Rate: 4.0516047966540946e-05
Epoch 3192/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8097
Learning Rate: 4.0516047966540946e-05
Epoch 3193/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7821
Learning Rate: 4.0516047966540946e-05
Epoch 3194/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8372
Learning Rate: 4.0516047966540946e-05
Epoch 3195/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8392
Learning Rate: 4.0516047966540946e-05
Epoch 3196/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8972
Learning Rate: 4.0516047966540946e-05
Epoch 3197/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9076
Learning Rate: 4.0516047966540946e-05
Epoch 3198/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8075
Learning Rate: 4.0516047966540946e-05
Epoch 3199/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7543
Learning Rate: 4.0516047966540946e-05
Epoch 3200/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8708
Learning Rate: 4.0110887486875534e-05
Epoch 3201/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7164
Learning Rate: 4.0110887486875534e-05
Epoch 3202/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8085
Learning Rate: 4.0110887486875534e-05
Epoch 3203/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7262
Learning Rate: 4.0110887486875534e-05
Epoch 3204/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8176
Learning Rate: 4.0110887486875534e-05
Epoch 3205/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7638
Learning Rate: 4.0110887486875534e-05
Epoch 3206/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8104
Learning Rate: 4.0110887486875534e-05
Epoch 3207/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8829
Learning Rate: 4.0110887486875534e-05
Epoch 3208/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8099
Learning Rate: 4.0110887486875534e-05
Epoch 3209/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8153
Learning Rate: 4.0110887486875534e-05
Epoch 3210/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8478
Learning Rate: 3.970977861200678e-05
Epoch 3211/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8229
Learning Rate: 3.970977861200678e-05
Epoch 3212/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7401
Learning Rate: 3.970977861200678e-05
Epoch 3213/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8014
Learning Rate: 3.970977861200678e-05
Epoch 3214/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8656
Learning Rate: 3.970977861200678e-05
Epoch 3215/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8619
Learning Rate: 3.970977861200678e-05
Epoch 3216/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8558
Learning Rate: 3.970977861200678e-05
Epoch 3217/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7745
Learning Rate: 3.970977861200678e-05
Epoch 3218/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7706
Learning Rate: 3.970977861200678e-05
Epoch 3219/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8362
Learning Rate: 3.970977861200678e-05
Epoch 3220/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8048
Learning Rate: 3.9312680825886715e-05
Epoch 3221/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7700
Learning Rate: 3.9312680825886715e-05
Epoch 3222/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7980
Learning Rate: 3.9312680825886715e-05
Epoch 3223/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9650
Learning Rate: 3.9312680825886715e-05
Epoch 3224/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7582
Learning Rate: 3.9312680825886715e-05
Epoch 3225/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8465
Learning Rate: 3.9312680825886715e-05
Epoch 3226/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8932
Learning Rate: 3.9312680825886715e-05
Epoch 3227/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7694
Learning Rate: 3.9312680825886715e-05
Epoch 3228/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9175
Learning Rate: 3.9312680825886715e-05
Epoch 3229/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7667
Learning Rate: 3.9312680825886715e-05
Epoch 3230/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7990
Learning Rate: 3.891955401762785e-05
Epoch 3231/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9329
Learning Rate: 3.891955401762785e-05
Epoch 3232/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8772
Learning Rate: 3.891955401762785e-05
Epoch 3233/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8544
Learning Rate: 3.891955401762785e-05
Epoch 3234/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9683
Learning Rate: 3.891955401762785e-05
Epoch 3235/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7953
Learning Rate: 3.891955401762785e-05
Epoch 3236/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8359
Learning Rate: 3.891955401762785e-05
Epoch 3237/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9246
Learning Rate: 3.891955401762785e-05
Epoch 3238/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7859
Learning Rate: 3.891955401762785e-05
Epoch 3239/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8051
Learning Rate: 3.891955401762785e-05
Epoch 3240/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8487
Learning Rate: 3.8530358477451566e-05
Epoch 3241/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8015
Learning Rate: 3.8530358477451566e-05
Epoch 3242/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8846
Learning Rate: 3.8530358477451566e-05
Epoch 3243/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8140
Learning Rate: 3.8530358477451566e-05
Epoch 3244/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8369
Learning Rate: 3.8530358477451566e-05
Epoch 3245/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7879
Learning Rate: 3.8530358477451566e-05
Epoch 3246/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7755
Learning Rate: 3.8530358477451566e-05
Epoch 3247/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7360
Learning Rate: 3.8530358477451566e-05
Epoch 3248/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7837
Learning Rate: 3.8530358477451566e-05
Epoch 3249/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8474
Learning Rate: 3.8530358477451566e-05
Epoch 3250/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7921
Learning Rate: 3.814505489267705e-05
Epoch 3251/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7766
Learning Rate: 3.814505489267705e-05
Epoch 3252/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7789
Learning Rate: 3.814505489267705e-05
Epoch 3253/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8201
Learning Rate: 3.814505489267705e-05
Epoch 3254/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8121
Learning Rate: 3.814505489267705e-05
Epoch 3255/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8565
Learning Rate: 3.814505489267705e-05
Epoch 3256/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9758
Learning Rate: 3.814505489267705e-05
Epoch 3257/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8428
Learning Rate: 3.814505489267705e-05
Epoch 3258/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8833
Learning Rate: 3.814505489267705e-05
Epoch 3259/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9323
Learning Rate: 3.814505489267705e-05
Epoch 3260/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7696
Learning Rate: 3.776360434375028e-05
Epoch 3261/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8045
Learning Rate: 3.776360434375028e-05
Epoch 3262/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8297
Learning Rate: 3.776360434375028e-05
Epoch 3263/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8344
Learning Rate: 3.776360434375028e-05
Epoch 3264/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8447
Learning Rate: 3.776360434375028e-05
Epoch 3265/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7610
Learning Rate: 3.776360434375028e-05
Epoch 3266/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7438
Learning Rate: 3.776360434375028e-05
Epoch 3267/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8065
Learning Rate: 3.776360434375028e-05
Epoch 3268/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7894
Learning Rate: 3.776360434375028e-05
Epoch 3269/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7740
Learning Rate: 3.776360434375028e-05
Epoch 3270/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0610
Learning Rate: 3.738596830031278e-05
Epoch 3271/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8154
Learning Rate: 3.738596830031278e-05
Epoch 3272/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8460
Learning Rate: 3.738596830031278e-05
Epoch 3273/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9841
Learning Rate: 3.738596830031278e-05
Epoch 3274/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8660
Learning Rate: 3.738596830031278e-05
Epoch 3275/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8587
Learning Rate: 3.738596830031278e-05
Epoch 3276/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8174
Learning Rate: 3.738596830031278e-05
Epoch 3277/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7971
Learning Rate: 3.738596830031278e-05
Epoch 3278/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7935
Learning Rate: 3.738596830031278e-05
Epoch 3279/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7289
Learning Rate: 3.738596830031278e-05
Epoch 3280/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8499
Learning Rate: 3.701210861730965e-05
Epoch 3281/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9029
Learning Rate: 3.701210861730965e-05
Epoch 3282/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6933
Learning Rate: 3.701210861730965e-05
Epoch 3283/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8275
Learning Rate: 3.701210861730965e-05
Epoch 3284/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8566
Learning Rate: 3.701210861730965e-05
Epoch 3285/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8094
Learning Rate: 3.701210861730965e-05
Epoch 3286/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8417
Learning Rate: 3.701210861730965e-05
Epoch 3287/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9185
Learning Rate: 3.701210861730965e-05
Epoch 3288/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6890
Learning Rate: 3.701210861730965e-05
Epoch 3289/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8931
Learning Rate: 3.701210861730965e-05
Epoch 3290/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8027
Learning Rate: 3.664198753113655e-05
Epoch 3291/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7282
Learning Rate: 3.664198753113655e-05
Epoch 3292/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9075
Learning Rate: 3.664198753113655e-05
Epoch 3293/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7477
Learning Rate: 3.664198753113655e-05
Epoch 3294/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7422
Learning Rate: 3.664198753113655e-05
Epoch 3295/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7435
Learning Rate: 3.664198753113655e-05
Epoch 3296/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8228
Learning Rate: 3.664198753113655e-05
Epoch 3297/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9107
Learning Rate: 3.664198753113655e-05
Epoch 3298/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8252
Learning Rate: 3.664198753113655e-05
Epoch 3299/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8983
Learning Rate: 3.664198753113655e-05
Epoch 3300/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8294
Learning Rate: 3.6275567655825185e-05
Epoch 3301/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9260
Learning Rate: 3.6275567655825185e-05
Epoch 3302/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7543
Learning Rate: 3.6275567655825185e-05
Epoch 3303/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8114
Learning Rate: 3.6275567655825185e-05
Epoch 3304/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8441
Learning Rate: 3.6275567655825185e-05
Epoch 3305/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8793
Learning Rate: 3.6275567655825185e-05
Epoch 3306/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8018
Learning Rate: 3.6275567655825185e-05
Epoch 3307/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8806
Learning Rate: 3.6275567655825185e-05
Epoch 3308/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9194
Learning Rate: 3.6275567655825185e-05
Epoch 3309/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8008
Learning Rate: 3.6275567655825185e-05
Epoch 3310/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7137
Learning Rate: 3.5912811979266936e-05
Epoch 3311/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8373
Learning Rate: 3.5912811979266936e-05
Epoch 3312/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7902
Learning Rate: 3.5912811979266936e-05
Epoch 3313/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8694
Learning Rate: 3.5912811979266936e-05
Epoch 3314/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9080
Learning Rate: 3.5912811979266936e-05
Epoch 3315/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8732
Learning Rate: 3.5912811979266936e-05
Epoch 3316/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8008
Learning Rate: 3.5912811979266936e-05
Epoch 3317/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8563
Learning Rate: 3.5912811979266936e-05
Epoch 3318/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7770
Learning Rate: 3.5912811979266936e-05
Epoch 3319/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8665
Learning Rate: 3.5912811979266936e-05
Epoch 3320/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8404
Learning Rate: 3.555368385947427e-05
Epoch 3321/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7602
Learning Rate: 3.555368385947427e-05
Epoch 3322/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8608
Learning Rate: 3.555368385947427e-05
Epoch 3323/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7626
Learning Rate: 3.555368385947427e-05
Epoch 3324/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8661
Learning Rate: 3.555368385947427e-05
Epoch 3325/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9242
Learning Rate: 3.555368385947427e-05
Epoch 3326/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9338
Learning Rate: 3.555368385947427e-05
Epoch 3327/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8146
Learning Rate: 3.555368385947427e-05
Epoch 3328/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8360
Learning Rate: 3.555368385947427e-05
Epoch 3329/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8722
Learning Rate: 3.555368385947427e-05
Epoch 3330/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7897
Learning Rate: 3.5198147020879524e-05
Epoch 3331/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7762
Learning Rate: 3.5198147020879524e-05
Epoch 3332/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8125
Learning Rate: 3.5198147020879524e-05
Epoch 3333/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8109
Learning Rate: 3.5198147020879524e-05
Epoch 3334/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8302
Learning Rate: 3.5198147020879524e-05
Epoch 3335/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7656
Learning Rate: 3.5198147020879524e-05
Epoch 3336/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8641
Learning Rate: 3.5198147020879524e-05
Epoch 3337/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9088
Learning Rate: 3.5198147020879524e-05
Epoch 3338/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8179
Learning Rate: 3.5198147020879524e-05
Epoch 3339/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8989
Learning Rate: 3.5198147020879524e-05
Epoch 3340/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7929
Learning Rate: 3.484616555067073e-05
Epoch 3341/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9671
Learning Rate: 3.484616555067073e-05
Epoch 3342/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8981
Learning Rate: 3.484616555067073e-05
Epoch 3343/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7996
Learning Rate: 3.484616555067073e-05
Epoch 3344/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7935
Learning Rate: 3.484616555067073e-05
Epoch 3345/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7662
Learning Rate: 3.484616555067073e-05
Epoch 3346/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7721
Learning Rate: 3.484616555067073e-05
Epoch 3347/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8108
Learning Rate: 3.484616555067073e-05
Epoch 3348/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8600
Learning Rate: 3.484616555067073e-05
Epoch 3349/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8804
Learning Rate: 3.484616555067073e-05
Epoch 3350/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9151
Learning Rate: 3.4497703895164024e-05
Epoch 3351/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7339
Learning Rate: 3.4497703895164024e-05
Epoch 3352/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7412
Learning Rate: 3.4497703895164024e-05
Epoch 3353/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7013
Learning Rate: 3.4497703895164024e-05
Epoch 3354/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7872
Learning Rate: 3.4497703895164024e-05
Epoch 3355/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8173
Learning Rate: 3.4497703895164024e-05
Epoch 3356/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9401
Learning Rate: 3.4497703895164024e-05
Epoch 3357/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9650
Learning Rate: 3.4497703895164024e-05
Epoch 3358/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8023
Learning Rate: 3.4497703895164024e-05
Epoch 3359/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8695
Learning Rate: 3.4497703895164024e-05
Epoch 3360/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9303
Learning Rate: 3.4152726856212386e-05
Epoch 3361/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6830
Learning Rate: 3.4152726856212386e-05
Epoch 3362/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7160
Learning Rate: 3.4152726856212386e-05
Epoch 3363/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9618
Learning Rate: 3.4152726856212386e-05
Epoch 3364/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8367
Learning Rate: 3.4152726856212386e-05
Epoch 3365/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0181
Learning Rate: 3.4152726856212386e-05
Epoch 3366/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8620
Learning Rate: 3.4152726856212386e-05
Epoch 3367/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7970
Learning Rate: 3.4152726856212386e-05
Epoch 3368/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8004
Learning Rate: 3.4152726856212386e-05
Epoch 3369/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8395
Learning Rate: 3.4152726856212386e-05
Epoch 3370/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9017
Learning Rate: 3.381119958765026e-05
Epoch 3371/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7355
Learning Rate: 3.381119958765026e-05
Epoch 3372/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7936
Learning Rate: 3.381119958765026e-05
Epoch 3373/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9708
Learning Rate: 3.381119958765026e-05
Epoch 3374/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8285
Learning Rate: 3.381119958765026e-05
Epoch 3375/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8161
Learning Rate: 3.381119958765026e-05
Epoch 3376/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7916
Learning Rate: 3.381119958765026e-05
Epoch 3377/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9568
Learning Rate: 3.381119958765026e-05
Epoch 3378/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7492
Learning Rate: 3.381119958765026e-05
Epoch 3379/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9519
Learning Rate: 3.381119958765026e-05
Epoch 3380/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8283
Learning Rate: 3.3473087591773755e-05
Epoch 3381/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0259
Learning Rate: 3.3473087591773755e-05
Epoch 3382/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8998
Learning Rate: 3.3473087591773755e-05
Epoch 3383/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7771
Learning Rate: 3.3473087591773755e-05
Epoch 3384/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8928
Learning Rate: 3.3473087591773755e-05
Epoch 3385/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8395
Learning Rate: 3.3473087591773755e-05
Epoch 3386/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7989
Learning Rate: 3.3473087591773755e-05
Epoch 3387/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0311
Learning Rate: 3.3473087591773755e-05
Epoch 3388/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8415
Learning Rate: 3.3473087591773755e-05
Epoch 3389/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8004
Learning Rate: 3.3473087591773755e-05
Epoch 3390/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8714
Learning Rate: 3.313835671585602e-05
Epoch 3391/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6953
Learning Rate: 3.313835671585602e-05
Epoch 3392/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8553
Learning Rate: 3.313835671585602e-05
Epoch 3393/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7335
Learning Rate: 3.313835671585602e-05
Epoch 3394/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8267
Learning Rate: 3.313835671585602e-05
Epoch 3395/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7779
Learning Rate: 3.313835671585602e-05
Epoch 3396/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8918
Learning Rate: 3.313835671585602e-05
Epoch 3397/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7601
Learning Rate: 3.313835671585602e-05
Epoch 3398/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8058
Learning Rate: 3.313835671585602e-05
Epoch 3399/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8443
Learning Rate: 3.313835671585602e-05
Epoch 3400/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7585
Learning Rate: 3.280697314869746e-05
Epoch 3401/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9093
Learning Rate: 3.280697314869746e-05
Epoch 3402/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8368
Learning Rate: 3.280697314869746e-05
Epoch 3403/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8548
Learning Rate: 3.280697314869746e-05
Epoch 3404/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8979
Learning Rate: 3.280697314869746e-05
Epoch 3405/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8409
Learning Rate: 3.280697314869746e-05
Epoch 3406/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7521
Learning Rate: 3.280697314869746e-05
Epoch 3407/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7637
Learning Rate: 3.280697314869746e-05
Epoch 3408/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7970
Learning Rate: 3.280697314869746e-05
Epoch 3409/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9480
Learning Rate: 3.280697314869746e-05
Epoch 3410/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8156
Learning Rate: 3.247890341721048e-05
Epoch 3411/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7745
Learning Rate: 3.247890341721048e-05
Epoch 3412/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8785
Learning Rate: 3.247890341721048e-05
Epoch 3413/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8165
Learning Rate: 3.247890341721048e-05
Epoch 3414/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7516
Learning Rate: 3.247890341721048e-05
Epoch 3415/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7230
Learning Rate: 3.247890341721048e-05
Epoch 3416/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8169
Learning Rate: 3.247890341721048e-05
Epoch 3417/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7952
Learning Rate: 3.247890341721048e-05
Epoch 3418/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9499
Learning Rate: 3.247890341721048e-05
Epoch 3419/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8621
Learning Rate: 3.247890341721048e-05
Epoch 3420/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6990
Learning Rate: 3.215411438303838e-05
Epoch 3421/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8223
Learning Rate: 3.215411438303838e-05
Epoch 3422/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7044
Learning Rate: 3.215411438303838e-05
Epoch 3423/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9573
Learning Rate: 3.215411438303838e-05
Epoch 3424/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7263
Learning Rate: 3.215411438303838e-05
Epoch 3425/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7740
Learning Rate: 3.215411438303838e-05
Epoch 3426/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7427
Learning Rate: 3.215411438303838e-05
Epoch 3427/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8513
Learning Rate: 3.215411438303838e-05
Epoch 3428/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9279
Learning Rate: 3.215411438303838e-05
Epoch 3429/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8982
Learning Rate: 3.215411438303838e-05
Epoch 3430/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9186
Learning Rate: 3.1832573239207997e-05
Epoch 3431/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7654
Learning Rate: 3.1832573239207997e-05
Epoch 3432/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7964
Learning Rate: 3.1832573239207997e-05
Epoch 3433/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7113
Learning Rate: 3.1832573239207997e-05
Epoch 3434/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7382
Learning Rate: 3.1832573239207997e-05
Epoch 3435/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7550
Learning Rate: 3.1832573239207997e-05
Epoch 3436/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9118
Learning Rate: 3.1832573239207997e-05
Epoch 3437/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9908
Learning Rate: 3.1832573239207997e-05
Epoch 3438/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8220
Learning Rate: 3.1832573239207997e-05
Epoch 3439/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7881
Learning Rate: 3.1832573239207997e-05
Epoch 3440/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7325
Learning Rate: 3.1514247506815915e-05
Epoch 3441/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7380
Learning Rate: 3.1514247506815915e-05
Epoch 3442/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7697
Learning Rate: 3.1514247506815915e-05
Epoch 3443/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7296
Learning Rate: 3.1514247506815915e-05
Epoch 3444/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8039
Learning Rate: 3.1514247506815915e-05
Epoch 3445/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8662
Learning Rate: 3.1514247506815915e-05
Epoch 3446/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7958
Learning Rate: 3.1514247506815915e-05
Epoch 3447/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0141
Learning Rate: 3.1514247506815915e-05
Epoch 3448/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7955
Learning Rate: 3.1514247506815915e-05
Epoch 3449/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8164
Learning Rate: 3.1514247506815915e-05
Epoch 3450/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8134
Learning Rate: 3.1199105031747755e-05
Epoch 3451/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8148
Learning Rate: 3.1199105031747755e-05
Epoch 3452/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8169
Learning Rate: 3.1199105031747755e-05
Epoch 3453/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8305
Learning Rate: 3.1199105031747755e-05
Epoch 3454/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9613
Learning Rate: 3.1199105031747755e-05
Epoch 3455/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7463
Learning Rate: 3.1199105031747755e-05
Epoch 3456/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8449
Learning Rate: 3.1199105031747755e-05
Epoch 3457/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8415
Learning Rate: 3.1199105031747755e-05
Epoch 3458/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7828
Learning Rate: 3.1199105031747755e-05
Epoch 3459/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8041
Learning Rate: 3.1199105031747755e-05
Epoch 3460/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8554
Learning Rate: 3.0887113981430276e-05
Epoch 3461/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7810
Learning Rate: 3.0887113981430276e-05
Epoch 3462/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8059
Learning Rate: 3.0887113981430276e-05
Epoch 3463/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7998
Learning Rate: 3.0887113981430276e-05
Epoch 3464/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8611
Learning Rate: 3.0887113981430276e-05
Epoch 3465/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8880
Learning Rate: 3.0887113981430276e-05
Epoch 3466/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8532
Learning Rate: 3.0887113981430276e-05
Epoch 3467/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8550
Learning Rate: 3.0887113981430276e-05
Epoch 3468/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7946
Learning Rate: 3.0887113981430276e-05
Epoch 3469/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8115
Learning Rate: 3.0887113981430276e-05
Epoch 3470/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6800
Learning Rate: 3.0578242841615975e-05
Epoch 3471/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7259
Learning Rate: 3.0578242841615975e-05
Epoch 3472/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7789
Learning Rate: 3.0578242841615975e-05
Epoch 3473/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9136
Learning Rate: 3.0578242841615975e-05
Epoch 3474/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8381
Learning Rate: 3.0578242841615975e-05
Epoch 3475/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8391
Learning Rate: 3.0578242841615975e-05
Epoch 3476/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8515
Learning Rate: 3.0578242841615975e-05
Epoch 3477/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7452
Learning Rate: 3.0578242841615975e-05
Epoch 3478/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8350
Learning Rate: 3.0578242841615975e-05
Epoch 3479/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7237
Learning Rate: 3.0578242841615975e-05
Epoch 3480/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8790
Learning Rate: 3.0272460413199815e-05
Epoch 3481/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7981
Learning Rate: 3.0272460413199815e-05
Epoch 3482/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8932
Learning Rate: 3.0272460413199815e-05
Epoch 3483/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8923
Learning Rate: 3.0272460413199815e-05
Epoch 3484/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8274
Learning Rate: 3.0272460413199815e-05
Epoch 3485/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7240
Learning Rate: 3.0272460413199815e-05
Epoch 3486/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9061
Learning Rate: 3.0272460413199815e-05
Epoch 3487/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8117
Learning Rate: 3.0272460413199815e-05
Epoch 3488/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8162
Learning Rate: 3.0272460413199815e-05
Epoch 3489/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8327
Learning Rate: 3.0272460413199815e-05
Epoch 3490/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8905
Learning Rate: 2.9969735809067817e-05
Epoch 3491/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7993
Learning Rate: 2.9969735809067817e-05
Epoch 3492/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8736
Learning Rate: 2.9969735809067817e-05
Epoch 3493/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9045
Learning Rate: 2.9969735809067817e-05
Epoch 3494/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7356
Learning Rate: 2.9969735809067817e-05
Epoch 3495/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8332
Learning Rate: 2.9969735809067817e-05
Epoch 3496/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7364
Learning Rate: 2.9969735809067817e-05
Epoch 3497/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8548
Learning Rate: 2.9969735809067817e-05
Epoch 3498/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8615
Learning Rate: 2.9969735809067817e-05
Epoch 3499/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8762
Learning Rate: 2.9969735809067817e-05
Epoch 3500/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8238
Learning Rate: 2.967003845097714e-05
Epoch 3501/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7984
Learning Rate: 2.967003845097714e-05
Epoch 3502/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8459
Learning Rate: 2.967003845097714e-05
Epoch 3503/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7357
Learning Rate: 2.967003845097714e-05
Epoch 3504/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7302
Learning Rate: 2.967003845097714e-05
Epoch 3505/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8063
Learning Rate: 2.967003845097714e-05
Epoch 3506/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8272
Learning Rate: 2.967003845097714e-05
Epoch 3507/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8172
Learning Rate: 2.967003845097714e-05
Epoch 3508/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8417
Learning Rate: 2.967003845097714e-05
Epoch 3509/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8053
Learning Rate: 2.967003845097714e-05
Epoch 3510/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7613
Learning Rate: 2.9373338066467366e-05
Epoch 3511/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7567
Learning Rate: 2.9373338066467366e-05
Epoch 3512/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8009
Learning Rate: 2.9373338066467366e-05
Epoch 3513/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8682
Learning Rate: 2.9373338066467366e-05
Epoch 3514/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7813
Learning Rate: 2.9373338066467366e-05
Epoch 3515/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7847
Learning Rate: 2.9373338066467366e-05
Epoch 3516/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8442
Learning Rate: 2.9373338066467366e-05
Epoch 3517/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7707
Learning Rate: 2.9373338066467366e-05
Epoch 3518/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8537
Learning Rate: 2.9373338066467366e-05
Epoch 3519/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8429
Learning Rate: 2.9373338066467366e-05
Epoch 3520/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8345
Learning Rate: 2.9079604685802692e-05
Epoch 3521/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8291
Learning Rate: 2.9079604685802692e-05
Epoch 3522/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8301
Learning Rate: 2.9079604685802692e-05
Epoch 3523/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7971
Learning Rate: 2.9079604685802692e-05
Epoch 3524/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8890
Learning Rate: 2.9079604685802692e-05
Epoch 3525/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7231
Learning Rate: 2.9079604685802692e-05
Epoch 3526/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8487
Learning Rate: 2.9079604685802692e-05
Epoch 3527/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8242
Learning Rate: 2.9079604685802692e-05
Epoch 3528/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8945
Learning Rate: 2.9079604685802692e-05
Epoch 3529/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9408
Learning Rate: 2.9079604685802692e-05
Epoch 3530/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8346
Learning Rate: 2.8788808638944663e-05
Epoch 3531/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7809
Learning Rate: 2.8788808638944663e-05
Epoch 3532/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8209
Learning Rate: 2.8788808638944663e-05
Epoch 3533/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6729
Learning Rate: 2.8788808638944663e-05
Epoch 3534/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7638
Learning Rate: 2.8788808638944663e-05
Epoch 3535/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7108
Learning Rate: 2.8788808638944663e-05
Epoch 3536/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7563
Learning Rate: 2.8788808638944663e-05
Epoch 3537/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7130
Learning Rate: 2.8788808638944663e-05
Epoch 3538/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7048
Learning Rate: 2.8788808638944663e-05
Epoch 3539/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8361
Learning Rate: 2.8788808638944663e-05
Epoch 3540/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7044
Learning Rate: 2.8500920552555216e-05
Epoch 3541/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9050
Learning Rate: 2.8500920552555216e-05
Epoch 3542/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8795
Learning Rate: 2.8500920552555216e-05
Epoch 3543/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7597
Learning Rate: 2.8500920552555216e-05
Epoch 3544/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7437
Learning Rate: 2.8500920552555216e-05
Epoch 3545/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8922
Learning Rate: 2.8500920552555216e-05
Epoch 3546/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8121
Learning Rate: 2.8500920552555216e-05
Epoch 3547/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7497
Learning Rate: 2.8500920552555216e-05
Epoch 3548/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8394
Learning Rate: 2.8500920552555216e-05
Epoch 3549/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7612
Learning Rate: 2.8500920552555216e-05
Epoch 3550/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7812
Learning Rate: 2.8215911347029665e-05
Epoch 3551/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9539
Learning Rate: 2.8215911347029665e-05
Epoch 3552/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8444
Learning Rate: 2.8215911347029665e-05
Epoch 3553/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7950
Learning Rate: 2.8215911347029665e-05
Epoch 3554/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8538
Learning Rate: 2.8215911347029665e-05
Epoch 3555/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8618
Learning Rate: 2.8215911347029665e-05
Epoch 3556/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7998
Learning Rate: 2.8215911347029665e-05
Epoch 3557/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8610
Learning Rate: 2.8215911347029665e-05
Epoch 3558/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7704
Learning Rate: 2.8215911347029665e-05
Epoch 3559/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8525
Learning Rate: 2.8215911347029665e-05
Epoch 3560/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8564
Learning Rate: 2.793375223355937e-05
Epoch 3561/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7832
Learning Rate: 2.793375223355937e-05
Epoch 3562/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9219
Learning Rate: 2.793375223355937e-05
Epoch 3563/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7722
Learning Rate: 2.793375223355937e-05
Epoch 3564/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8589
Learning Rate: 2.793375223355937e-05
Epoch 3565/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8787
Learning Rate: 2.793375223355937e-05
Epoch 3566/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7946
Learning Rate: 2.793375223355937e-05
Epoch 3567/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8959
Learning Rate: 2.793375223355937e-05
Epoch 3568/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8036
Learning Rate: 2.793375223355937e-05
Epoch 3569/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8722
Learning Rate: 2.793375223355937e-05
Epoch 3570/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7931
Learning Rate: 2.7654414711223776e-05
Epoch 3571/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7949
Learning Rate: 2.7654414711223776e-05
Epoch 3572/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7368
Learning Rate: 2.7654414711223776e-05
Epoch 3573/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7569
Learning Rate: 2.7654414711223776e-05
Epoch 3574/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7869
Learning Rate: 2.7654414711223776e-05
Epoch 3575/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7567
Learning Rate: 2.7654414711223776e-05
Epoch 3576/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8737
Learning Rate: 2.7654414711223776e-05
Epoch 3577/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8395
Learning Rate: 2.7654414711223776e-05
Epoch 3578/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8725
Learning Rate: 2.7654414711223776e-05
Epoch 3579/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8663
Learning Rate: 2.7654414711223776e-05
Epoch 3580/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9750
Learning Rate: 2.7377870564111538e-05
Epoch 3581/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7724
Learning Rate: 2.7377870564111538e-05
Epoch 3582/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7815
Learning Rate: 2.7377870564111538e-05
Epoch 3583/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7644
Learning Rate: 2.7377870564111538e-05
Epoch 3584/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8143
Learning Rate: 2.7377870564111538e-05
Epoch 3585/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7245
Learning Rate: 2.7377870564111538e-05
Epoch 3586/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7561
Learning Rate: 2.7377870564111538e-05
Epoch 3587/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8920
Learning Rate: 2.7377870564111538e-05
Epoch 3588/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7128
Learning Rate: 2.7377870564111538e-05
Epoch 3589/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8823
Learning Rate: 2.7377870564111538e-05
Epoch 3590/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9596
Learning Rate: 2.7104091858470422e-05
Epoch 3591/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7511
Learning Rate: 2.7104091858470422e-05
Epoch 3592/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8017
Learning Rate: 2.7104091858470422e-05
Epoch 3593/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8105
Learning Rate: 2.7104091858470422e-05
Epoch 3594/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9139
Learning Rate: 2.7104091858470422e-05
Epoch 3595/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7080
Learning Rate: 2.7104091858470422e-05
Epoch 3596/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7210
Learning Rate: 2.7104091858470422e-05
Epoch 3597/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8154
Learning Rate: 2.7104091858470422e-05
Epoch 3598/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8698
Learning Rate: 2.7104091858470422e-05
Epoch 3599/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8265
Learning Rate: 2.7104091858470422e-05
Epoch 3600/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8072
Learning Rate: 2.6833050939885718e-05
Epoch 3601/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8954
Learning Rate: 2.6833050939885718e-05
Epoch 3602/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6929
Learning Rate: 2.6833050939885718e-05
Epoch 3603/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7936
Learning Rate: 2.6833050939885718e-05
Epoch 3604/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7706
Learning Rate: 2.6833050939885718e-05
Epoch 3605/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8610
Learning Rate: 2.6833050939885718e-05
Epoch 3606/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7097
Learning Rate: 2.6833050939885718e-05
Epoch 3607/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7746
Learning Rate: 2.6833050939885718e-05
Epoch 3608/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7074
Learning Rate: 2.6833050939885718e-05
Epoch 3609/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7231
Learning Rate: 2.6833050939885718e-05
Epoch 3610/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7592
Learning Rate: 2.656472043048686e-05
Epoch 3611/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8050
Learning Rate: 2.656472043048686e-05
Epoch 3612/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8557
Learning Rate: 2.656472043048686e-05
Epoch 3613/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7572
Learning Rate: 2.656472043048686e-05
Epoch 3614/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7754
Learning Rate: 2.656472043048686e-05
Epoch 3615/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8381
Learning Rate: 2.656472043048686e-05
Epoch 3616/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7487
Learning Rate: 2.656472043048686e-05
Epoch 3617/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8436
Learning Rate: 2.656472043048686e-05
Epoch 3618/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7569
Learning Rate: 2.656472043048686e-05
Epoch 3619/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8637
Learning Rate: 2.656472043048686e-05
Epoch 3620/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8697
Learning Rate: 2.629907322618199e-05
Epoch 3621/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7563
Learning Rate: 2.629907322618199e-05
Epoch 3622/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7299
Learning Rate: 2.629907322618199e-05
Epoch 3623/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7888
Learning Rate: 2.629907322618199e-05
Epoch 3624/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8333
Learning Rate: 2.629907322618199e-05
Epoch 3625/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8333
Learning Rate: 2.629907322618199e-05
Epoch 3626/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8243
Learning Rate: 2.629907322618199e-05
Epoch 3627/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7759
Learning Rate: 2.629907322618199e-05
Epoch 3628/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7958
Learning Rate: 2.629907322618199e-05
Epoch 3629/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8455
Learning Rate: 2.629907322618199e-05
Epoch 3630/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7986
Learning Rate: 2.6036082493920172e-05
Epoch 3631/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7345
Learning Rate: 2.6036082493920172e-05
Epoch 3632/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8662
Learning Rate: 2.6036082493920172e-05
Epoch 3633/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8393
Learning Rate: 2.6036082493920172e-05
Epoch 3634/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8509
Learning Rate: 2.6036082493920172e-05
Epoch 3635/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7220
Learning Rate: 2.6036082493920172e-05
Epoch 3636/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8897
Learning Rate: 2.6036082493920172e-05
Epoch 3637/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7509
Learning Rate: 2.6036082493920172e-05
Epoch 3638/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7876
Learning Rate: 2.6036082493920172e-05
Epoch 3639/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8989
Learning Rate: 2.6036082493920172e-05
Epoch 3640/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8246
Learning Rate: 2.577572166898097e-05
Epoch 3641/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8379
Learning Rate: 2.577572166898097e-05
Epoch 3642/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9592
Learning Rate: 2.577572166898097e-05
Epoch 3643/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8294
Learning Rate: 2.577572166898097e-05
Epoch 3644/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8303
Learning Rate: 2.577572166898097e-05
Epoch 3645/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7869
Learning Rate: 2.577572166898097e-05
Epoch 3646/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8013
Learning Rate: 2.577572166898097e-05
Epoch 3647/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8936
Learning Rate: 2.577572166898097e-05
Epoch 3648/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7998
Learning Rate: 2.577572166898097e-05
Epoch 3649/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8480
Learning Rate: 2.577572166898097e-05
Epoch 3650/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7181
Learning Rate: 2.551796445229116e-05
Epoch 3651/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7550
Learning Rate: 2.551796445229116e-05
Epoch 3652/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7650
Learning Rate: 2.551796445229116e-05
Epoch 3653/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8848
Learning Rate: 2.551796445229116e-05
Epoch 3654/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7905
Learning Rate: 2.551796445229116e-05
Epoch 3655/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8150
Learning Rate: 2.551796445229116e-05
Epoch 3656/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7296
Learning Rate: 2.551796445229116e-05
Epoch 3657/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9389
Learning Rate: 2.551796445229116e-05
Epoch 3658/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8320
Learning Rate: 2.551796445229116e-05
Epoch 3659/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7509
Learning Rate: 2.551796445229116e-05
Epoch 3660/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7417
Learning Rate: 2.5262784807768248e-05
Epoch 3661/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7843
Learning Rate: 2.5262784807768248e-05
Epoch 3662/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8454
Learning Rate: 2.5262784807768248e-05
Epoch 3663/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9233
Learning Rate: 2.5262784807768248e-05
Epoch 3664/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9059
Learning Rate: 2.5262784807768248e-05
Epoch 3665/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7586
Learning Rate: 2.5262784807768248e-05
Epoch 3666/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7511
Learning Rate: 2.5262784807768248e-05
Epoch 3667/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8483
Learning Rate: 2.5262784807768248e-05
Epoch 3668/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8291
Learning Rate: 2.5262784807768248e-05
Epoch 3669/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8365
Learning Rate: 2.5262784807768248e-05
Epoch 3670/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8630
Learning Rate: 2.5010156959690565e-05
Epoch 3671/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8927
Learning Rate: 2.5010156959690565e-05
Epoch 3672/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8589
Learning Rate: 2.5010156959690565e-05
Epoch 3673/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7938
Learning Rate: 2.5010156959690565e-05
Epoch 3674/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7813
Learning Rate: 2.5010156959690565e-05
Epoch 3675/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7909
Learning Rate: 2.5010156959690565e-05
Epoch 3676/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7231
Learning Rate: 2.5010156959690565e-05
Epoch 3677/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8195
Learning Rate: 2.5010156959690565e-05
Epoch 3678/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8009
Learning Rate: 2.5010156959690565e-05
Epoch 3679/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7809
Learning Rate: 2.5010156959690565e-05
Epoch 3680/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9249
Learning Rate: 2.476005539009366e-05
Epoch 3681/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8636
Learning Rate: 2.476005539009366e-05
Epoch 3682/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9314
Learning Rate: 2.476005539009366e-05
Epoch 3683/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8691
Learning Rate: 2.476005539009366e-05
Epoch 3684/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7327
Learning Rate: 2.476005539009366e-05
Epoch 3685/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7826
Learning Rate: 2.476005539009366e-05
Epoch 3686/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7671
Learning Rate: 2.476005539009366e-05
Epoch 3687/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8325
Learning Rate: 2.476005539009366e-05
Epoch 3688/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8379
Learning Rate: 2.476005539009366e-05
Epoch 3689/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7610
Learning Rate: 2.476005539009366e-05
Epoch 3690/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7478
Learning Rate: 2.451245483619272e-05
Epoch 3691/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7119
Learning Rate: 2.451245483619272e-05
Epoch 3692/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9000
Learning Rate: 2.451245483619272e-05
Epoch 3693/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8395
Learning Rate: 2.451245483619272e-05
Epoch 3694/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8248
Learning Rate: 2.451245483619272e-05
Epoch 3695/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7680
Learning Rate: 2.451245483619272e-05
Epoch 3696/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7663
Learning Rate: 2.451245483619272e-05
Epoch 3697/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8122
Learning Rate: 2.451245483619272e-05
Epoch 3698/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8153
Learning Rate: 2.451245483619272e-05
Epoch 3699/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8578
Learning Rate: 2.451245483619272e-05
Epoch 3700/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7463
Learning Rate: 2.4267330287830794e-05
Epoch 3701/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8424
Learning Rate: 2.4267330287830794e-05
Epoch 3702/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8316
Learning Rate: 2.4267330287830794e-05
Epoch 3703/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8601
Learning Rate: 2.4267330287830794e-05
Epoch 3704/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8887
Learning Rate: 2.4267330287830794e-05
Epoch 3705/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7408
Learning Rate: 2.4267330287830794e-05
Epoch 3706/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8034
Learning Rate: 2.4267330287830794e-05
Epoch 3707/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7638
Learning Rate: 2.4267330287830794e-05
Epoch 3708/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8014
Learning Rate: 2.4267330287830794e-05
Epoch 3709/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8677
Learning Rate: 2.4267330287830794e-05
Epoch 3710/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8231
Learning Rate: 2.4024656984952486e-05
Epoch 3711/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8832
Learning Rate: 2.4024656984952486e-05
Epoch 3712/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7707
Learning Rate: 2.4024656984952486e-05
Epoch 3713/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8181
Learning Rate: 2.4024656984952486e-05
Epoch 3714/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9116
Learning Rate: 2.4024656984952486e-05
Epoch 3715/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0402
Learning Rate: 2.4024656984952486e-05
Epoch 3716/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7798
Learning Rate: 2.4024656984952486e-05
Epoch 3717/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6626
Learning Rate: 2.4024656984952486e-05
Epoch 3718/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8166
Learning Rate: 2.4024656984952486e-05
Epoch 3719/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7883
Learning Rate: 2.4024656984952486e-05
Epoch 3720/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8667
Learning Rate: 2.3784410415102963e-05
Epoch 3721/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8310
Learning Rate: 2.3784410415102963e-05
Epoch 3722/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8188
Learning Rate: 2.3784410415102963e-05
Epoch 3723/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8205
Learning Rate: 2.3784410415102963e-05
Epoch 3724/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7232
Learning Rate: 2.3784410415102963e-05
Epoch 3725/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7363
Learning Rate: 2.3784410415102963e-05
Epoch 3726/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8085
Learning Rate: 2.3784410415102963e-05
Epoch 3727/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7517
Learning Rate: 2.3784410415102963e-05
Epoch 3728/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8734
Learning Rate: 2.3784410415102963e-05
Epoch 3729/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6960
Learning Rate: 2.3784410415102963e-05
Epoch 3730/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9529
Learning Rate: 2.3546566310951934e-05
Epoch 3731/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8239
Learning Rate: 2.3546566310951934e-05
Epoch 3732/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7922
Learning Rate: 2.3546566310951934e-05
Epoch 3733/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8502
Learning Rate: 2.3546566310951934e-05
Epoch 3734/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8188
Learning Rate: 2.3546566310951934e-05
Epoch 3735/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8681
Learning Rate: 2.3546566310951934e-05
Epoch 3736/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8725
Learning Rate: 2.3546566310951934e-05
Epoch 3737/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8607
Learning Rate: 2.3546566310951934e-05
Epoch 3738/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7775
Learning Rate: 2.3546566310951934e-05
Epoch 3739/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8316
Learning Rate: 2.3546566310951934e-05
Epoch 3740/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8173
Learning Rate: 2.3311100647842414e-05
Epoch 3741/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8061
Learning Rate: 2.3311100647842414e-05
Epoch 3742/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8273
Learning Rate: 2.3311100647842414e-05
Epoch 3743/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7691
Learning Rate: 2.3311100647842414e-05
Epoch 3744/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7063
Learning Rate: 2.3311100647842414e-05
Epoch 3745/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7436
Learning Rate: 2.3311100647842414e-05
Epoch 3746/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8560
Learning Rate: 2.3311100647842414e-05
Epoch 3747/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7291
Learning Rate: 2.3311100647842414e-05
Epoch 3748/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8277
Learning Rate: 2.3311100647842414e-05
Epoch 3749/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7170
Learning Rate: 2.3311100647842414e-05
Epoch 3750/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8686
Learning Rate: 2.307798964136399e-05
Epoch 3751/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8292
Learning Rate: 2.307798964136399e-05
Epoch 3752/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8144
Learning Rate: 2.307798964136399e-05
Epoch 3753/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7948
Learning Rate: 2.307798964136399e-05
Epoch 3754/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8755
Learning Rate: 2.307798964136399e-05
Epoch 3755/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9037
Learning Rate: 2.307798964136399e-05
Epoch 3756/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8789
Learning Rate: 2.307798964136399e-05
Epoch 3757/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8026
Learning Rate: 2.307798964136399e-05
Epoch 3758/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8257
Learning Rate: 2.307798964136399e-05
Epoch 3759/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7830
Learning Rate: 2.307798964136399e-05
Epoch 3760/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7590
Learning Rate: 2.284720974495035e-05
Epoch 3761/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8502
Learning Rate: 2.284720974495035e-05
Epoch 3762/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7484
Learning Rate: 2.284720974495035e-05
Epoch 3763/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7757
Learning Rate: 2.284720974495035e-05
Epoch 3764/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7791
Learning Rate: 2.284720974495035e-05
Epoch 3765/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7695
Learning Rate: 2.284720974495035e-05
Epoch 3766/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8197
Learning Rate: 2.284720974495035e-05
Epoch 3767/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7035
Learning Rate: 2.284720974495035e-05
Epoch 3768/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7457
Learning Rate: 2.284720974495035e-05
Epoch 3769/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8337
Learning Rate: 2.284720974495035e-05
Epoch 3770/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9504
Learning Rate: 2.2618737647500847e-05
Epoch 3771/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7042
Learning Rate: 2.2618737647500847e-05
Epoch 3772/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8885
Learning Rate: 2.2618737647500847e-05
Epoch 3773/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7587
Learning Rate: 2.2618737647500847e-05
Epoch 3774/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9060
Learning Rate: 2.2618737647500847e-05
Epoch 3775/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8544
Learning Rate: 2.2618737647500847e-05
Epoch 3776/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9087
Learning Rate: 2.2618737647500847e-05
Epoch 3777/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7523
Learning Rate: 2.2618737647500847e-05
Epoch 3778/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8118
Learning Rate: 2.2618737647500847e-05
Epoch 3779/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8518
Learning Rate: 2.2618737647500847e-05
Epoch 3780/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7951
Learning Rate: 2.239255027102584e-05
Epoch 3781/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7888
Learning Rate: 2.239255027102584e-05
Epoch 3782/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8930
Learning Rate: 2.239255027102584e-05
Epoch 3783/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9393
Learning Rate: 2.239255027102584e-05
Epoch 3784/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7781
Learning Rate: 2.239255027102584e-05
Epoch 3785/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9054
Learning Rate: 2.239255027102584e-05
Epoch 3786/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7631
Learning Rate: 2.239255027102584e-05
Epoch 3787/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8815
Learning Rate: 2.239255027102584e-05
Epoch 3788/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7134
Learning Rate: 2.239255027102584e-05
Epoch 3789/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9175
Learning Rate: 2.239255027102584e-05
Epoch 3790/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8191
Learning Rate: 2.2168624768315582e-05
Epoch 3791/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7938
Learning Rate: 2.2168624768315582e-05
Epoch 3792/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8304
Learning Rate: 2.2168624768315582e-05
Epoch 3793/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7006
Learning Rate: 2.2168624768315582e-05
Epoch 3794/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8523
Learning Rate: 2.2168624768315582e-05
Epoch 3795/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7993
Learning Rate: 2.2168624768315582e-05
Epoch 3796/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6976
Learning Rate: 2.2168624768315582e-05
Epoch 3797/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8390
Learning Rate: 2.2168624768315582e-05
Epoch 3798/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7757
Learning Rate: 2.2168624768315582e-05
Epoch 3799/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7953
Learning Rate: 2.2168624768315582e-05
Epoch 3800/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8485
Learning Rate: 2.1946938520632425e-05
Epoch 3801/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9754
Learning Rate: 2.1946938520632425e-05
Epoch 3802/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8221
Learning Rate: 2.1946938520632425e-05
Epoch 3803/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7864
Learning Rate: 2.1946938520632425e-05
Epoch 3804/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7884
Learning Rate: 2.1946938520632425e-05
Epoch 3805/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8267
Learning Rate: 2.1946938520632425e-05
Epoch 3806/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7487
Learning Rate: 2.1946938520632425e-05
Epoch 3807/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8782
Learning Rate: 2.1946938520632425e-05
Epoch 3808/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7627
Learning Rate: 2.1946938520632425e-05
Epoch 3809/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8088
Learning Rate: 2.1946938520632425e-05
Epoch 3810/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8552
Learning Rate: 2.17274691354261e-05
Epoch 3811/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8225
Learning Rate: 2.17274691354261e-05
Epoch 3812/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8722
Learning Rate: 2.17274691354261e-05
Epoch 3813/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7742
Learning Rate: 2.17274691354261e-05
Epoch 3814/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7594
Learning Rate: 2.17274691354261e-05
Epoch 3815/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9447
Learning Rate: 2.17274691354261e-05
Epoch 3816/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7520
Learning Rate: 2.17274691354261e-05
Epoch 3817/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7994
Learning Rate: 2.17274691354261e-05
Epoch 3818/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8021
Learning Rate: 2.17274691354261e-05
Epoch 3819/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8435
Learning Rate: 2.17274691354261e-05
Epoch 3820/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8130
Learning Rate: 2.151019444407184e-05
Epoch 3821/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7202
Learning Rate: 2.151019444407184e-05
Epoch 3822/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8696
Learning Rate: 2.151019444407184e-05
Epoch 3823/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8807
Learning Rate: 2.151019444407184e-05
Epoch 3824/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7268
Learning Rate: 2.151019444407184e-05
Epoch 3825/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7624
Learning Rate: 2.151019444407184e-05
Epoch 3826/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8782
Learning Rate: 2.151019444407184e-05
Epoch 3827/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7961
Learning Rate: 2.151019444407184e-05
Epoch 3828/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8459
Learning Rate: 2.151019444407184e-05
Epoch 3829/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8277
Learning Rate: 2.151019444407184e-05
Epoch 3830/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8911
Learning Rate: 2.129509249963112e-05
Epoch 3831/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7795
Learning Rate: 2.129509249963112e-05
Epoch 3832/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7496
Learning Rate: 2.129509249963112e-05
Epoch 3833/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8769
Learning Rate: 2.129509249963112e-05
Epoch 3834/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7391
Learning Rate: 2.129509249963112e-05
Epoch 3835/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7575
Learning Rate: 2.129509249963112e-05
Epoch 3836/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8362
Learning Rate: 2.129509249963112e-05
Epoch 3837/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8028
Learning Rate: 2.129509249963112e-05
Epoch 3838/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9334
Learning Rate: 2.129509249963112e-05
Epoch 3839/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8059
Learning Rate: 2.129509249963112e-05
Epoch 3840/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7471
Learning Rate: 2.1082141574634807e-05
Epoch 3841/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9245
Learning Rate: 2.1082141574634807e-05
Epoch 3842/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8566
Learning Rate: 2.1082141574634807e-05
Epoch 3843/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8199
Learning Rate: 2.1082141574634807e-05
Epoch 3844/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7594
Learning Rate: 2.1082141574634807e-05
Epoch 3845/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7711
Learning Rate: 2.1082141574634807e-05
Epoch 3846/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8032
Learning Rate: 2.1082141574634807e-05
Epoch 3847/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8942
Learning Rate: 2.1082141574634807e-05
Epoch 3848/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9258
Learning Rate: 2.1082141574634807e-05
Epoch 3849/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6671
Learning Rate: 2.1082141574634807e-05
Epoch 3850/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9141
Learning Rate: 2.087132015888846e-05
Epoch 3851/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8240
Learning Rate: 2.087132015888846e-05
Epoch 3852/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8145
Learning Rate: 2.087132015888846e-05
Epoch 3853/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8166
Learning Rate: 2.087132015888846e-05
Epoch 3854/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8441
Learning Rate: 2.087132015888846e-05
Epoch 3855/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8455
Learning Rate: 2.087132015888846e-05
Epoch 3856/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9072
Learning Rate: 2.087132015888846e-05
Epoch 3857/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7642
Learning Rate: 2.087132015888846e-05
Epoch 3858/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8420
Learning Rate: 2.087132015888846e-05
Epoch 3859/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7469
Learning Rate: 2.087132015888846e-05
Epoch 3860/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8488
Learning Rate: 2.0662606957299575e-05
Epoch 3861/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8100
Learning Rate: 2.0662606957299575e-05
Epoch 3862/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7867
Learning Rate: 2.0662606957299575e-05
Epoch 3863/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8275
Learning Rate: 2.0662606957299575e-05
Epoch 3864/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8028
Learning Rate: 2.0662606957299575e-05
Epoch 3865/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8055
Learning Rate: 2.0662606957299575e-05
Epoch 3866/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9261
Learning Rate: 2.0662606957299575e-05
Epoch 3867/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9318
Learning Rate: 2.0662606957299575e-05
Epoch 3868/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7647
Learning Rate: 2.0662606957299575e-05
Epoch 3869/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7631
Learning Rate: 2.0662606957299575e-05
Epoch 3870/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7561
Learning Rate: 2.045598088772658e-05
Epoch 3871/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9865
Learning Rate: 2.045598088772658e-05
Epoch 3872/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7455
Learning Rate: 2.045598088772658e-05
Epoch 3873/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9563
Learning Rate: 2.045598088772658e-05
Epoch 3874/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8401
Learning Rate: 2.045598088772658e-05
Epoch 3875/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8206
Learning Rate: 2.045598088772658e-05
Epoch 3876/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7903
Learning Rate: 2.045598088772658e-05
Epoch 3877/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7320
Learning Rate: 2.045598088772658e-05
Epoch 3878/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7501
Learning Rate: 2.045598088772658e-05
Epoch 3879/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7690
Learning Rate: 2.045598088772658e-05
Epoch 3880/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8065
Learning Rate: 2.0251421078849313e-05
Epoch 3881/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7313
Learning Rate: 2.0251421078849313e-05
Epoch 3882/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7738
Learning Rate: 2.0251421078849313e-05
Epoch 3883/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7716
Learning Rate: 2.0251421078849313e-05
Epoch 3884/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9966
Learning Rate: 2.0251421078849313e-05
Epoch 3885/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8646
Learning Rate: 2.0251421078849313e-05
Epoch 3886/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8600
Learning Rate: 2.0251421078849313e-05
Epoch 3887/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8370
Learning Rate: 2.0251421078849313e-05
Epoch 3888/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8370
Learning Rate: 2.0251421078849313e-05
Epoch 3889/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8869
Learning Rate: 2.0251421078849313e-05
Epoch 3890/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8148
Learning Rate: 2.0048906868060818e-05
Epoch 3891/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8883
Learning Rate: 2.0048906868060818e-05
Epoch 3892/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8418
Learning Rate: 2.0048906868060818e-05
Epoch 3893/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8396
Learning Rate: 2.0048906868060818e-05
Epoch 3894/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8021
Learning Rate: 2.0048906868060818e-05
Epoch 3895/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9230
Learning Rate: 2.0048906868060818e-05
Epoch 3896/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7580
Learning Rate: 2.0048906868060818e-05
Epoch 3897/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7060
Learning Rate: 2.0048906868060818e-05
Epoch 3898/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8711
Learning Rate: 2.0048906868060818e-05
Epoch 3899/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7392
Learning Rate: 2.0048906868060818e-05
Epoch 3900/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7265
Learning Rate: 1.984841779938021e-05
Epoch 3901/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8075
Learning Rate: 1.984841779938021e-05
Epoch 3902/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7510
Learning Rate: 1.984841779938021e-05
Epoch 3903/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8319
Learning Rate: 1.984841779938021e-05
Epoch 3904/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7597
Learning Rate: 1.984841779938021e-05
Epoch 3905/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7365
Learning Rate: 1.984841779938021e-05
Epoch 3906/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7060
Learning Rate: 1.984841779938021e-05
Epoch 3907/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7203
Learning Rate: 1.984841779938021e-05
Epoch 3908/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8888
Learning Rate: 1.984841779938021e-05
Epoch 3909/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9112
Learning Rate: 1.984841779938021e-05
Epoch 3910/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8480
Learning Rate: 1.9649933621386406e-05
Epoch 3911/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8595
Learning Rate: 1.9649933621386406e-05
Epoch 3912/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9023
Learning Rate: 1.9649933621386406e-05
Epoch 3913/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8174
Learning Rate: 1.9649933621386406e-05
Epoch 3914/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7807
Learning Rate: 1.9649933621386406e-05
Epoch 3915/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7878
Learning Rate: 1.9649933621386406e-05
Epoch 3916/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7504
Learning Rate: 1.9649933621386406e-05
Epoch 3917/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7376
Learning Rate: 1.9649933621386406e-05
Epoch 3918/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7694
Learning Rate: 1.9649933621386406e-05
Epoch 3919/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8030
Learning Rate: 1.9649933621386406e-05
Epoch 3920/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8355
Learning Rate: 1.9453434285172543e-05
Epoch 3921/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7977
Learning Rate: 1.9453434285172543e-05
Epoch 3922/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9381
Learning Rate: 1.9453434285172543e-05
Epoch 3923/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8647
Learning Rate: 1.9453434285172543e-05
Epoch 3924/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8602
Learning Rate: 1.9453434285172543e-05
Epoch 3925/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8757
Learning Rate: 1.9453434285172543e-05
Epoch 3926/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0085
Learning Rate: 1.9453434285172543e-05
Epoch 3927/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8788
Learning Rate: 1.9453434285172543e-05
Epoch 3928/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7373
Learning Rate: 1.9453434285172543e-05
Epoch 3929/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7402
Learning Rate: 1.9453434285172543e-05
Epoch 3930/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9930
Learning Rate: 1.9258899942320817e-05
Epoch 3931/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8821
Learning Rate: 1.9258899942320817e-05
Epoch 3932/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8195
Learning Rate: 1.9258899942320817e-05
Epoch 3933/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8484
Learning Rate: 1.9258899942320817e-05
Epoch 3934/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8824
Learning Rate: 1.9258899942320817e-05
Epoch 3935/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8340
Learning Rate: 1.9258899942320817e-05
Epoch 3936/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8392
Learning Rate: 1.9258899942320817e-05
Epoch 3937/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7811
Learning Rate: 1.9258899942320817e-05
Epoch 3938/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7487
Learning Rate: 1.9258899942320817e-05
Epoch 3939/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8051
Learning Rate: 1.9258899942320817e-05
Epoch 3940/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8422
Learning Rate: 1.9066310942897607e-05
Epoch 3941/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8472
Learning Rate: 1.9066310942897607e-05
Epoch 3942/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7725
Learning Rate: 1.9066310942897607e-05
Epoch 3943/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8331
Learning Rate: 1.9066310942897607e-05
Epoch 3944/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7336
Learning Rate: 1.9066310942897607e-05
Epoch 3945/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8094
Learning Rate: 1.9066310942897607e-05
Epoch 3946/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8069
Learning Rate: 1.9066310942897607e-05
Epoch 3947/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7795
Learning Rate: 1.9066310942897607e-05
Epoch 3948/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8526
Learning Rate: 1.9066310942897607e-05
Epoch 3949/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7934
Learning Rate: 1.9066310942897607e-05
Epoch 3950/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8130
Learning Rate: 1.887564783346863e-05
Epoch 3951/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7984
Learning Rate: 1.887564783346863e-05
Epoch 3952/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7365
Learning Rate: 1.887564783346863e-05
Epoch 3953/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8574
Learning Rate: 1.887564783346863e-05
Epoch 3954/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8268
Learning Rate: 1.887564783346863e-05
Epoch 3955/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7738
Learning Rate: 1.887564783346863e-05
Epoch 3956/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8438
Learning Rate: 1.887564783346863e-05
Epoch 3957/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7588
Learning Rate: 1.887564783346863e-05
Epoch 3958/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7888
Learning Rate: 1.887564783346863e-05
Epoch 3959/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7553
Learning Rate: 1.887564783346863e-05
Epoch 3960/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8018
Learning Rate: 1.8686891355133943e-05
Epoch 3961/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8137
Learning Rate: 1.8686891355133943e-05
Epoch 3962/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6977
Learning Rate: 1.8686891355133943e-05
Epoch 3963/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7529
Learning Rate: 1.8686891355133943e-05
Epoch 3964/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9195
Learning Rate: 1.8686891355133943e-05
Epoch 3965/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7520
Learning Rate: 1.8686891355133943e-05
Epoch 3966/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7625
Learning Rate: 1.8686891355133943e-05
Epoch 3967/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7824
Learning Rate: 1.8686891355133943e-05
Epoch 3968/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8753
Learning Rate: 1.8686891355133943e-05
Epoch 3969/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7600
Learning Rate: 1.8686891355133943e-05
Epoch 3970/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7538
Learning Rate: 1.8500022441582605e-05
Epoch 3971/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8256
Learning Rate: 1.8500022441582605e-05
Epoch 3972/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8473
Learning Rate: 1.8500022441582605e-05
Epoch 3973/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8161
Learning Rate: 1.8500022441582605e-05
Epoch 3974/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7726
Learning Rate: 1.8500022441582605e-05
Epoch 3975/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7739
Learning Rate: 1.8500022441582605e-05
Epoch 3976/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7356
Learning Rate: 1.8500022441582605e-05
Epoch 3977/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8520
Learning Rate: 1.8500022441582605e-05
Epoch 3978/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6964
Learning Rate: 1.8500022441582605e-05
Epoch 3979/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9112
Learning Rate: 1.8500022441582605e-05
Epoch 3980/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0184
Learning Rate: 1.831502221716678e-05
Epoch 3981/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7771
Learning Rate: 1.831502221716678e-05
Epoch 3982/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8159
Learning Rate: 1.831502221716678e-05
Epoch 3983/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8003
Learning Rate: 1.831502221716678e-05
Epoch 3984/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8295
Learning Rate: 1.831502221716678e-05
Epoch 3985/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8263
Learning Rate: 1.831502221716678e-05
Epoch 3986/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7832
Learning Rate: 1.831502221716678e-05
Epoch 3987/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8355
Learning Rate: 1.831502221716678e-05
Epoch 3988/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6901
Learning Rate: 1.831502221716678e-05
Epoch 3989/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8409
Learning Rate: 1.831502221716678e-05
Epoch 3990/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7451
Learning Rate: 1.813187199499511e-05
Epoch 3991/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7096
Learning Rate: 1.813187199499511e-05
Epoch 3992/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9947
Learning Rate: 1.813187199499511e-05
Epoch 3993/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7686
Learning Rate: 1.813187199499511e-05
Epoch 3994/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7653
Learning Rate: 1.813187199499511e-05
Epoch 3995/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7535
Learning Rate: 1.813187199499511e-05
Epoch 3996/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7397
Learning Rate: 1.813187199499511e-05
Epoch 3997/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7626
Learning Rate: 1.813187199499511e-05
Epoch 3998/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9738
Learning Rate: 1.813187199499511e-05
Epoch 3999/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8161
Learning Rate: 1.813187199499511e-05
Epoch 4000/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8244
Learning Rate: 1.795055327504516e-05
Epoch 4001/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8230
Learning Rate: 1.795055327504516e-05
Epoch 4002/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7751
Learning Rate: 1.795055327504516e-05
Epoch 4003/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8306
Learning Rate: 1.795055327504516e-05
Epoch 4004/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8561
Learning Rate: 1.795055327504516e-05
Epoch 4005/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8805
Learning Rate: 1.795055327504516e-05
Epoch 4006/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7829
Learning Rate: 1.795055327504516e-05
Epoch 4007/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7367
Learning Rate: 1.795055327504516e-05
Epoch 4008/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9101
Learning Rate: 1.795055327504516e-05
Epoch 4009/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8693
Learning Rate: 1.795055327504516e-05
Epoch 4010/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8057
Learning Rate: 1.7771047742294706e-05
Epoch 4011/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8401
Learning Rate: 1.7771047742294706e-05
Epoch 4012/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8583
Learning Rate: 1.7771047742294706e-05
Epoch 4013/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7657
Learning Rate: 1.7771047742294706e-05
Epoch 4014/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8440
Learning Rate: 1.7771047742294706e-05
Epoch 4015/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8922
Learning Rate: 1.7771047742294706e-05
Epoch 4016/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8038
Learning Rate: 1.7771047742294706e-05
Epoch 4017/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7646
Learning Rate: 1.7771047742294706e-05
Epoch 4018/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9748
Learning Rate: 1.7771047742294706e-05
Epoch 4019/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8119
Learning Rate: 1.7771047742294706e-05
Epoch 4020/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7277
Learning Rate: 1.7593337264871757e-05
Epoch 4021/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8202
Learning Rate: 1.7593337264871757e-05
Epoch 4022/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8502
Learning Rate: 1.7593337264871757e-05
Epoch 4023/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8585
Learning Rate: 1.7593337264871757e-05
Epoch 4024/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7538
Learning Rate: 1.7593337264871757e-05
Epoch 4025/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8654
Learning Rate: 1.7593337264871757e-05
Epoch 4026/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7775
Learning Rate: 1.7593337264871757e-05
Epoch 4027/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7339
Learning Rate: 1.7593337264871757e-05
Epoch 4028/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8604
Learning Rate: 1.7593337264871757e-05
Epoch 4029/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6775
Learning Rate: 1.7593337264871757e-05
Epoch 4030/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8516
Learning Rate: 1.741740389222304e-05
Epoch 4031/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8416
Learning Rate: 1.741740389222304e-05
Epoch 4032/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7798
Learning Rate: 1.741740389222304e-05
Epoch 4033/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7732
Learning Rate: 1.741740389222304e-05
Epoch 4034/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7672
Learning Rate: 1.741740389222304e-05
Epoch 4035/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8439
Learning Rate: 1.741740389222304e-05
Epoch 4036/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7720
Learning Rate: 1.741740389222304e-05
Epoch 4037/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7380
Learning Rate: 1.741740389222304e-05
Epoch 4038/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7694
Learning Rate: 1.741740389222304e-05
Epoch 4039/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7650
Learning Rate: 1.741740389222304e-05
Epoch 4040/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8629
Learning Rate: 1.7243229853300812e-05
Epoch 4041/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7966
Learning Rate: 1.7243229853300812e-05
Epoch 4042/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8423
Learning Rate: 1.7243229853300812e-05
Epoch 4043/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7389
Learning Rate: 1.7243229853300812e-05
Epoch 4044/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8426
Learning Rate: 1.7243229853300812e-05
Epoch 4045/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7709
Learning Rate: 1.7243229853300812e-05
Epoch 4046/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7656
Learning Rate: 1.7243229853300812e-05
Epoch 4047/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8951
Learning Rate: 1.7243229853300812e-05
Epoch 4048/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8344
Learning Rate: 1.7243229853300812e-05
Epoch 4049/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8314
Learning Rate: 1.7243229853300812e-05
Epoch 4050/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7533
Learning Rate: 1.7070797554767804e-05
Epoch 4051/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6943
Learning Rate: 1.7070797554767804e-05
Epoch 4052/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7494
Learning Rate: 1.7070797554767804e-05
Epoch 4053/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7652
Learning Rate: 1.7070797554767804e-05
Epoch 4054/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8825
Learning Rate: 1.7070797554767804e-05
Epoch 4055/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8857
Learning Rate: 1.7070797554767804e-05
Epoch 4056/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7365
Learning Rate: 1.7070797554767804e-05
Epoch 4057/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7967
Learning Rate: 1.7070797554767804e-05
Epoch 4058/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7700
Learning Rate: 1.7070797554767804e-05
Epoch 4059/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8483
Learning Rate: 1.7070797554767804e-05
Epoch 4060/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8350
Learning Rate: 1.6900089579220128e-05
Epoch 4061/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8318
Learning Rate: 1.6900089579220128e-05
Epoch 4062/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8344
Learning Rate: 1.6900089579220128e-05
Epoch 4063/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8340
Learning Rate: 1.6900089579220128e-05
Epoch 4064/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7115
Learning Rate: 1.6900089579220128e-05
Epoch 4065/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8018
Learning Rate: 1.6900089579220128e-05
Epoch 4066/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8262
Learning Rate: 1.6900089579220128e-05
Epoch 4067/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7625
Learning Rate: 1.6900089579220128e-05
Epoch 4068/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8502
Learning Rate: 1.6900089579220128e-05
Epoch 4069/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7218
Learning Rate: 1.6900089579220128e-05
Epoch 4070/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8433
Learning Rate: 1.6731088683427927e-05
Epoch 4071/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8237
Learning Rate: 1.6731088683427927e-05
Epoch 4072/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7772
Learning Rate: 1.6731088683427927e-05
Epoch 4073/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8879
Learning Rate: 1.6731088683427927e-05
Epoch 4074/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8116
Learning Rate: 1.6731088683427927e-05
Epoch 4075/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8234
Learning Rate: 1.6731088683427927e-05
Epoch 4076/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8680
Learning Rate: 1.6731088683427927e-05
Epoch 4077/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8196
Learning Rate: 1.6731088683427927e-05
Epoch 4078/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7555
Learning Rate: 1.6731088683427927e-05
Epoch 4079/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7727
Learning Rate: 1.6731088683427927e-05
Epoch 4080/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6848
Learning Rate: 1.656377779659365e-05
Epoch 4081/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8921
Learning Rate: 1.656377779659365e-05
Epoch 4082/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8357
Learning Rate: 1.656377779659365e-05
Epoch 4083/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8780
Learning Rate: 1.656377779659365e-05
Epoch 4084/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8500
Learning Rate: 1.656377779659365e-05
Epoch 4085/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7671
Learning Rate: 1.656377779659365e-05
Epoch 4086/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7253
Learning Rate: 1.656377779659365e-05
Epoch 4087/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8120
Learning Rate: 1.656377779659365e-05
Epoch 4088/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8005
Learning Rate: 1.656377779659365e-05
Epoch 4089/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7517
Learning Rate: 1.656377779659365e-05
Epoch 4090/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7958
Learning Rate: 1.6398140018627712e-05
Epoch 4091/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7826
Learning Rate: 1.6398140018627712e-05
Epoch 4092/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8908
Learning Rate: 1.6398140018627712e-05
Epoch 4093/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8169
Learning Rate: 1.6398140018627712e-05
Epoch 4094/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8232
Learning Rate: 1.6398140018627712e-05
Epoch 4095/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8578
Learning Rate: 1.6398140018627712e-05
Epoch 4096/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7785
Learning Rate: 1.6398140018627712e-05
Epoch 4097/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7166
Learning Rate: 1.6398140018627712e-05
Epoch 4098/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8849
Learning Rate: 1.6398140018627712e-05
Epoch 4099/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7475
Learning Rate: 1.6398140018627712e-05
Epoch 4100/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7848
Learning Rate: 1.6234158618441435e-05
Epoch 4101/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9406
Learning Rate: 1.6234158618441435e-05
Epoch 4102/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7855
Learning Rate: 1.6234158618441435e-05
Epoch 4103/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7767
Learning Rate: 1.6234158618441435e-05
Epoch 4104/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8085
Learning Rate: 1.6234158618441435e-05
Epoch 4105/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6974
Learning Rate: 1.6234158618441435e-05
Epoch 4106/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8204
Learning Rate: 1.6234158618441435e-05
Epoch 4107/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7425
Learning Rate: 1.6234158618441435e-05
Epoch 4108/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7870
Learning Rate: 1.6234158618441435e-05
Epoch 4109/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7128
Learning Rate: 1.6234158618441435e-05
Epoch 4110/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7830
Learning Rate: 1.607181703225702e-05
Epoch 4111/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7700
Learning Rate: 1.607181703225702e-05
Epoch 4112/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7344
Learning Rate: 1.607181703225702e-05
Epoch 4113/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7750
Learning Rate: 1.607181703225702e-05
Epoch 4114/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7530
Learning Rate: 1.607181703225702e-05
Epoch 4115/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7741
Learning Rate: 1.607181703225702e-05
Epoch 4116/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8034
Learning Rate: 1.607181703225702e-05
Epoch 4117/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7841
Learning Rate: 1.607181703225702e-05
Epoch 4118/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8290
Learning Rate: 1.607181703225702e-05
Epoch 4119/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7019
Learning Rate: 1.607181703225702e-05
Epoch 4120/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9272
Learning Rate: 1.591109886193445e-05
Epoch 4121/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7592
Learning Rate: 1.591109886193445e-05
Epoch 4122/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7891
Learning Rate: 1.591109886193445e-05
Epoch 4123/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7740
Learning Rate: 1.591109886193445e-05
Epoch 4124/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8851
Learning Rate: 1.591109886193445e-05
Epoch 4125/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8598
Learning Rate: 1.591109886193445e-05
Epoch 4126/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7820
Learning Rate: 1.591109886193445e-05
Epoch 4127/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9127
Learning Rate: 1.591109886193445e-05
Epoch 4128/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8528
Learning Rate: 1.591109886193445e-05
Epoch 4129/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8514
Learning Rate: 1.591109886193445e-05
Epoch 4130/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7754
Learning Rate: 1.5751987873315108e-05
Epoch 4131/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8073
Learning Rate: 1.5751987873315108e-05
Epoch 4132/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8045
Learning Rate: 1.5751987873315108e-05
Epoch 4133/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9029
Learning Rate: 1.5751987873315108e-05
Epoch 4134/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0217
Learning Rate: 1.5751987873315108e-05
Epoch 4135/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8428
Learning Rate: 1.5751987873315108e-05
Epoch 4136/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8060
Learning Rate: 1.5751987873315108e-05
Epoch 4137/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7097
Learning Rate: 1.5751987873315108e-05
Epoch 4138/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7891
Learning Rate: 1.5751987873315108e-05
Epoch 4139/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8531
Learning Rate: 1.5751987873315108e-05
Epoch 4140/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6986
Learning Rate: 1.5594467994581957e-05
Epoch 4141/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9939
Learning Rate: 1.5594467994581957e-05
Epoch 4142/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8793
Learning Rate: 1.5594467994581957e-05
Epoch 4143/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7343
Learning Rate: 1.5594467994581957e-05
Epoch 4144/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8304
Learning Rate: 1.5594467994581957e-05
Epoch 4145/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7535
Learning Rate: 1.5594467994581957e-05
Epoch 4146/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8277
Learning Rate: 1.5594467994581957e-05
Epoch 4147/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9812
Learning Rate: 1.5594467994581957e-05
Epoch 4148/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8490
Learning Rate: 1.5594467994581957e-05
Epoch 4149/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7628
Learning Rate: 1.5594467994581957e-05
Epoch 4150/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7166
Learning Rate: 1.543852331463614e-05
Epoch 4151/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8073
Learning Rate: 1.543852331463614e-05
Epoch 4152/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7326
Learning Rate: 1.543852331463614e-05
Epoch 4153/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7831
Learning Rate: 1.543852331463614e-05
Epoch 4154/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8514
Learning Rate: 1.543852331463614e-05
Epoch 4155/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7750
Learning Rate: 1.543852331463614e-05
Epoch 4156/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8243
Learning Rate: 1.543852331463614e-05
Epoch 4157/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8126
Learning Rate: 1.543852331463614e-05
Epoch 4158/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7204
Learning Rate: 1.543852331463614e-05
Epoch 4159/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8664
Learning Rate: 1.543852331463614e-05
Epoch 4160/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8528
Learning Rate: 1.5284138081489776e-05
Epoch 4161/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7998
Learning Rate: 1.5284138081489776e-05
Epoch 4162/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7865
Learning Rate: 1.5284138081489776e-05
Epoch 4163/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8527
Learning Rate: 1.5284138081489776e-05
Epoch 4164/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8205
Learning Rate: 1.5284138081489776e-05
Epoch 4165/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7694
Learning Rate: 1.5284138081489776e-05
Epoch 4166/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7419
Learning Rate: 1.5284138081489776e-05
Epoch 4167/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8384
Learning Rate: 1.5284138081489776e-05
Epoch 4168/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8863
Learning Rate: 1.5284138081489776e-05
Epoch 4169/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7276
Learning Rate: 1.5284138081489776e-05
Epoch 4170/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7921
Learning Rate: 1.5131296700674877e-05
Epoch 4171/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8198
Learning Rate: 1.5131296700674877e-05
Epoch 4172/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8886
Learning Rate: 1.5131296700674877e-05
Epoch 4173/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8685
Learning Rate: 1.5131296700674877e-05
Epoch 4174/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8032
Learning Rate: 1.5131296700674877e-05
Epoch 4175/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8125
Learning Rate: 1.5131296700674877e-05
Epoch 4176/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8468
Learning Rate: 1.5131296700674877e-05
Epoch 4177/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7966
Learning Rate: 1.5131296700674877e-05
Epoch 4178/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8919
Learning Rate: 1.5131296700674877e-05
Epoch 4179/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8301
Learning Rate: 1.5131296700674877e-05
Epoch 4180/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7723
Learning Rate: 1.4979983733668128e-05
Epoch 4181/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7293
Learning Rate: 1.4979983733668128e-05
Epoch 4182/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9831
Learning Rate: 1.4979983733668128e-05
Epoch 4183/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8656
Learning Rate: 1.4979983733668128e-05
Epoch 4184/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8117
Learning Rate: 1.4979983733668128e-05
Epoch 4185/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8429
Learning Rate: 1.4979983733668128e-05
Epoch 4186/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7978
Learning Rate: 1.4979983733668128e-05
Epoch 4187/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7476
Learning Rate: 1.4979983733668128e-05
Epoch 4188/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8741
Learning Rate: 1.4979983733668128e-05
Epoch 4189/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7788
Learning Rate: 1.4979983733668128e-05
Epoch 4190/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7756
Learning Rate: 1.4830183896331446e-05
Epoch 4191/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8287
Learning Rate: 1.4830183896331446e-05
Epoch 4192/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7144
Learning Rate: 1.4830183896331446e-05
Epoch 4193/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8018
Learning Rate: 1.4830183896331446e-05
Epoch 4194/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7139
Learning Rate: 1.4830183896331446e-05
Epoch 4195/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7832
Learning Rate: 1.4830183896331446e-05
Epoch 4196/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8993
Learning Rate: 1.4830183896331446e-05
Epoch 4197/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8673
Learning Rate: 1.4830183896331446e-05
Epoch 4198/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7357
Learning Rate: 1.4830183896331446e-05
Epoch 4199/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8404
Learning Rate: 1.4830183896331446e-05
Epoch 4200/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9053
Learning Rate: 1.4681882057368132e-05
Epoch 4201/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8234
Learning Rate: 1.4681882057368132e-05
Epoch 4202/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8456
Learning Rate: 1.4681882057368132e-05
Epoch 4203/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6868
Learning Rate: 1.4681882057368132e-05
Epoch 4204/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8074
Learning Rate: 1.4681882057368132e-05
Epoch 4205/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8723
Learning Rate: 1.4681882057368132e-05
Epoch 4206/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8186
Learning Rate: 1.4681882057368132e-05
Epoch 4207/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8125
Learning Rate: 1.4681882057368132e-05
Epoch 4208/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7523
Learning Rate: 1.4681882057368132e-05
Epoch 4209/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7907
Learning Rate: 1.4681882057368132e-05
Epoch 4210/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8261
Learning Rate: 1.453506323679445e-05
Epoch 4211/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9911
Learning Rate: 1.453506323679445e-05
Epoch 4212/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8598
Learning Rate: 1.453506323679445e-05
Epoch 4213/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7882
Learning Rate: 1.453506323679445e-05
Epoch 4214/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7681
Learning Rate: 1.453506323679445e-05
Epoch 4215/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7550
Learning Rate: 1.453506323679445e-05
Epoch 4216/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8295
Learning Rate: 1.453506323679445e-05
Epoch 4217/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9860
Learning Rate: 1.453506323679445e-05
Epoch 4218/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7978
Learning Rate: 1.453506323679445e-05
Epoch 4219/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7653
Learning Rate: 1.453506323679445e-05
Epoch 4220/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9399
Learning Rate: 1.4389712604426506e-05
Epoch 4221/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8598
Learning Rate: 1.4389712604426506e-05
Epoch 4222/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8698
Learning Rate: 1.4389712604426506e-05
Epoch 4223/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8446
Learning Rate: 1.4389712604426506e-05
Epoch 4224/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8001
Learning Rate: 1.4389712604426506e-05
Epoch 4225/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8307
Learning Rate: 1.4389712604426506e-05
Epoch 4226/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6809
Learning Rate: 1.4389712604426506e-05
Epoch 4227/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9167
Learning Rate: 1.4389712604426506e-05
Epoch 4228/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8155
Learning Rate: 1.4389712604426506e-05
Epoch 4229/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7084
Learning Rate: 1.4389712604426506e-05
Epoch 4230/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8557
Learning Rate: 1.4245815478382241e-05
Epoch 4231/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7569
Learning Rate: 1.4245815478382241e-05
Epoch 4232/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7987
Learning Rate: 1.4245815478382241e-05
Epoch 4233/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8102
Learning Rate: 1.4245815478382241e-05
Epoch 4234/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7951
Learning Rate: 1.4245815478382241e-05
Epoch 4235/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8927
Learning Rate: 1.4245815478382241e-05
Epoch 4236/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8403
Learning Rate: 1.4245815478382241e-05
Epoch 4237/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7502
Learning Rate: 1.4245815478382241e-05
Epoch 4238/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7840
Learning Rate: 1.4245815478382241e-05
Epoch 4239/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7719
Learning Rate: 1.4245815478382241e-05
Epoch 4240/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7491
Learning Rate: 1.410335732359842e-05
Epoch 4241/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7859
Learning Rate: 1.410335732359842e-05
Epoch 4242/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8649
Learning Rate: 1.410335732359842e-05
Epoch 4243/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8710
Learning Rate: 1.410335732359842e-05
Epoch 4244/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7625
Learning Rate: 1.410335732359842e-05
Epoch 4245/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7999
Learning Rate: 1.410335732359842e-05
Epoch 4246/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7518
Learning Rate: 1.410335732359842e-05
Epoch 4247/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8293
Learning Rate: 1.410335732359842e-05
Epoch 4248/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9187
Learning Rate: 1.410335732359842e-05
Epoch 4249/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8329
Learning Rate: 1.410335732359842e-05
Epoch 4250/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7218
Learning Rate: 1.3962323750362435e-05
Epoch 4251/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7809
Learning Rate: 1.3962323750362435e-05
Epoch 4252/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8590
Learning Rate: 1.3962323750362435e-05
Epoch 4253/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8527
Learning Rate: 1.3962323750362435e-05
Epoch 4254/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8275
Learning Rate: 1.3962323750362435e-05
Epoch 4255/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7777
Learning Rate: 1.3962323750362435e-05
Epoch 4256/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8213
Learning Rate: 1.3962323750362435e-05
Epoch 4257/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7172
Learning Rate: 1.3962323750362435e-05
Epoch 4258/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9377
Learning Rate: 1.3962323750362435e-05
Epoch 4259/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7167
Learning Rate: 1.3962323750362435e-05
Epoch 4260/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7935
Learning Rate: 1.3822700512858811e-05
Epoch 4261/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8360
Learning Rate: 1.3822700512858811e-05
Epoch 4262/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8284
Learning Rate: 1.3822700512858811e-05
Epoch 4263/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7273
Learning Rate: 1.3822700512858811e-05
Epoch 4264/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8528
Learning Rate: 1.3822700512858811e-05
Epoch 4265/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7899
Learning Rate: 1.3822700512858811e-05
Epoch 4266/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6842
Learning Rate: 1.3822700512858811e-05
Epoch 4267/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6898
Learning Rate: 1.3822700512858811e-05
Epoch 4268/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8241
Learning Rate: 1.3822700512858811e-05
Epoch 4269/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7452
Learning Rate: 1.3822700512858811e-05
Epoch 4270/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7491
Learning Rate: 1.3684473507730223e-05
Epoch 4271/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8153
Learning Rate: 1.3684473507730223e-05
Epoch 4272/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8280
Learning Rate: 1.3684473507730223e-05
Epoch 4273/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7675
Learning Rate: 1.3684473507730223e-05
Epoch 4274/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8021
Learning Rate: 1.3684473507730223e-05
Epoch 4275/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8070
Learning Rate: 1.3684473507730223e-05
Epoch 4276/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7884
Learning Rate: 1.3684473507730223e-05
Epoch 4277/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7490
Learning Rate: 1.3684473507730223e-05
Epoch 4278/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8215
Learning Rate: 1.3684473507730223e-05
Epoch 4279/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8390
Learning Rate: 1.3684473507730223e-05
Epoch 4280/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8182
Learning Rate: 1.354762877265292e-05
Epoch 4281/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8356
Learning Rate: 1.354762877265292e-05
Epoch 4282/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7806
Learning Rate: 1.354762877265292e-05
Epoch 4283/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9261
Learning Rate: 1.354762877265292e-05
Epoch 4284/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9456
Learning Rate: 1.354762877265292e-05
Epoch 4285/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8654
Learning Rate: 1.354762877265292e-05
Epoch 4286/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7591
Learning Rate: 1.354762877265292e-05
Epoch 4287/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8123
Learning Rate: 1.354762877265292e-05
Epoch 4288/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6981
Learning Rate: 1.354762877265292e-05
Epoch 4289/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7034
Learning Rate: 1.354762877265292e-05
Epoch 4290/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7387
Learning Rate: 1.341215248492639e-05
Epoch 4291/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7758
Learning Rate: 1.341215248492639e-05
Epoch 4292/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8235
Learning Rate: 1.341215248492639e-05
Epoch 4293/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8325
Learning Rate: 1.341215248492639e-05
Epoch 4294/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7334
Learning Rate: 1.341215248492639e-05
Epoch 4295/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7644
Learning Rate: 1.341215248492639e-05
Epoch 4296/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8045
Learning Rate: 1.341215248492639e-05
Epoch 4297/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9383
Learning Rate: 1.341215248492639e-05
Epoch 4298/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8646
Learning Rate: 1.341215248492639e-05
Epoch 4299/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8343
Learning Rate: 1.341215248492639e-05
Epoch 4300/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7471
Learning Rate: 1.3278030960077125e-05
Epoch 4301/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7698
Learning Rate: 1.3278030960077125e-05
Epoch 4302/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8689
Learning Rate: 1.3278030960077125e-05
Epoch 4303/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0353
Learning Rate: 1.3278030960077125e-05
Epoch 4304/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7546
Learning Rate: 1.3278030960077125e-05
Epoch 4305/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7513
Learning Rate: 1.3278030960077125e-05
Epoch 4306/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7811
Learning Rate: 1.3278030960077125e-05
Epoch 4307/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8426
Learning Rate: 1.3278030960077125e-05
Epoch 4308/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9071
Learning Rate: 1.3278030960077125e-05
Epoch 4309/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8550
Learning Rate: 1.3278030960077125e-05
Epoch 4310/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7006
Learning Rate: 1.3145250650476354e-05
Epoch 4311/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8466
Learning Rate: 1.3145250650476354e-05
Epoch 4312/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8651
Learning Rate: 1.3145250650476354e-05
Epoch 4313/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8233
Learning Rate: 1.3145250650476354e-05
Epoch 4314/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7182
Learning Rate: 1.3145250650476354e-05
Epoch 4315/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7835
Learning Rate: 1.3145250650476354e-05
Epoch 4316/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7138
Learning Rate: 1.3145250650476354e-05
Epoch 4317/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8017
Learning Rate: 1.3145250650476354e-05
Epoch 4318/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7269
Learning Rate: 1.3145250650476354e-05
Epoch 4319/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9247
Learning Rate: 1.3145250650476354e-05
Epoch 4320/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7778
Learning Rate: 1.3013798143971591e-05
Epoch 4321/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7191
Learning Rate: 1.3013798143971591e-05
Epoch 4322/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7690
Learning Rate: 1.3013798143971591e-05
Epoch 4323/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8436
Learning Rate: 1.3013798143971591e-05
Epoch 4324/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7122
Learning Rate: 1.3013798143971591e-05
Epoch 4325/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7705
Learning Rate: 1.3013798143971591e-05
Epoch 4326/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7791
Learning Rate: 1.3013798143971591e-05
Epoch 4327/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7644
Learning Rate: 1.3013798143971591e-05
Epoch 4328/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7742
Learning Rate: 1.3013798143971591e-05
Epoch 4329/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.1643
Learning Rate: 1.3013798143971591e-05
Epoch 4330/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8058
Learning Rate: 1.2883660162531876e-05
Epoch 4331/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8218
Learning Rate: 1.2883660162531876e-05
Epoch 4332/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7496
Learning Rate: 1.2883660162531876e-05
Epoch 4333/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7351
Learning Rate: 1.2883660162531876e-05
Epoch 4334/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7988
Learning Rate: 1.2883660162531876e-05
Epoch 4335/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7543
Learning Rate: 1.2883660162531876e-05
Epoch 4336/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8709
Learning Rate: 1.2883660162531876e-05
Epoch 4337/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7298
Learning Rate: 1.2883660162531876e-05
Epoch 4338/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7531
Learning Rate: 1.2883660162531876e-05
Epoch 4339/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8283
Learning Rate: 1.2883660162531876e-05
Epoch 4340/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7957
Learning Rate: 1.2754823560906558e-05
Epoch 4341/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8080
Learning Rate: 1.2754823560906558e-05
Epoch 4342/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7693
Learning Rate: 1.2754823560906558e-05
Epoch 4343/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9501
Learning Rate: 1.2754823560906558e-05
Epoch 4344/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8372
Learning Rate: 1.2754823560906558e-05
Epoch 4345/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6978
Learning Rate: 1.2754823560906558e-05
Epoch 4346/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7542
Learning Rate: 1.2754823560906558e-05
Epoch 4347/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7752
Learning Rate: 1.2754823560906558e-05
Epoch 4348/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7757
Learning Rate: 1.2754823560906558e-05
Epoch 4349/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7273
Learning Rate: 1.2754823560906558e-05
Epoch 4350/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9033
Learning Rate: 1.2627275325297491e-05
Epoch 4351/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8149
Learning Rate: 1.2627275325297491e-05
Epoch 4352/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7574
Learning Rate: 1.2627275325297491e-05
Epoch 4353/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7904
Learning Rate: 1.2627275325297491e-05
Epoch 4354/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8338
Learning Rate: 1.2627275325297491e-05
Epoch 4355/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7747
Learning Rate: 1.2627275325297491e-05
Epoch 4356/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7158
Learning Rate: 1.2627275325297491e-05
Epoch 4357/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8421
Learning Rate: 1.2627275325297491e-05
Epoch 4358/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7986
Learning Rate: 1.2627275325297491e-05
Epoch 4359/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8653
Learning Rate: 1.2627275325297491e-05
Epoch 4360/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8411
Learning Rate: 1.2501002572044516e-05
Epoch 4361/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7976
Learning Rate: 1.2501002572044516e-05
Epoch 4362/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7809
Learning Rate: 1.2501002572044516e-05
Epoch 4363/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7324
Learning Rate: 1.2501002572044516e-05
Epoch 4364/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7756
Learning Rate: 1.2501002572044516e-05
Epoch 4365/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7892
Learning Rate: 1.2501002572044516e-05
Epoch 4366/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8279
Learning Rate: 1.2501002572044516e-05
Epoch 4367/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7749
Learning Rate: 1.2501002572044516e-05
Epoch 4368/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7229
Learning Rate: 1.2501002572044516e-05
Epoch 4369/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7630
Learning Rate: 1.2501002572044516e-05
Epoch 4370/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8260
Learning Rate: 1.237599254632407e-05
Epoch 4371/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7724
Learning Rate: 1.237599254632407e-05
Epoch 4372/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6980
Learning Rate: 1.237599254632407e-05
Epoch 4373/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7498
Learning Rate: 1.237599254632407e-05
Epoch 4374/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7830
Learning Rate: 1.237599254632407e-05
Epoch 4375/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8197
Learning Rate: 1.237599254632407e-05
Epoch 4376/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7508
Learning Rate: 1.237599254632407e-05
Epoch 4377/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8024
Learning Rate: 1.237599254632407e-05
Epoch 4378/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7357
Learning Rate: 1.237599254632407e-05
Epoch 4379/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7823
Learning Rate: 1.237599254632407e-05
Epoch 4380/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9222
Learning Rate: 1.2252232620860829e-05
Epoch 4381/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8085
Learning Rate: 1.2252232620860829e-05
Epoch 4382/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7188
Learning Rate: 1.2252232620860829e-05
Epoch 4383/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8796
Learning Rate: 1.2252232620860829e-05
Epoch 4384/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7445
Learning Rate: 1.2252232620860829e-05
Epoch 4385/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7751
Learning Rate: 1.2252232620860829e-05
Epoch 4386/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7376
Learning Rate: 1.2252232620860829e-05
Epoch 4387/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6959
Learning Rate: 1.2252232620860829e-05
Epoch 4388/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7700
Learning Rate: 1.2252232620860829e-05
Epoch 4389/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7885
Learning Rate: 1.2252232620860829e-05
Epoch 4390/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8259
Learning Rate: 1.212971029465222e-05
Epoch 4391/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7240
Learning Rate: 1.212971029465222e-05
Epoch 4392/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6610
Learning Rate: 1.212971029465222e-05
Epoch 4393/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7628
Learning Rate: 1.212971029465222e-05
Epoch 4394/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9309
Learning Rate: 1.212971029465222e-05
Epoch 4395/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8475
Learning Rate: 1.212971029465222e-05
Epoch 4396/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7694
Learning Rate: 1.212971029465222e-05
Epoch 4397/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7601
Learning Rate: 1.212971029465222e-05
Epoch 4398/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7692
Learning Rate: 1.212971029465222e-05
Epoch 4399/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8317
Learning Rate: 1.212971029465222e-05
Epoch 4400/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8544
Learning Rate: 1.2008413191705698e-05
Epoch 4401/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9414
Learning Rate: 1.2008413191705698e-05
Epoch 4402/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7617
Learning Rate: 1.2008413191705698e-05
Epoch 4403/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7906
Learning Rate: 1.2008413191705698e-05
Epoch 4404/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8059
Learning Rate: 1.2008413191705698e-05
Epoch 4405/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7932
Learning Rate: 1.2008413191705698e-05
Epoch 4406/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8176
Learning Rate: 1.2008413191705698e-05
Epoch 4407/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8208
Learning Rate: 1.2008413191705698e-05
Epoch 4408/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8667
Learning Rate: 1.2008413191705698e-05
Epoch 4409/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8164
Learning Rate: 1.2008413191705698e-05
Epoch 4410/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7830
Learning Rate: 1.188832905978864e-05
Epoch 4411/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9384
Learning Rate: 1.188832905978864e-05
Epoch 4412/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7253
Learning Rate: 1.188832905978864e-05
Epoch 4413/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8186
Learning Rate: 1.188832905978864e-05
Epoch 4414/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8027
Learning Rate: 1.188832905978864e-05
Epoch 4415/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7259
Learning Rate: 1.188832905978864e-05
Epoch 4416/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8453
Learning Rate: 1.188832905978864e-05
Epoch 4417/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7725
Learning Rate: 1.188832905978864e-05
Epoch 4418/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8416
Learning Rate: 1.188832905978864e-05
Epoch 4419/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7624
Learning Rate: 1.188832905978864e-05
Epoch 4420/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7645
Learning Rate: 1.1769445769190754e-05
Epoch 4421/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8906
Learning Rate: 1.1769445769190754e-05
Epoch 4422/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7140
Learning Rate: 1.1769445769190754e-05
Epoch 4423/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7278
Learning Rate: 1.1769445769190754e-05
Epoch 4424/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7234
Learning Rate: 1.1769445769190754e-05
Epoch 4425/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8387
Learning Rate: 1.1769445769190754e-05
Epoch 4426/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7674
Learning Rate: 1.1769445769190754e-05
Epoch 4427/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6927
Learning Rate: 1.1769445769190754e-05
Epoch 4428/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8269
Learning Rate: 1.1769445769190754e-05
Epoch 4429/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7975
Learning Rate: 1.1769445769190754e-05
Epoch 4430/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8125
Learning Rate: 1.1651751311498847e-05
Epoch 4431/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7071
Learning Rate: 1.1651751311498847e-05
Epoch 4432/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7542
Learning Rate: 1.1651751311498847e-05
Epoch 4433/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8033
Learning Rate: 1.1651751311498847e-05
Epoch 4434/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7615
Learning Rate: 1.1651751311498847e-05
Epoch 4435/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8775
Learning Rate: 1.1651751311498847e-05
Epoch 4436/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6928
Learning Rate: 1.1651751311498847e-05
Epoch 4437/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7444
Learning Rate: 1.1651751311498847e-05
Epoch 4438/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9186
Learning Rate: 1.1651751311498847e-05
Epoch 4439/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8371
Learning Rate: 1.1651751311498847e-05
Epoch 4440/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8606
Learning Rate: 1.1535233798383858e-05
Epoch 4441/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8362
Learning Rate: 1.1535233798383858e-05
Epoch 4442/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7443
Learning Rate: 1.1535233798383858e-05
Epoch 4443/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7308
Learning Rate: 1.1535233798383858e-05
Epoch 4444/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7943
Learning Rate: 1.1535233798383858e-05
Epoch 4445/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7159
Learning Rate: 1.1535233798383858e-05
Epoch 4446/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9216
Learning Rate: 1.1535233798383858e-05
Epoch 4447/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8880
Learning Rate: 1.1535233798383858e-05
Epoch 4448/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8130
Learning Rate: 1.1535233798383858e-05
Epoch 4449/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6837
Learning Rate: 1.1535233798383858e-05
Epoch 4450/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8065
Learning Rate: 1.141988146040002e-05
Epoch 4451/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7090
Learning Rate: 1.141988146040002e-05
Epoch 4452/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7116
Learning Rate: 1.141988146040002e-05
Epoch 4453/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8769
Learning Rate: 1.141988146040002e-05
Epoch 4454/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7878
Learning Rate: 1.141988146040002e-05
Epoch 4455/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8182
Learning Rate: 1.141988146040002e-05
Epoch 4456/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9034
Learning Rate: 1.141988146040002e-05
Epoch 4457/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7975
Learning Rate: 1.141988146040002e-05
Epoch 4458/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0013
Learning Rate: 1.141988146040002e-05
Epoch 4459/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7237
Learning Rate: 1.141988146040002e-05
Epoch 4460/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9226
Learning Rate: 1.130568264579602e-05
Epoch 4461/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7476
Learning Rate: 1.130568264579602e-05
Epoch 4462/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9048
Learning Rate: 1.130568264579602e-05
Epoch 4463/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9165
Learning Rate: 1.130568264579602e-05
Epoch 4464/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8495
Learning Rate: 1.130568264579602e-05
Epoch 4465/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8289
Learning Rate: 1.130568264579602e-05
Epoch 4466/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8136
Learning Rate: 1.130568264579602e-05
Epoch 4467/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7533
Learning Rate: 1.130568264579602e-05
Epoch 4468/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7500
Learning Rate: 1.130568264579602e-05
Epoch 4469/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7237
Learning Rate: 1.130568264579602e-05
Epoch 4470/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7888
Learning Rate: 1.119262581933806e-05
Epoch 4471/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7943
Learning Rate: 1.119262581933806e-05
Epoch 4472/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8142
Learning Rate: 1.119262581933806e-05
Epoch 4473/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7354
Learning Rate: 1.119262581933806e-05
Epoch 4474/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9294
Learning Rate: 1.119262581933806e-05
Epoch 4475/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7080
Learning Rate: 1.119262581933806e-05
Epoch 4476/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7889
Learning Rate: 1.119262581933806e-05
Epoch 4477/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6704
Learning Rate: 1.119262581933806e-05
Epoch 4478/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7658
Learning Rate: 1.119262581933806e-05
Epoch 4479/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7862
Learning Rate: 1.119262581933806e-05
Epoch 4480/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8867
Learning Rate: 1.108069956114468e-05
Epoch 4481/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7371
Learning Rate: 1.108069956114468e-05
Epoch 4482/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8186
Learning Rate: 1.108069956114468e-05
Epoch 4483/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7761
Learning Rate: 1.108069956114468e-05
Epoch 4484/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8089
Learning Rate: 1.108069956114468e-05
Epoch 4485/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9879
Learning Rate: 1.108069956114468e-05
Epoch 4486/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8113
Learning Rate: 1.108069956114468e-05
Epoch 4487/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8450
Learning Rate: 1.108069956114468e-05
Epoch 4488/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8091
Learning Rate: 1.108069956114468e-05
Epoch 4489/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8681
Learning Rate: 1.108069956114468e-05
Epoch 4490/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7589
Learning Rate: 1.0969892565533232e-05
Epoch 4491/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7741
Learning Rate: 1.0969892565533232e-05
Epoch 4492/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8206
Learning Rate: 1.0969892565533232e-05
Epoch 4493/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7529
Learning Rate: 1.0969892565533232e-05
Epoch 4494/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8002
Learning Rate: 1.0969892565533232e-05
Epoch 4495/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9789
Learning Rate: 1.0969892565533232e-05
Epoch 4496/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7794
Learning Rate: 1.0969892565533232e-05
Epoch 4497/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8904
Learning Rate: 1.0969892565533232e-05
Epoch 4498/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8390
Learning Rate: 1.0969892565533232e-05
Epoch 4499/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8585
Learning Rate: 1.0969892565533232e-05
Epoch 4500/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6768
Learning Rate: 1.08601936398779e-05
Epoch 4501/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8210
Learning Rate: 1.08601936398779e-05
Epoch 4502/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8256
Learning Rate: 1.08601936398779e-05
Epoch 4503/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8525
Learning Rate: 1.08601936398779e-05
Epoch 4504/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7900
Learning Rate: 1.08601936398779e-05
Epoch 4505/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7230
Learning Rate: 1.08601936398779e-05
Epoch 4506/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8447
Learning Rate: 1.08601936398779e-05
Epoch 4507/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9502
Learning Rate: 1.08601936398779e-05
Epoch 4508/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7809
Learning Rate: 1.08601936398779e-05
Epoch 4509/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7807
Learning Rate: 1.08601936398779e-05
Epoch 4510/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7652
Learning Rate: 1.075159170347912e-05
Epoch 4511/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7960
Learning Rate: 1.075159170347912e-05
Epoch 4512/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8607
Learning Rate: 1.075159170347912e-05
Epoch 4513/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8367
Learning Rate: 1.075159170347912e-05
Epoch 4514/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7978
Learning Rate: 1.075159170347912e-05
Epoch 4515/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8215
Learning Rate: 1.075159170347912e-05
Epoch 4516/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7808
Learning Rate: 1.075159170347912e-05
Epoch 4517/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8153
Learning Rate: 1.075159170347912e-05
Epoch 4518/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8831
Learning Rate: 1.075159170347912e-05
Epoch 4519/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7187
Learning Rate: 1.075159170347912e-05
Epoch 4520/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7975
Learning Rate: 1.064407578644433e-05
Epoch 4521/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7930
Learning Rate: 1.064407578644433e-05
Epoch 4522/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8822
Learning Rate: 1.064407578644433e-05
Epoch 4523/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8875
Learning Rate: 1.064407578644433e-05
Epoch 4524/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7017
Learning Rate: 1.064407578644433e-05
Epoch 4525/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8367
Learning Rate: 1.064407578644433e-05
Epoch 4526/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8773
Learning Rate: 1.064407578644433e-05
Epoch 4527/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7102
Learning Rate: 1.064407578644433e-05
Epoch 4528/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9241
Learning Rate: 1.064407578644433e-05
Epoch 4529/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8538
Learning Rate: 1.064407578644433e-05
Epoch 4530/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8573
Learning Rate: 1.0537635028579887e-05
Epoch 4531/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8043
Learning Rate: 1.0537635028579887e-05
Epoch 4532/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8048
Learning Rate: 1.0537635028579887e-05
Epoch 4533/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8102
Learning Rate: 1.0537635028579887e-05
Epoch 4534/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8151
Learning Rate: 1.0537635028579887e-05
Epoch 4535/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8253
Learning Rate: 1.0537635028579887e-05
Epoch 4536/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7251
Learning Rate: 1.0537635028579887e-05
Epoch 4537/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8482
Learning Rate: 1.0537635028579887e-05
Epoch 4538/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7938
Learning Rate: 1.0537635028579887e-05
Epoch 4539/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7918
Learning Rate: 1.0537635028579887e-05
Epoch 4540/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7400
Learning Rate: 1.0432258678294088e-05
Epoch 4541/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8330
Learning Rate: 1.0432258678294088e-05
Epoch 4542/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7374
Learning Rate: 1.0432258678294088e-05
Epoch 4543/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7989
Learning Rate: 1.0432258678294088e-05
Epoch 4544/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7694
Learning Rate: 1.0432258678294088e-05
Epoch 4545/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7909
Learning Rate: 1.0432258678294088e-05
Epoch 4546/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9241
Learning Rate: 1.0432258678294088e-05
Epoch 4547/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7539
Learning Rate: 1.0432258678294088e-05
Epoch 4548/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7506
Learning Rate: 1.0432258678294088e-05
Epoch 4549/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7027
Learning Rate: 1.0432258678294088e-05
Epoch 4550/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7564
Learning Rate: 1.0327936091511146e-05
Epoch 4551/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9080
Learning Rate: 1.0327936091511146e-05
Epoch 4552/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8087
Learning Rate: 1.0327936091511146e-05
Epoch 4553/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8152
Learning Rate: 1.0327936091511146e-05
Epoch 4554/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7725
Learning Rate: 1.0327936091511146e-05
Epoch 4555/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7341
Learning Rate: 1.0327936091511146e-05
Epoch 4556/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8579
Learning Rate: 1.0327936091511146e-05
Epoch 4557/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8090
Learning Rate: 1.0327936091511146e-05
Epoch 4558/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7799
Learning Rate: 1.0327936091511146e-05
Epoch 4559/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7861
Learning Rate: 1.0327936091511146e-05
Epoch 4560/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7645
Learning Rate: 1.0224656730596034e-05
Epoch 4561/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7946
Learning Rate: 1.0224656730596034e-05
Epoch 4562/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7221
Learning Rate: 1.0224656730596034e-05
Epoch 4563/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7291
Learning Rate: 1.0224656730596034e-05
Epoch 4564/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7759
Learning Rate: 1.0224656730596034e-05
Epoch 4565/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7959
Learning Rate: 1.0224656730596034e-05
Epoch 4566/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8500
Learning Rate: 1.0224656730596034e-05
Epoch 4567/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8154
Learning Rate: 1.0224656730596034e-05
Epoch 4568/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8384
Learning Rate: 1.0224656730596034e-05
Epoch 4569/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8801
Learning Rate: 1.0224656730596034e-05
Epoch 4570/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8391
Learning Rate: 1.0122410163290074e-05
Epoch 4571/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7934
Learning Rate: 1.0122410163290074e-05
Epoch 4572/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9689
Learning Rate: 1.0122410163290074e-05
Epoch 4573/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7516
Learning Rate: 1.0122410163290074e-05
Epoch 4574/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7903
Learning Rate: 1.0122410163290074e-05
Epoch 4575/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7694
Learning Rate: 1.0122410163290074e-05
Epoch 4576/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8526
Learning Rate: 1.0122410163290074e-05
Epoch 4577/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6634
Learning Rate: 1.0122410163290074e-05
Epoch 4578/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7860
Learning Rate: 1.0122410163290074e-05
Epoch 4579/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8452
Learning Rate: 1.0122410163290074e-05
Epoch 4580/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8701
Learning Rate: 1.0021186061657173e-05
Epoch 4581/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7967
Learning Rate: 1.0021186061657173e-05
Epoch 4582/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8159
Learning Rate: 1.0021186061657173e-05
Epoch 4583/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7296
Learning Rate: 1.0021186061657173e-05
Epoch 4584/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8251
Learning Rate: 1.0021186061657173e-05
Epoch 4585/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7683
Learning Rate: 1.0021186061657173e-05
Epoch 4586/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9158
Learning Rate: 1.0021186061657173e-05
Epoch 4587/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6915
Learning Rate: 1.0021186061657173e-05
Epoch 4588/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7396
Learning Rate: 1.0021186061657173e-05
Epoch 4589/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7753
Learning Rate: 1.0021186061657173e-05
Epoch 4590/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7250
Learning Rate: 9.920974201040601e-06
Epoch 4591/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8660
Learning Rate: 9.920974201040601e-06
Epoch 4592/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8832
Learning Rate: 9.920974201040601e-06
Epoch 4593/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7944
Learning Rate: 9.920974201040601e-06
Epoch 4594/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8009
Learning Rate: 9.920974201040601e-06
Epoch 4595/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9053
Learning Rate: 9.920974201040601e-06
Epoch 4596/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8193
Learning Rate: 9.920974201040601e-06
Epoch 4597/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7755
Learning Rate: 9.920974201040601e-06
Epoch 4598/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7538
Learning Rate: 9.920974201040601e-06
Epoch 4599/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9459
Learning Rate: 9.920974201040601e-06
Epoch 4600/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8197
Learning Rate: 9.821764459030195e-06
Epoch 4601/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8478
Learning Rate: 9.821764459030195e-06
Epoch 4602/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8534
Learning Rate: 9.821764459030195e-06
Epoch 4603/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7843
Learning Rate: 9.821764459030195e-06
Epoch 4604/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7161
Learning Rate: 9.821764459030195e-06
Epoch 4605/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8257
Learning Rate: 9.821764459030195e-06
Epoch 4606/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7712
Learning Rate: 9.821764459030195e-06
Epoch 4607/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8389
Learning Rate: 9.821764459030195e-06
Epoch 4608/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7719
Learning Rate: 9.821764459030195e-06
Epoch 4609/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7221
Learning Rate: 9.821764459030195e-06
Epoch 4610/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7445
Learning Rate: 9.723546814439892e-06
Epoch 4611/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7750
Learning Rate: 9.723546814439892e-06
Epoch 4612/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7565
Learning Rate: 9.723546814439892e-06
Epoch 4613/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7168
Learning Rate: 9.723546814439892e-06
Epoch 4614/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8335
Learning Rate: 9.723546814439892e-06
Epoch 4615/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7799
Learning Rate: 9.723546814439892e-06
Epoch 4616/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7307
Learning Rate: 9.723546814439892e-06
Epoch 4617/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8760
Learning Rate: 9.723546814439892e-06
Epoch 4618/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7857
Learning Rate: 9.723546814439892e-06
Epoch 4619/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7671
Learning Rate: 9.723546814439892e-06
Epoch 4620/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8065
Learning Rate: 9.626311346295493e-06
Epoch 4621/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7471
Learning Rate: 9.626311346295493e-06
Epoch 4622/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7981
Learning Rate: 9.626311346295493e-06
Epoch 4623/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9173
Learning Rate: 9.626311346295493e-06
Epoch 4624/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7936
Learning Rate: 9.626311346295493e-06
Epoch 4625/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6904
Learning Rate: 9.626311346295493e-06
Epoch 4626/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8342
Learning Rate: 9.626311346295493e-06
Epoch 4627/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7357
Learning Rate: 9.626311346295493e-06
Epoch 4628/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7459
Learning Rate: 9.626311346295493e-06
Epoch 4629/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7502
Learning Rate: 9.626311346295493e-06
Epoch 4630/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9090
Learning Rate: 9.530048232832538e-06
Epoch 4631/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7416
Learning Rate: 9.530048232832538e-06
Epoch 4632/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8055
Learning Rate: 9.530048232832538e-06
Epoch 4633/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8628
Learning Rate: 9.530048232832538e-06
Epoch 4634/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9095
Learning Rate: 9.530048232832538e-06
Epoch 4635/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7516
Learning Rate: 9.530048232832538e-06
Epoch 4636/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7930
Learning Rate: 9.530048232832538e-06
Epoch 4637/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9244
Learning Rate: 9.530048232832538e-06
Epoch 4638/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9100
Learning Rate: 9.530048232832538e-06
Epoch 4639/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7798
Learning Rate: 9.530048232832538e-06
Epoch 4640/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8335
Learning Rate: 9.434747750504214e-06
Epoch 4641/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8522
Learning Rate: 9.434747750504214e-06
Epoch 4642/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7726
Learning Rate: 9.434747750504214e-06
Epoch 4643/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8072
Learning Rate: 9.434747750504214e-06
Epoch 4644/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7323
Learning Rate: 9.434747750504214e-06
Epoch 4645/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8228
Learning Rate: 9.434747750504214e-06
Epoch 4646/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8167
Learning Rate: 9.434747750504214e-06
Epoch 4647/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7719
Learning Rate: 9.434747750504214e-06
Epoch 4648/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7545
Learning Rate: 9.434747750504214e-06
Epoch 4649/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7958
Learning Rate: 9.434747750504214e-06
Epoch 4650/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8934
Learning Rate: 9.34040027299917e-06
Epoch 4651/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8073
Learning Rate: 9.34040027299917e-06
Epoch 4652/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9197
Learning Rate: 9.34040027299917e-06
Epoch 4653/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8186
Learning Rate: 9.34040027299917e-06
Epoch 4654/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8157
Learning Rate: 9.34040027299917e-06
Epoch 4655/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8159
Learning Rate: 9.34040027299917e-06
Epoch 4656/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7582
Learning Rate: 9.34040027299917e-06
Epoch 4657/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8120
Learning Rate: 9.34040027299917e-06
Epoch 4658/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8577
Learning Rate: 9.34040027299917e-06
Epoch 4659/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7193
Learning Rate: 9.34040027299917e-06
Epoch 4660/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7558
Learning Rate: 9.24699627026918e-06
Epoch 4661/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7986
Learning Rate: 9.24699627026918e-06
Epoch 4662/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7732
Learning Rate: 9.24699627026918e-06
Epoch 4663/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7567
Learning Rate: 9.24699627026918e-06
Epoch 4664/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7553
Learning Rate: 9.24699627026918e-06
Epoch 4665/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7465
Learning Rate: 9.24699627026918e-06
Epoch 4666/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7543
Learning Rate: 9.24699627026918e-06
Epoch 4667/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8011
Learning Rate: 9.24699627026918e-06
Epoch 4668/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7630
Learning Rate: 9.24699627026918e-06
Epoch 4669/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8245
Learning Rate: 9.24699627026918e-06
Epoch 4670/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8292
Learning Rate: 9.154526307566487e-06
Epoch 4671/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8234
Learning Rate: 9.154526307566487e-06
Epoch 4672/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7901
Learning Rate: 9.154526307566487e-06
Epoch 4673/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9127
Learning Rate: 9.154526307566487e-06
Epoch 4674/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7704
Learning Rate: 9.154526307566487e-06
Epoch 4675/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7440
Learning Rate: 9.154526307566487e-06
Epoch 4676/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7839
Learning Rate: 9.154526307566487e-06
Epoch 4677/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6839
Learning Rate: 9.154526307566487e-06
Epoch 4678/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8747
Learning Rate: 9.154526307566487e-06
Epoch 4679/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8587
Learning Rate: 9.154526307566487e-06
Epoch 4680/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8297
Learning Rate: 9.062981044490821e-06
Epoch 4681/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7358
Learning Rate: 9.062981044490821e-06
Epoch 4682/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8028
Learning Rate: 9.062981044490821e-06
Epoch 4683/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9020
Learning Rate: 9.062981044490821e-06
Epoch 4684/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7892
Learning Rate: 9.062981044490821e-06
Epoch 4685/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7906
Learning Rate: 9.062981044490821e-06
Epoch 4686/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9811
Learning Rate: 9.062981044490821e-06
Epoch 4687/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8331
Learning Rate: 9.062981044490821e-06
Epoch 4688/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7341
Learning Rate: 9.062981044490821e-06
Epoch 4689/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8994
Learning Rate: 9.062981044490821e-06
Epoch 4690/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7235
Learning Rate: 8.972351234045913e-06
Epoch 4691/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7282
Learning Rate: 8.972351234045913e-06
Epoch 4692/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7585
Learning Rate: 8.972351234045913e-06
Epoch 4693/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8133
Learning Rate: 8.972351234045913e-06
Epoch 4694/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7739
Learning Rate: 8.972351234045913e-06
Epoch 4695/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7800
Learning Rate: 8.972351234045913e-06
Epoch 4696/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7808
Learning Rate: 8.972351234045913e-06
Epoch 4697/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7784
Learning Rate: 8.972351234045913e-06
Epoch 4698/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7899
Learning Rate: 8.972351234045913e-06
Epoch 4699/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7200
Learning Rate: 8.972351234045913e-06
Epoch 4700/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6877
Learning Rate: 8.882627721705453e-06
Epoch 4701/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7940
Learning Rate: 8.882627721705453e-06
Epoch 4702/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8331
Learning Rate: 8.882627721705453e-06
Epoch 4703/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7848
Learning Rate: 8.882627721705453e-06
Epoch 4704/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7887
Learning Rate: 8.882627721705453e-06
Epoch 4705/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8881
Learning Rate: 8.882627721705453e-06
Epoch 4706/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7751
Learning Rate: 8.882627721705453e-06
Epoch 4707/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7931
Learning Rate: 8.882627721705453e-06
Epoch 4708/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7886
Learning Rate: 8.882627721705453e-06
Epoch 4709/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8419
Learning Rate: 8.882627721705453e-06
Epoch 4710/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8936
Learning Rate: 8.7938014444884e-06
Epoch 4711/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7464
Learning Rate: 8.7938014444884e-06
Epoch 4712/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8127
Learning Rate: 8.7938014444884e-06
Epoch 4713/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7720
Learning Rate: 8.7938014444884e-06
Epoch 4714/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8093
Learning Rate: 8.7938014444884e-06
Epoch 4715/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8749
Learning Rate: 8.7938014444884e-06
Epoch 4716/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7842
Learning Rate: 8.7938014444884e-06
Epoch 4717/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8054
Learning Rate: 8.7938014444884e-06
Epoch 4718/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7864
Learning Rate: 8.7938014444884e-06
Epoch 4719/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8463
Learning Rate: 8.7938014444884e-06
Epoch 4720/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8632
Learning Rate: 8.705863430043516e-06
Epoch 4721/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8278
Learning Rate: 8.705863430043516e-06
Epoch 4722/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8546
Learning Rate: 8.705863430043516e-06
Epoch 4723/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8048
Learning Rate: 8.705863430043516e-06
Epoch 4724/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8049
Learning Rate: 8.705863430043516e-06
Epoch 4725/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8612
Learning Rate: 8.705863430043516e-06
Epoch 4726/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8654
Learning Rate: 8.705863430043516e-06
Epoch 4727/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7376
Learning Rate: 8.705863430043516e-06
Epoch 4728/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7923
Learning Rate: 8.705863430043516e-06
Epoch 4729/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6928
Learning Rate: 8.705863430043516e-06
Epoch 4730/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7222
Learning Rate: 8.61880479574308e-06
Epoch 4731/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7583
Learning Rate: 8.61880479574308e-06
Epoch 4732/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7949
Learning Rate: 8.61880479574308e-06
Epoch 4733/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7852
Learning Rate: 8.61880479574308e-06
Epoch 4734/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8059
Learning Rate: 8.61880479574308e-06
Epoch 4735/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8699
Learning Rate: 8.61880479574308e-06
Epoch 4736/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7919
Learning Rate: 8.61880479574308e-06
Epoch 4737/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7553
Learning Rate: 8.61880479574308e-06
Epoch 4738/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9324
Learning Rate: 8.61880479574308e-06
Epoch 4739/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7496
Learning Rate: 8.61880479574308e-06
Epoch 4740/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7862
Learning Rate: 8.532616747785649e-06
Epoch 4741/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7481
Learning Rate: 8.532616747785649e-06
Epoch 4742/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7740
Learning Rate: 8.532616747785649e-06
Epoch 4743/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6826
Learning Rate: 8.532616747785649e-06
Epoch 4744/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9162
Learning Rate: 8.532616747785649e-06
Epoch 4745/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8878
Learning Rate: 8.532616747785649e-06
Epoch 4746/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7863
Learning Rate: 8.532616747785649e-06
Epoch 4747/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8490
Learning Rate: 8.532616747785649e-06
Epoch 4748/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8365
Learning Rate: 8.532616747785649e-06
Epoch 4749/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7939
Learning Rate: 8.532616747785649e-06
Epoch 4750/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7724
Learning Rate: 8.447290580307792e-06
Epoch 4751/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7804
Learning Rate: 8.447290580307792e-06
Epoch 4752/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7719
Learning Rate: 8.447290580307792e-06
Epoch 4753/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8154
Learning Rate: 8.447290580307792e-06
Epoch 4754/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7481
Learning Rate: 8.447290580307792e-06
Epoch 4755/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7311
Learning Rate: 8.447290580307792e-06
Epoch 4756/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7956
Learning Rate: 8.447290580307792e-06
Epoch 4757/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8045
Learning Rate: 8.447290580307792e-06
Epoch 4758/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8993
Learning Rate: 8.447290580307792e-06
Epoch 4759/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7587
Learning Rate: 8.447290580307792e-06
Epoch 4760/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8346
Learning Rate: 8.362817674504713e-06
Epoch 4761/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7758
Learning Rate: 8.362817674504713e-06
Epoch 4762/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7554
Learning Rate: 8.362817674504713e-06
Epoch 4763/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8209
Learning Rate: 8.362817674504713e-06
Epoch 4764/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7586
Learning Rate: 8.362817674504713e-06
Epoch 4765/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8146
Learning Rate: 8.362817674504713e-06
Epoch 4766/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8128
Learning Rate: 8.362817674504713e-06
Epoch 4767/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7696
Learning Rate: 8.362817674504713e-06
Epoch 4768/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8816
Learning Rate: 8.362817674504713e-06
Epoch 4769/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7818
Learning Rate: 8.362817674504713e-06
Epoch 4770/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7268
Learning Rate: 8.279189497759666e-06
Epoch 4771/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7794
Learning Rate: 8.279189497759666e-06
Epoch 4772/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7632
Learning Rate: 8.279189497759666e-06
Epoch 4773/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7513
Learning Rate: 8.279189497759666e-06
Epoch 4774/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8434
Learning Rate: 8.279189497759666e-06
Epoch 4775/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8710
Learning Rate: 8.279189497759666e-06
Epoch 4776/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7813
Learning Rate: 8.279189497759666e-06
Epoch 4777/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7308
Learning Rate: 8.279189497759666e-06
Epoch 4778/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8996
Learning Rate: 8.279189497759666e-06
Epoch 4779/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9701
Learning Rate: 8.279189497759666e-06
Epoch 4780/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7865
Learning Rate: 8.19639760278207e-06
Epoch 4781/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7613
Learning Rate: 8.19639760278207e-06
Epoch 4782/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7450
Learning Rate: 8.19639760278207e-06
Epoch 4783/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7509
Learning Rate: 8.19639760278207e-06
Epoch 4784/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7728
Learning Rate: 8.19639760278207e-06
Epoch 4785/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8636
Learning Rate: 8.19639760278207e-06
Epoch 4786/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8241
Learning Rate: 8.19639760278207e-06
Epoch 4787/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8083
Learning Rate: 8.19639760278207e-06
Epoch 4788/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8079
Learning Rate: 8.19639760278207e-06
Epoch 4789/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8334
Learning Rate: 8.19639760278207e-06
Epoch 4790/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6956
Learning Rate: 8.114433626754249e-06
Epoch 4791/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7167
Learning Rate: 8.114433626754249e-06
Epoch 4792/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7965
Learning Rate: 8.114433626754249e-06
Epoch 4793/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8062
Learning Rate: 8.114433626754249e-06
Epoch 4794/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7591
Learning Rate: 8.114433626754249e-06
Epoch 4795/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7317
Learning Rate: 8.114433626754249e-06
Epoch 4796/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8199
Learning Rate: 8.114433626754249e-06
Epoch 4797/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7884
Learning Rate: 8.114433626754249e-06
Epoch 4798/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8797
Learning Rate: 8.114433626754249e-06
Epoch 4799/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7673
Learning Rate: 8.114433626754249e-06
Epoch 4800/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8459
Learning Rate: 8.033289290486707e-06
Epoch 4801/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7728
Learning Rate: 8.033289290486707e-06
Epoch 4802/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7110
Learning Rate: 8.033289290486707e-06
Epoch 4803/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9894
Learning Rate: 8.033289290486707e-06
Epoch 4804/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7925
Learning Rate: 8.033289290486707e-06
Epoch 4805/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7520
Learning Rate: 8.033289290486707e-06
Epoch 4806/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6943
Learning Rate: 8.033289290486707e-06
Epoch 4807/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8593
Learning Rate: 8.033289290486707e-06
Epoch 4808/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7511
Learning Rate: 8.033289290486707e-06
Epoch 4809/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7996
Learning Rate: 8.033289290486707e-06
Epoch 4810/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9582
Learning Rate: 7.952956397581839e-06
Epoch 4811/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8475
Learning Rate: 7.952956397581839e-06
Epoch 4812/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8849
Learning Rate: 7.952956397581839e-06
Epoch 4813/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6679
Learning Rate: 7.952956397581839e-06
Epoch 4814/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7316
Learning Rate: 7.952956397581839e-06
Epoch 4815/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7969
Learning Rate: 7.952956397581839e-06
Epoch 4816/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8023
Learning Rate: 7.952956397581839e-06
Epoch 4817/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6737
Learning Rate: 7.952956397581839e-06
Epoch 4818/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8953
Learning Rate: 7.952956397581839e-06
Epoch 4819/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7276
Learning Rate: 7.952956397581839e-06
Epoch 4820/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9076
Learning Rate: 7.87342683360602e-06
Epoch 4821/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7869
Learning Rate: 7.87342683360602e-06
Epoch 4822/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8077
Learning Rate: 7.87342683360602e-06
Epoch 4823/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7911
Learning Rate: 7.87342683360602e-06
Epoch 4824/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7875
Learning Rate: 7.87342683360602e-06
Epoch 4825/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8015
Learning Rate: 7.87342683360602e-06
Epoch 4826/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8590
Learning Rate: 7.87342683360602e-06
Epoch 4827/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7892
Learning Rate: 7.87342683360602e-06
Epoch 4828/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7699
Learning Rate: 7.87342683360602e-06
Epoch 4829/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9194
Learning Rate: 7.87342683360602e-06
Epoch 4830/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7726
Learning Rate: 7.794692565269961e-06
Epoch 4831/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6967
Learning Rate: 7.794692565269961e-06
Epoch 4832/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7553
Learning Rate: 7.794692565269961e-06
Epoch 4833/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8114
Learning Rate: 7.794692565269961e-06
Epoch 4834/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7991
Learning Rate: 7.794692565269961e-06
Epoch 4835/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6640
Learning Rate: 7.794692565269961e-06
Epoch 4836/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6770
Learning Rate: 7.794692565269961e-06
Epoch 4837/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7448
Learning Rate: 7.794692565269961e-06
Epoch 4838/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7682
Learning Rate: 7.794692565269961e-06
Epoch 4839/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8150
Learning Rate: 7.794692565269961e-06
Epoch 4840/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8019
Learning Rate: 7.716745639617262e-06
Epoch 4841/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7885
Learning Rate: 7.716745639617262e-06
Epoch 4842/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6907
Learning Rate: 7.716745639617262e-06
Epoch 4843/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8292
Learning Rate: 7.716745639617262e-06
Epoch 4844/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8408
Learning Rate: 7.716745639617262e-06
Epoch 4845/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7874
Learning Rate: 7.716745639617262e-06
Epoch 4846/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7595
Learning Rate: 7.716745639617262e-06
Epoch 4847/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 3.0326
Learning Rate: 7.716745639617262e-06
Epoch 4848/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8186
Learning Rate: 7.716745639617262e-06
Epoch 4849/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7983
Learning Rate: 7.716745639617262e-06
Epoch 4850/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8326
Learning Rate: 7.639578183221089e-06
Epoch 4851/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8435
Learning Rate: 7.639578183221089e-06
Epoch 4852/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8164
Learning Rate: 7.639578183221089e-06
Epoch 4853/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7554
Learning Rate: 7.639578183221089e-06
Epoch 4854/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7911
Learning Rate: 7.639578183221089e-06
Epoch 4855/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9012
Learning Rate: 7.639578183221089e-06
Epoch 4856/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8229
Learning Rate: 7.639578183221089e-06
Epoch 4857/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8306
Learning Rate: 7.639578183221089e-06
Epoch 4858/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8649
Learning Rate: 7.639578183221089e-06
Epoch 4859/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6743
Learning Rate: 7.639578183221089e-06
Epoch 4860/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7677
Learning Rate: 7.563182401388878e-06
Epoch 4861/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7905
Learning Rate: 7.563182401388878e-06
Epoch 4862/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8219
Learning Rate: 7.563182401388878e-06
Epoch 4863/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8701
Learning Rate: 7.563182401388878e-06
Epoch 4864/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7419
Learning Rate: 7.563182401388878e-06
Epoch 4865/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8222
Learning Rate: 7.563182401388878e-06
Epoch 4866/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7851
Learning Rate: 7.563182401388878e-06
Epoch 4867/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9515
Learning Rate: 7.563182401388878e-06
Epoch 4868/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8958
Learning Rate: 7.563182401388878e-06
Epoch 4869/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7915
Learning Rate: 7.563182401388878e-06
Epoch 4870/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7699
Learning Rate: 7.487550577374989e-06
Epoch 4871/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7203
Learning Rate: 7.487550577374989e-06
Epoch 4872/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8823
Learning Rate: 7.487550577374989e-06
Epoch 4873/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7713
Learning Rate: 7.487550577374989e-06
Epoch 4874/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7245
Learning Rate: 7.487550577374989e-06
Epoch 4875/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8584
Learning Rate: 7.487550577374989e-06
Epoch 4876/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8406
Learning Rate: 7.487550577374989e-06
Epoch 4877/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8104
Learning Rate: 7.487550577374989e-06
Epoch 4878/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8487
Learning Rate: 7.487550577374989e-06
Epoch 4879/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8597
Learning Rate: 7.487550577374989e-06
Epoch 4880/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8638
Learning Rate: 7.412675071601239e-06
Epoch 4881/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8663
Learning Rate: 7.412675071601239e-06
Epoch 4882/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7762
Learning Rate: 7.412675071601239e-06
Epoch 4883/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7627
Learning Rate: 7.412675071601239e-06
Epoch 4884/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8286
Learning Rate: 7.412675071601239e-06
Epoch 4885/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6917
Learning Rate: 7.412675071601239e-06
Epoch 4886/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8305
Learning Rate: 7.412675071601239e-06
Epoch 4887/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7394
Learning Rate: 7.412675071601239e-06
Epoch 4888/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9274
Learning Rate: 7.412675071601239e-06
Epoch 4889/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8682
Learning Rate: 7.412675071601239e-06
Epoch 4890/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8027
Learning Rate: 7.338548320885227e-06
Epoch 4891/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8708
Learning Rate: 7.338548320885227e-06
Epoch 4892/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7970
Learning Rate: 7.338548320885227e-06
Epoch 4893/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9099
Learning Rate: 7.338548320885227e-06
Epoch 4894/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6982
Learning Rate: 7.338548320885227e-06
Epoch 4895/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7890
Learning Rate: 7.338548320885227e-06
Epoch 4896/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7677
Learning Rate: 7.338548320885227e-06
Epoch 4897/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7727
Learning Rate: 7.338548320885227e-06
Epoch 4898/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8681
Learning Rate: 7.338548320885227e-06
Epoch 4899/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8494
Learning Rate: 7.338548320885227e-06
Epoch 4900/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7869
Learning Rate: 7.2651628376763745e-06
Epoch 4901/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7891
Learning Rate: 7.2651628376763745e-06
Epoch 4902/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9087
Learning Rate: 7.2651628376763745e-06
Epoch 4903/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8938
Learning Rate: 7.2651628376763745e-06
Epoch 4904/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8072
Learning Rate: 7.2651628376763745e-06
Epoch 4905/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7794
Learning Rate: 7.2651628376763745e-06
Epoch 4906/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7963
Learning Rate: 7.2651628376763745e-06
Epoch 4907/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9444
Learning Rate: 7.2651628376763745e-06
Epoch 4908/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7569
Learning Rate: 7.2651628376763745e-06
Epoch 4909/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7943
Learning Rate: 7.2651628376763745e-06
Epoch 4910/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7505
Learning Rate: 7.1925112092996104e-06
Epoch 4911/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8240
Learning Rate: 7.1925112092996104e-06
Epoch 4912/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7661
Learning Rate: 7.1925112092996104e-06
Epoch 4913/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8010
Learning Rate: 7.1925112092996104e-06
Epoch 4914/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7613
Learning Rate: 7.1925112092996104e-06
Epoch 4915/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8385
Learning Rate: 7.1925112092996104e-06
Epoch 4916/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8506
Learning Rate: 7.1925112092996104e-06
Epoch 4917/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8906
Learning Rate: 7.1925112092996104e-06
Epoch 4918/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8709
Learning Rate: 7.1925112092996104e-06
Epoch 4919/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7838
Learning Rate: 7.1925112092996104e-06
Epoch 4920/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8558
Learning Rate: 7.120586097206614e-06
Epoch 4921/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8086
Learning Rate: 7.120586097206614e-06
Epoch 4922/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9370
Learning Rate: 7.120586097206614e-06
Epoch 4923/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6978
Learning Rate: 7.120586097206614e-06
Epoch 4924/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8223
Learning Rate: 7.120586097206614e-06
Epoch 4925/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7068
Learning Rate: 7.120586097206614e-06
Epoch 4926/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7479
Learning Rate: 7.120586097206614e-06
Epoch 4927/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7541
Learning Rate: 7.120586097206614e-06
Epoch 4928/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9518
Learning Rate: 7.120586097206614e-06
Epoch 4929/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8274
Learning Rate: 7.120586097206614e-06
Epoch 4930/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8148
Learning Rate: 7.049380236234548e-06
Epoch 4931/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7735
Learning Rate: 7.049380236234548e-06
Epoch 4932/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8090
Learning Rate: 7.049380236234548e-06
Epoch 4933/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7658
Learning Rate: 7.049380236234548e-06
Epoch 4934/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7812
Learning Rate: 7.049380236234548e-06
Epoch 4935/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8057
Learning Rate: 7.049380236234548e-06
Epoch 4936/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8613
Learning Rate: 7.049380236234548e-06
Epoch 4937/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7491
Learning Rate: 7.049380236234548e-06
Epoch 4938/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7542
Learning Rate: 7.049380236234548e-06
Epoch 4939/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7360
Learning Rate: 7.049380236234548e-06
Epoch 4940/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7868
Learning Rate: 6.978886433872203e-06
Epoch 4941/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.6914
Learning Rate: 6.978886433872203e-06
Epoch 4942/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8460
Learning Rate: 6.978886433872203e-06
Epoch 4943/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8926
Learning Rate: 6.978886433872203e-06
Epoch 4944/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7323
Learning Rate: 6.978886433872203e-06
Epoch 4945/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7655
Learning Rate: 6.978886433872203e-06
Epoch 4946/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7874
Learning Rate: 6.978886433872203e-06
Epoch 4947/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8178
Learning Rate: 6.978886433872203e-06
Epoch 4948/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7524
Learning Rate: 6.978886433872203e-06
Epoch 4949/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8394
Learning Rate: 6.978886433872203e-06
Epoch 4950/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7551
Learning Rate: 6.909097569533481e-06
Epoch 4951/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7619
Learning Rate: 6.909097569533481e-06
Epoch 4952/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8476
Learning Rate: 6.909097569533481e-06
Epoch 4953/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7622
Learning Rate: 6.909097569533481e-06
Epoch 4954/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8116
Learning Rate: 6.909097569533481e-06
Epoch 4955/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8279
Learning Rate: 6.909097569533481e-06
Epoch 4956/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8301
Learning Rate: 6.909097569533481e-06
Epoch 4957/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7045
Learning Rate: 6.909097569533481e-06
Epoch 4958/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8223
Learning Rate: 6.909097569533481e-06
Epoch 4959/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7511
Learning Rate: 6.909097569533481e-06
Epoch 4960/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9653
Learning Rate: 6.840006593838146e-06
Epoch 4961/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8512
Learning Rate: 6.840006593838146e-06
Epoch 4962/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7714
Learning Rate: 6.840006593838146e-06
Epoch 4963/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7894
Learning Rate: 6.840006593838146e-06
Epoch 4964/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7818
Learning Rate: 6.840006593838146e-06
Epoch 4965/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7566
Learning Rate: 6.840006593838146e-06
Epoch 4966/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8206
Learning Rate: 6.840006593838146e-06
Epoch 4967/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8067
Learning Rate: 6.840006593838146e-06
Epoch 4968/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9051
Learning Rate: 6.840006593838146e-06
Epoch 4969/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.9230
Learning Rate: 6.840006593838146e-06
Epoch 4970/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7825
Learning Rate: 6.771606527899764e-06
Epoch 4971/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7391
Learning Rate: 6.771606527899764e-06
Epoch 4972/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7626
Learning Rate: 6.771606527899764e-06
Epoch 4973/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8032
Learning Rate: 6.771606527899764e-06
Epoch 4974/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7728
Learning Rate: 6.771606527899764e-06
Epoch 4975/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7725
Learning Rate: 6.771606527899764e-06
Epoch 4976/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7647
Learning Rate: 6.771606527899764e-06
Epoch 4977/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7840
Learning Rate: 6.771606527899764e-06
Epoch 4978/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7809
Learning Rate: 6.771606527899764e-06
Epoch 4979/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7560
Learning Rate: 6.771606527899764e-06
Epoch 4980/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7498
Learning Rate: 6.7038904626207665e-06
Epoch 4981/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7064
Learning Rate: 6.7038904626207665e-06
Epoch 4982/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8002
Learning Rate: 6.7038904626207665e-06
Epoch 4983/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7710
Learning Rate: 6.7038904626207665e-06
Epoch 4984/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8423
Learning Rate: 6.7038904626207665e-06
Epoch 4985/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7633
Learning Rate: 6.7038904626207665e-06
Epoch 4986/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.8827
Learning Rate: 6.7038904626207665e-06
Epoch 4987/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7649
Learning Rate: 6.7038904626207665e-06
Epoch 4988/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7973
Learning Rate: 6.7038904626207665e-06
Epoch 4989/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7812
Learning Rate: 6.7038904626207665e-06
Epoch 4990/2500
----------


  0%|          | 0/8 [00:00<?, ?it/s]

train Loss: 2.7625
Learning Rate: 6.636851557994558e-06
