## End to End PNNL Surrogate Model Training and Testing

Important parameters (example values; the values actually used in this run are set in the Settings cell below):

channel = 1 or 2 # do others later

gridsize = 128 or 512 

w = 10 # anything from 1 to 499 (simLen) is okay. 

latentDim = 16 

## Imports

In [1]:
# EXPORT
# --- Must haves ---
import os, sys
sys.path.append('..')

import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.cuda as cuda
import torch.nn as nn
import torchvision
import torch.nn.functional as F

from surrogates4sims.pnnlDatasets import CCSI_2D

from surrogates4sims.utils import create_opt, create_one_cycle, find_lr, printNumModelParams, \
                                    rmse, writeMessage, plotSampleWprediction, plotSampleWpredictionByChannel, \
                                    plotSample, curl, jacobian, stream2uv, create_movie, convertSimToImage, \
                                    pkl_save, pkl_load, create_1_channel_movie

from surrogates4sims.models import Generator, Encoder, AE_no_P, AE_xhat_z, AE_xhat_zV2

import numpy as np
from tqdm import tqdm
from copy import deepcopy
from glob import glob

## Settings

In [2]:
# --- Run mode ---
eval_only = False  # when True, the training loop at the bottom of the notebook is skipped
DEBUG = False      # when True, `epochs` and `numSamplesToKeep` are overridden below

# Model name, for tensorboard recording and checkpointing purposes.
versionName = "pnnl_end2end_plateau_train"

# GPU numbers to use. Comma separate them for multi-GPUs.
gpu_ids = "1"
versionName = versionName + '_GPUs{}'.format(gpu_ids.replace(',', ''))

# Path to load model weights.
pretrained_path = None

# Rate at which to record metrics (number of batches to average over when
# recording metrics, e.g. "every 5 batches").
tensorboard_rate = 5

# Number of epochs to train. Defined here so it can be used by a OneCycle LR scheduler.
epochs = 1000

# --- Data ---
channel = 2
gridsize = 128
dataDirec = '/data/ccsi/pnnl_liquid_inlet/channel_{}/gridsize_{}'.format(channel, gridsize)
preprocess = False  # keep this as false until using the long runtime loader
testSplit = .2      # don't change this for now.
AE = False
numWorkers = 2
physicsDim = 2      # inlet velocity and time sample

# --- Output directories ---
cps = 'cps'                # checkpoint directory
tensorboard_direc = "tb"

findLRs = False

# --- LIN parameters ---
hiddenLayers = [128, 128]
activation = nn.ELU()

# --- Hyper-parameters ---
seed = 1234
np.random.seed(seed)
# Seed torch as well: model initialisation and the random samplers draw from
# torch's generator, which np.random.seed does not touch.
torch.manual_seed(seed)
bz = 3
use_sigmoid_output = True
numSamplesToKeep = 1  # if not debugging
latentDim = 256
window_size = 5
filters = 128
num_conv = 4          # breaks when less than 2
simLen = 6
stack = True
simVizIndex = 0       # sim in the test set to visualize
createStreamFcn = False
doJacobian = False
repeat = 0
skip_connection = False
norm_layer = [torch.nn.Identity][0]
patience = 1
if DEBUG:
    epochs = 10000
    numSamplesToKeep = 2  # 1 simulation

# Record the value of numSamplesToKeep that is actually in effect (it differs
# in DEBUG mode) rather than a hard-coded 1.
versionName = versionName + '_channel{}_gridsize{}_latentDim{}_Samples2Keep{}'.format(
    channel, gridsize, latentDim, numSamplesToKeep)
versionName

'pnnl_end2end_plateau_train_GPUs1_channel2_gridsize128_latentDim256_Samples2Keep1'

## Select GPUs

In [3]:
# IPython shell escape: show the state of every GPU before a device is selected.
!nvidia-smi

Fri Nov  6 18:34:25 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.33.01    Driver Version: 440.33.01    CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  TITAN Xp            On   | 00000000:02:00.0 Off |                  N/A |
| 23%   19C    P8     8W / 250W |      1MiB / 12196MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  TITAN Xp            On   | 00000000:03:00.0 Off |                  N/A |
| 23%   24C    P8     9W / 250W |      1MiB / 12196MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  TITAN Xp            On   | 00000000:81:00.0 Off |                  N/A |
| 53%   

In [4]:
# Order CUDA devices by PCI bus id (see issue #152) and expose only the GPUs
# listed in `gpu_ids` to this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": gpu_ids,
})

In [5]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print('Using device:', device)

Using device: cuda


In [6]:
# On a CUDA device, report availability, visible device count, the current
# device index and its name (one value per line, in that order).
if device.type == 'cuda':
    for cuda_query in (cuda.is_available, cuda.device_count,
                       cuda.current_device, cuda.get_device_name):
        print(cuda_query())

True
1
0
TITAN Xp


In [7]:
# Allocate a small tensor on the selected device, then re-run nvidia-smi
# (IPython shell escape) to see the resulting memory usage on the chosen GPU.
a = torch.zeros(5, device=device.type)
!nvidia-smi

Fri Nov  6 18:34:28 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.33.01    Driver Version: 440.33.01    CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  TITAN Xp            On   | 00000000:02:00.0 Off |                  N/A |
| 23%   19C    P8     8W / 250W |      1MiB / 12196MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  TITAN Xp            On   | 00000000:03:00.0 Off |                  N/A |
| 23%   26C    P2    59W / 250W |    573MiB / 12196MiB |      2%      Default |
+-------------------------------+----------------------+----------------------+
|   2  TITAN Xp            On   | 00000000:81:00.0 Off |                  N/A |
| 53%   

## Datasets & Loaders

In [8]:
# glob() returns files in arbitrary, filesystem-dependent order; sort them so
# that the train/test split below is the same on every machine and every run.
sims = sorted(glob(os.path.join(dataDirec,'*.pkl')))
numSims = len(sims)
idx = int(testSplit*numSims)  # number of simulations held out for testing

# Test simulations are evenly spaced through the sorted list, never taking the
# first or the last file; every remaining simulation is used for training.
testInds = np.linspace(1,numSims-2,idx).astype('int')
trainInds = sorted(set(range(numSims)).difference(testInds.tolist()))
# Alternative (random) split, kept for reference:
# perm = np.random.permutation(numSims)
# testInds = perm[:idx]
# trainInds = perm[idx:]
testSimFiles = [sims[i] for i in testInds]
trainSimFiles = [sims[i] for i in trainInds]
len(testSimFiles), len(trainSimFiles)

(10, 40)

In [9]:
# Both datasets are built with the same options; only the file list differs.
dataset_kwargs = dict(doPreprocess=preprocess, numToKeep=numSamplesToKeep,
                      channel=channel, AE=AE, w=window_size, simLen=simLen)
testDataset = CCSI_2D(testSimFiles, **dataset_kwargs)
trainDataset = CCSI_2D(trainSimFiles, **dataset_kwargs)

# Subset the first simulation to just its first 6 frames
# (one input and 5 to predict).
x_data, physics_var = trainDataset.data[0]
trainDataset.data[0] = (x_data[:6], physics_var)
trainDataset.t = np.linspace(0, 1, 500).astype('float32')

# NOTE: the test dataset built above is replaced here, so "validation" in this
# notebook runs on the (subsetted) training data.
testDataset = trainDataset
len(testDataset), len(trainDataset), len(trainDataset.data)

(6, 6, 1)

In [10]:
# Shapes of the four parts of the first training sample.
first_sample = trainDataset[0]
first_sample[0].shape, first_sample[1].shape, first_sample[2].shape, first_sample[3].shape

((1, 128, 128), (5, 1, 128, 128), (2,), (5, 2))

In [11]:
# Both loaders draw dataset index 0 six times per epoch (in random order, which
# is irrelevant since every index is the same); only the training loader drops
# a trailing incomplete batch.
repeated_first_index = [0]*6
shared_loader_kwargs = dict(batch_size=bz, num_workers=numWorkers, pin_memory=True)

trainDataLoader = DataLoader(
    dataset=trainDataset,
    drop_last=True,
    sampler=torch.utils.data.SubsetRandomSampler(repeated_first_index),
    **shared_loader_kwargs,
)
testDataLoader = DataLoader(
    dataset=testDataset,
    sampler=torch.utils.data.SubsetRandomSampler(repeated_first_index),
    **shared_loader_kwargs,
)
len(trainDataLoader), len(testDataLoader)

(2, 2)

In [12]:
# Walk the training loader once, printing each batch index together with the
# shape of the batch's first element.
for i, x in enumerate(trainDataLoader):
    first_element_shape = x[0].shape
    print(i, first_element_shape)

0 torch.Size([3, 1, 128, 128])
1 torch.Size([3, 1, 128, 128])


In [13]:
# Pull one batch and keep its four parts around for the model-building and
# sanity-check cells that follow.
first_batch = next(iter(trainDataLoader))
X, Y, p_x, p_y = first_batch
print(*(part.shape for part in (X, Y, p_x, p_y)))

torch.Size([3, 1, 128, 128]) torch.Size([3, 5, 1, 128, 128]) torch.Size([3, 2]) torch.Size([3, 5, 2])


## Model

In [14]:
# Build the autoencoder from a sample batch that lives on the target device.
# NOTE(review): X is presumably used by the constructor to size its layers;
# confirm against surrogates4sims.models.AE_xhat_zV2.
X = X.to(device)
AE_model = AE_xhat_zV2(X, filters, latentDim, num_conv, repeat,
                       skip_connection, stack, conv_k=3, last_k=3,
                       act=nn.LeakyReLU(), return_z=True, stream=createStreamFcn,
                       device=device, norm=norm_layer,
                       # honour the notebook-level setting instead of a hard-coded True
                       sigmoid_out=use_sigmoid_output)

# Wrapped in DataParallel; later cells reach the underlying network through
# AE_model.module (encoder / generator).
AE_model = nn.DataParallel(AE_model)

[128, 8, 8]




In [15]:
# Report how many layers / parameters of the autoencoder require gradients.
printNumModelParams(AE_model)

90 layers require gradients (unfrozen) out of 90 layers
13,668,609 parameters require gradients (unfrozen) out of 13,668,609 parameters


In [16]:
# Smoke-test the autoencoder on the sample batch: it returns the
# reconstruction and the latent vector; show both shapes.
ae_outputs = AE_model(X, p_x)
Xhat, z = ae_outputs
Xhat.shape, z.shape

(torch.Size([3, 1, 128, 128]), torch.Size([3, 256]))

In [17]:
# Load the pretrained autoencoder weights.
# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint saved on other GPUs also loads on a different GPU layout or on CPU.
# NOTE(review): this is an absolute, user-specific path; consider making it configurable.
ae_checkpoint_path = os.path.join('/home/bartoldson1/surrogates4sims/cps',
 'pnnl_plateau_train_GPUs23_channel2_gridsize128_latentDim256_filters128_bz32_numConv4_jacobianFalse_norm_layerIdentity_sigmoid_outTrue_epochs1000_stackTrue_lr0.0001')
AE_model.load_state_dict(torch.load(ae_checkpoint_path, map_location=device))

<All keys matched successfully>

In [18]:
# LIN Model
class MLP(nn.Module):
    """Fully connected network that maps a vector back onto its own size.

    The layer stack is Linear -> activation -> ... -> Linear, with one hidden
    Linear layer per entry of ``hiddenLayerSizes`` and no activation after the
    final (output) layer.

    Args:
        X: sample input; only ``X.shape[1:]`` is used, to derive the number of
            input (and output) features.
        hiddenLayerSizes: widths of the hidden layers. An empty sequence gives
            a single Linear layer. The default list is never mutated.
        activation: activation module inserted after every layer but the last
            (the same instance is reused at each position).
    """

    def __init__(self, X, hiddenLayerSizes = [1024], activation=nn.ELU()):
        super(MLP,self).__init__()

        self.activation = activation
        self.inputSize = X.shape[1:]
        num_features = int(np.prod(self.inputSize))

        # Build the stack in a local list. Storing it as `self.modules` would
        # shadow nn.Module.modules(), breaking any code that iterates over the
        # sub-modules of this network.
        widths = [num_features] + list(hiddenLayerSizes) + [num_features]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(self.activation)
        stack.pop()  # no activation after the output layer
        self.layers = nn.Sequential(*stack)

    def forward(self,x):
        """Apply the layer stack to ``x`` (last dimension = number of features)."""
        return self.layers(x)

In [19]:
# Latent integration network (LIN): an MLP sized from the latent vector `z`
# produced by the autoencoder smoke test.
LIN_model = MLP(z, hiddenLayers, activation)
LIN_model

MLP(
  (activation): ELU(alpha=1.0)
  (layers): Sequential(
    (0): Linear(in_features=256, out_features=128, bias=True)
    (1): ELU(alpha=1.0)
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): ELU(alpha=1.0)
    (4): Linear(in_features=128, out_features=256, bias=True)
  )
)

In [20]:
# surrogate class
class Surrogate(nn.Module):
    """End-to-end surrogate: encode a frame, roll the latent state forward with
    the LIN for `window` steps, and decode every predicted latent state.

    The trailing ``p_size`` columns of every encoding hold the physics
    parameters and are overwritten with the known values (``p_x`` for the
    input frame, ``p_y[:, i]`` for predicted step ``i``).

    Args:
        window: default number of steps to predict.
        z_size: latent size, not including the physics parameters.
        p_size: number of physics parameters.
        LIN: module predicting the change in the encoding for one step.
        encoder: module mapping a frame batch to encodings of width
            ``z_size + p_size``.
        decoder: module mapping encodings back to frames.
    """

    def __init__(self, window,
                 z_size, p_size,
                LIN, encoder, decoder):
        super(Surrogate, self).__init__()
        self.window = window
        self.z_size = z_size # this does not include the size of p
        self.p_size = p_size
        self.c_size = z_size + p_size # this does include the size of p
        self.LIN = LIN
        self.encoder = encoder
        self.decoder = decoder

    def encode(self, U):
        """Encode a batch of frames."""
        return self.encoder(U)

    def decode(self, encoding):
        """Decode a batch of encodings into frames."""
        return self.decoder(encoding)

    def _write_physics(self, encoding, p):
        """Overwrite (in place) the trailing ``p_size`` columns of ``encoding``
        with ``p`` and return ``encoding``.

        Nothing is written when ``p_size`` is 0, because the slice ``-0:``
        would select every column instead of none.
        """
        if self.p_size > 0:
            encoding[:, -self.p_size:] = p
        return encoding

    def predict_next_w_encodings(self, encoding, p_y, window):
        '''
        use the LIN to predict the next w encodings for each
        encoded U in the batch

        Returns a tensor stacked along a new leading dimension:
        [window, batch_size, c_size].
        '''
        predicted_encodings = []

        # given a batch of encodings, advance each encoding window time steps.
        # save the result at each time step
        for i in range(window):
            # use LIN to predict delta in encoding; the sum is a fresh tensor,
            # so the in-place write below never touches the previous step.
            encoding = self.LIN(encoding) + encoding
            encoding = self._write_physics(encoding, p_y[:, i])
            predicted_encodings.append(encoding)

        return torch.stack(predicted_encodings)

    def forward(self, U, p_x, p_y, window = None):
        """Predict the next ``window`` frames after ``U``.

        Args:
            U: batch of input frames.
            p_x: physics parameters of the input frames.
            p_y: physics parameters of the frames to predict; its dimension 1
                must equal ``window``.
            window: number of steps to predict (defaults to ``self.window``).

        Returns:
            Predicted frames with the window as dimension 1, i.e.
            [batch_size, window, ...frame dims].
        """
        if window is None:
            window = self.window
        assert p_y.size(1) == window

        # Work on a copy of the encoder output: writing into it in place would
        # invalidate the backward pass of encoders whose last operation keeps
        # its output for the gradient (e.g. Tanh / Sigmoid), and would mutate
        # any tensor the encoder happens to return by reference.
        encoding = self.encode(U).clone()
        encoding = self._write_physics(encoding, p_x) # added this on 10/27/2020
        encoding_w = self.predict_next_w_encodings(encoding, p_y, window)
        # want to have this agree with U_y, which is [batch_size, window_size, channels, nx, ny]
        # right now, it's [window_size, batch_size, ...], so transpose dimensions 0 and 1
        U = torch.stack([self.decode(encoding_i) for encoding_i in encoding_w])
        return U.transpose(0,1)
    

In [21]:
# Assemble a surrogate from the LIN and the autoencoder's two halves (reached
# through the DataParallel wrapper); used by the sanity checks below.
surrogate = Surrogate(
    window=window_size,
    z_size=latentDim - physicsDim,
    p_size=physicsDim,
    LIN=LIN_model,
    encoder=AE_model.module.encoder,
    decoder=AE_model.module.generator,
)

In [22]:
# Move every sub-module of the surrogate (LIN, encoder, decoder) to the selected device.
surrogate = surrogate.to(device)

In [23]:
# Sanity check: encode the sample batch and inspect the encoding shape.
encoding = surrogate.encode(X)
encoding.shape

torch.Size([3, 256])

In [24]:
# Sanity check: decode the encoding and inspect the reconstructed shape.
decoding = surrogate.decode(encoding)
decoding.shape

torch.Size([3, 1, 128, 128])

In [25]:
# Invariants tying the surrogate's sizes to the notebook settings:
# the full encoding width equals latentDim, its physics part equals physicsDim,
# and an encode/decode round trip preserves the input shape.
assert surrogate.c_size == latentDim
assert surrogate.p_size == physicsDim
assert encoding.shape[-1] == surrogate.c_size
assert decoding.shape == X.shape

In [26]:
# Sanity check of the full roll-out. Call the module itself rather than
# .forward() so that nn.Module.__call__ (and any registered hooks) runs, and
# pass the physics tensors on the same device as X (temporary copies; the
# notebook-level p_x / p_y are left unchanged).
Xhat = surrogate(X, p_x.to(device), p_y.to(device))
Xhat.shape

torch.Size([3, 5, 1, 128, 128])

In [27]:
# Drop the objects created by the sanity checks above, then build the
# surrogate instance that is actually trained.
del surrogate, encoding, decoding, X, Y

surrogate = Surrogate(
    window_size,
    latentDim - physicsDim,
    physicsDim,
    LIN_model,
    AE_model.module.encoder,
    AE_model.module.generator,
)
surrogate = surrogate.to(device)

# Wrap for multi-GPU execution only when more than one GPU id was requested.
requested_gpu_ids = gpu_ids.split(',')
if len(requested_gpu_ids) > 1:
    surrogate = nn.DataParallel(surrogate)

In [28]:
# Adam over all surrogate parameters (LIN, encoder and decoder), with the
# learning rate reduced when the monitored loss stops improving for `patience`
# epochs. The one-cycle alternative (create_opt / create_one_cycle) is not used.
max_lr = .0001
start_lr = 5*max_lr/10
adam_betas = (.5, .999)
opt = torch.optim.Adam(params=surrogate.parameters(), lr=max_lr, betas=adam_betas)
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=opt, patience=patience)

In [29]:
def L1_loss(pred, target):
    """Relative error: norm of ``pred - target`` divided by the norm of ``target``.

    NOTE: despite the name, this uses ``torch.norm`` with its default
    (Frobenius / 2-norm over all elements), not an L1 norm.
    """
    residual = pred - target
    return residual.norm() / target.norm()


def jacobian_loss(pred, target, device='cpu'):
    """Relative error (see ``L1_loss``) between ``jacobian(pred, device)`` and
    ``jacobian(target, device)``."""
    pred_jacobian = jacobian(pred, device)
    target_jacobian = jacobian(target, device)
    return L1_loss(pred_jacobian, target_jacobian)


def curl_loss(pred, target, device):
    """Relative error (see ``L1_loss``) between ``curl(pred, device)`` and
    ``curl(target, device)``."""
    pred_curl = curl(pred, device)
    target_curl = curl(target, device)
    return L1_loss(pred_curl, target_curl)


# Mean-squared-error criterion shared by p_loss.
L = nn.MSELoss()


def p_loss(pred, target):
    """MSE between ``target`` and the trailing columns of ``pred``.

    Only the last ``target.shape[1]`` columns of ``pred`` (along dimension 1)
    are compared with ``target``.
    """
    num_trailing = target.shape[1]
    trailing_pred = pred[:, -num_trailing:]
    return L(trailing_pred, target)


def loss(pred, target, device):
    """Training loss: relative error (``L1_loss``) plus an optional Jacobian term.

    Reads two notebook-level flags:
      * ``createStreamFcn`` - when set, ``pred`` is first passed through
        ``stream2uv(pred, device)``.
      * ``doJacobian`` - when set, ``jacobian_loss`` is added to the result.
    """
    if createStreamFcn:
        pred = stream2uv(pred, device)

    relative_error = L1_loss(pred, target)
    jacobian_term = jacobian_loss(pred, target, device) if doJacobian else 0
    return relative_error + jacobian_term

In [30]:
def trainEpoch(myDataLoader, tensorboard_writer, model, opt, p_loss, loss,
               metric, lr_scheduler, tensorboard_rate, device,
               tensorboard_recorder_step, total_steps):
    """Train ``model`` for one pass over ``myDataLoader`` and log to tensorboard.

    Args:
        myDataLoader: iterable of ``(U_x, U_y, p_x, p_y)`` tensor batches; must
            support ``len()``.
        tensorboard_writer: object with ``add_scalar`` and ``flush``.
        model: called as ``model(U_x, p_x, p_y)``.
        opt: optimizer stepped once per batch.
        p_loss: unused (the physics-parameter loss term is fixed at 0).
        loss: called as ``loss(U_hat, U_y, device)``; must return a tensor.
        metric: called as ``metric(U_hat, U_y)``; its ``__name__`` is the
            tensorboard tag.
        lr_scheduler: unused here; the caller steps it once per epoch.
        tensorboard_rate: number of batches averaged per tensorboard record.
        device: device the batch tensors are moved to.
        tensorboard_recorder_step: running index of tensorboard records.
        total_steps: running count of optimizer steps.

    Returns:
        ``(mean batch loss, updated tensorboard_recorder_step, updated total_steps)``.
    """
    def _as_float(value):
        # Metrics usually come back as 0-d tensors; keep plain floats so that
        # no tensor (and no autograd history) is held across batches.
        return value.item() if torch.is_tensor(value) else float(value)

    def _record_window(loss_sum, ploss_sum, metric_sum, num_batches, step):
        # Write the averages of the batches accumulated since the last record.
        tensorboard_writer.add_scalar(tag="Loss", scalar_value=loss_sum/num_batches, global_step=step)
        tensorboard_writer.add_scalar(tag="p_loss", scalar_value=ploss_sum/num_batches, global_step=step)
        tensorboard_writer.add_scalar(tag=metric.__name__, scalar_value=metric_sum/num_batches, global_step=step)
        tensorboard_writer.flush()

    running_loss = 0.0
    running_rmse = 0.0
    running_ploss = 0.0
    total_loss = 0.0
    pending_batches = 0  # batches accumulated since the last tensorboard record

    for sampleBatch in myDataLoader:

        # --- Main Training ---
        U_x, U_y, p_x, p_y = sampleBatch
        U_x = U_x.to(device)
        p_x = p_x.to(device)
        U_y = U_y.to(device)
        p_y = p_y.to(device)

        # zero the parameter gradients
        opt.zero_grad()

        U_hat = model(U_x, p_x, p_y)
        pl = 0  # physics-parameter loss is disabled
        ll = loss(U_hat, U_y, device)
        combined_loss = pl + ll
        combined_loss.backward()
        opt.step()

        batch_loss = combined_loss.item()
        running_loss += batch_loss
        total_loss += batch_loss
        running_ploss += pl

        # --- Metrics Recording ---
        # The metric is for reporting only: compute it without building a
        # graph and accumulate it as a float.
        with torch.no_grad():
            running_rmse += _as_float(metric(U_hat, U_y))

        # record lr change
        total_steps += 1
        tensorboard_writer.add_scalar(tag="LR", scalar_value=opt.param_groups[0]['lr'], global_step=total_steps)

        # tensorboard writes, once every `tensorboard_rate` batches
        pending_batches += 1
        if pending_batches == tensorboard_rate:
            tensorboard_recorder_step += 1
            _record_window(running_loss, running_ploss, running_rmse,
                           pending_batches, tensorboard_recorder_step)
            running_loss = 0.0
            running_rmse = 0.0
            running_ploss = 0.0
            pending_batches = 0

    # Record the trailing, shorter window too. Otherwise an epoch with fewer
    # than `tensorboard_rate` batches never logs its loss and never advances
    # the recorder step that the validation pass logs against.
    if pending_batches:
        tensorboard_recorder_step += 1
        _record_window(running_loss, running_ploss, running_rmse,
                       pending_batches, tensorboard_recorder_step)

    return total_loss/len(myDataLoader), tensorboard_recorder_step, total_steps

In [31]:
def validEpoch(myDataLoader, tensorboard_writer, model, p_loss, loss, metric,
               device, tensorboard_recorder_step):
    """Evaluate ``model`` on ``myDataLoader`` and log sample-weighted averages.

    Args:
        myDataLoader: iterable of ``(U_x, U_y, p_x, p_y)`` tensor batches.
        tensorboard_writer: object with ``add_scalar`` and ``flush``.
        model: called as ``model(U_x, p_x, p_y, window=...)``.
        p_loss: unused.
        loss: called as ``loss(U_hat, U_y, device)``.
        metric: called as ``metric(U_hat, U_y)``; its ``__name__`` is the
            tensorboard tag.
        device: device the batch tensors are moved to.
        tensorboard_recorder_step: global step the two scalars are logged at.

    Returns:
        The loss averaged over all samples seen (0.0 if the loader is empty).
    """
    def _as_float(value):
        # Losses / metrics usually come back as 0-d tensors; log plain floats.
        return value.item() if torch.is_tensor(value) else float(value)

    loss_sum = 0.0
    metric_sum = 0.0
    samples_seen = 0
    for sampleBatch in myDataLoader:

        U_x, U_y, p_x, p_y = sampleBatch
        U_x = U_x.to(device)
        p_x = p_x.to(device)
        U_y = U_y.to(device)
        p_y = p_y.to(device)

        batch_size = len(U_x)

        # forward, no gradient calculations
        with torch.no_grad():
            # The number of steps to predict is taken from the targets rather
            # than from a notebook-level global.
            U_hat = model(U_x, p_x, p_y, window=p_y.size(1))
            combined_loss = loss(U_hat, U_y, device)
            r = metric(U_hat, U_y)

        # Weight every batch by its size and normalise by the number of samples
        # actually seen. Normalising by len(dataset) is only correct when the
        # loader visits every dataset item exactly once, which does not hold
        # with a subset sampler.
        loss_sum += batch_size * _as_float(combined_loss)
        metric_sum += batch_size * _as_float(r)
        samples_seen += batch_size

    avg_running_loss = loss_sum / samples_seen if samples_seen else 0.0
    avg_running_rmse = metric_sum / samples_seen if samples_seen else 0.0
    tensorboard_writer.add_scalar(tag="Loss", scalar_value=avg_running_loss, global_step=tensorboard_recorder_step)
    tensorboard_writer.add_scalar(tag=metric.__name__, scalar_value=avg_running_rmse, global_step=tensorboard_recorder_step)
    tensorboard_writer.flush()

    return avg_running_loss

In [32]:
# Create the checkpoint directory. Only "already exists" is treated as benign;
# any other failure (e.g. missing permissions) is raised instead of being
# reported as an existing directory.
try:
    os.makedirs(cps)
except FileExistsError:
    print("checkpoints directory already exists :)")
    
# One tensorboard writer per split, both under <tensorboard_direc>/<versionName>,
# plus the counters threaded through trainEpoch / validEpoch.
run_log_dir = os.path.join(tensorboard_direc, versionName)
train_writer = SummaryWriter(os.path.join(run_log_dir, 'train'))
test_writer = SummaryWriter(os.path.join(run_log_dir, 'valid'))
tensorboard_recorder_step = 0
total_steps = 0

checkpoints directory already exists :)


In [47]:
writeMessage('---------- Started Training ----------', versionName)
# np.inf rather than the np.infty alias, which was removed in NumPy 2.0.
bestLoss = np.inf

if not eval_only:
    for epoch in tqdm(range(1, epochs+1)):  # loop over the dataset multiple times

        writeMessage("--- Epoch {0}/{1} ---".format(epoch, epochs), versionName)

        # --- train for one epoch ---
        surrogate.train()
        trainLoss, tensorboard_recorder_step, total_steps = trainEpoch(trainDataLoader,
                                                                       train_writer, surrogate,
                                                                       opt, p_loss, loss,
                                                                       rmse, lr_scheduler,
                                                                       tensorboard_rate, device,
                                                                       tensorboard_recorder_step, total_steps)

        writeMessage("trainLoss: {:.4e}".format(trainLoss),versionName)
        writeMessage("LR: {:.4e}".format(opt.param_groups[0]['lr']),versionName)

        # --- validate ---
        surrogate.eval()
        valLoss = validEpoch(testDataLoader, test_writer, surrogate, p_loss, loss, rmse, device, tensorboard_recorder_step)
        writeMessage("valLoss: {:.4e}".format(valLoss),versionName)

        # checkpoint progress whenever the validation loss improves
        if valLoss < bestLoss:
            bestLoss = valLoss
            writeMessage("Better valLoss: {:.4e}, Saving models...".format(bestLoss),versionName)
            torch.save(surrogate.state_dict(), os.path.join(cps,versionName))

        # NOTE(review): the plateau scheduler is driven by the training loss
        # while checkpointing uses the validation loss; confirm this is intended.
        lr_scheduler.step(trainLoss)

        # stop once the learning rate has decayed below a useful value
        if opt.param_groups[0]['lr'] < 5e-8:
            break
    writeMessage('---------- Finished Training ----------', versionName)

  0%|          | 0/1000 [00:00<?, ?it/s]

---------- Started Training ----------
--- Epoch 1/1000 ---
trainLoss: 1.1807e-02
LR: 1.0000e-07
valLoss: 1.1804e-02
Better valLoss: 1.1804e-02, Saving models...


  0%|          | 1/1000 [00:01<17:34,  1.06s/it]

--- Epoch 2/1000 ---
trainLoss: 1.1803e-02
LR: 1.0000e-07
valLoss: 1.1800e-02
Better valLoss: 1.1800e-02, Saving models...


  0%|          | 2/1000 [00:02<17:23,  1.05s/it]

--- Epoch 3/1000 ---
trainLoss: 1.1799e-02
LR: 1.0000e-07
valLoss: 1.1796e-02
Better valLoss: 1.1796e-02, Saving models...


  0%|          | 3/1000 [00:03<17:13,  1.04s/it]

--- Epoch 4/1000 ---
trainLoss: 1.1794e-02
LR: 1.0000e-07
valLoss: 1.1791e-02
Better valLoss: 1.1791e-02, Saving models...


  0%|          | 4/1000 [00:04<17:00,  1.02s/it]

--- Epoch 5/1000 ---
trainLoss: 1.1790e-02
LR: 1.0000e-07
valLoss: 1.1787e-02
Better valLoss: 1.1787e-02, Saving models...


  0%|          | 5/1000 [00:05<16:48,  1.01s/it]

--- Epoch 6/1000 ---
trainLoss: 1.1786e-02
LR: 1.0000e-07
valLoss: 1.1783e-02
Better valLoss: 1.1783e-02, Saving models...


  1%|          | 6/1000 [00:06<16:46,  1.01s/it]

--- Epoch 7/1000 ---
trainLoss: 1.1782e-02
LR: 1.0000e-07
valLoss: 1.1779e-02
Better valLoss: 1.1779e-02, Saving models...


  1%|          | 7/1000 [00:07<16:54,  1.02s/it]

--- Epoch 8/1000 ---
trainLoss: 1.1778e-02
LR: 1.0000e-07
valLoss: 1.1775e-02
Better valLoss: 1.1775e-02, Saving models...


  1%|          | 8/1000 [00:08<16:34,  1.00s/it]

--- Epoch 9/1000 ---
trainLoss: 1.1774e-02
LR: 1.0000e-07
valLoss: 1.1771e-02
Better valLoss: 1.1771e-02, Saving models...


  1%|          | 9/1000 [00:09<16:35,  1.00s/it]

--- Epoch 10/1000 ---
trainLoss: 1.1769e-02
LR: 1.0000e-07
valLoss: 1.1766e-02
Better valLoss: 1.1766e-02, Saving models...


  1%|          | 10/1000 [00:10<16:39,  1.01s/it]

--- Epoch 11/1000 ---
trainLoss: 1.1765e-02
LR: 1.0000e-07
valLoss: 1.1762e-02
Better valLoss: 1.1762e-02, Saving models...


  1%|          | 11/1000 [00:11<16:34,  1.01s/it]

--- Epoch 12/1000 ---
trainLoss: 1.1761e-02
LR: 1.0000e-07
valLoss: 1.1758e-02
Better valLoss: 1.1758e-02, Saving models...


  1%|          | 12/1000 [00:12<16:13,  1.02it/s]

--- Epoch 13/1000 ---
trainLoss: 1.1757e-02
LR: 1.0000e-07
valLoss: 1.1754e-02
Better valLoss: 1.1754e-02, Saving models...


  1%|▏         | 13/1000 [00:13<16:04,  1.02it/s]

--- Epoch 14/1000 ---
trainLoss: 1.1753e-02
LR: 1.0000e-07
valLoss: 1.1750e-02
Better valLoss: 1.1750e-02, Saving models...


  1%|▏         | 14/1000 [00:13<15:55,  1.03it/s]

--- Epoch 15/1000 ---
trainLoss: 1.1749e-02
LR: 1.0000e-07
valLoss: 1.1745e-02
Better valLoss: 1.1745e-02, Saving models...


  2%|▏         | 15/1000 [00:14<15:59,  1.03it/s]

--- Epoch 16/1000 ---
trainLoss: 1.1744e-02
LR: 1.0000e-07
valLoss: 1.1741e-02
Better valLoss: 1.1741e-02, Saving models...


  2%|▏         | 16/1000 [00:16<16:25,  1.00s/it]

--- Epoch 17/1000 ---
trainLoss: 1.1740e-02
LR: 1.0000e-07
valLoss: 1.1737e-02
Better valLoss: 1.1737e-02, Saving models...


  2%|▏         | 17/1000 [00:17<16:53,  1.03s/it]

--- Epoch 18/1000 ---
trainLoss: 1.1736e-02
LR: 1.0000e-07
valLoss: 1.1733e-02
Better valLoss: 1.1733e-02, Saving models...


  2%|▏         | 18/1000 [00:18<16:42,  1.02s/it]

--- Epoch 19/1000 ---
trainLoss: 1.1732e-02
LR: 1.0000e-07
valLoss: 1.1729e-02
Better valLoss: 1.1729e-02, Saving models...


  2%|▏         | 19/1000 [00:19<16:53,  1.03s/it]

--- Epoch 20/1000 ---
trainLoss: 1.1728e-02
LR: 1.0000e-07
valLoss: 1.1725e-02
Better valLoss: 1.1725e-02, Saving models...


  2%|▏         | 20/1000 [00:20<17:10,  1.05s/it]

--- Epoch 21/1000 ---
trainLoss: 1.1724e-02
LR: 1.0000e-07
valLoss: 1.1720e-02
Better valLoss: 1.1720e-02, Saving models...


  2%|▏         | 21/1000 [00:21<17:12,  1.05s/it]

--- Epoch 22/1000 ---
trainLoss: 1.1719e-02
LR: 1.0000e-07
valLoss: 1.1716e-02
Better valLoss: 1.1716e-02, Saving models...


  2%|▏         | 22/1000 [00:22<16:44,  1.03s/it]

--- Epoch 23/1000 ---
trainLoss: 1.1715e-02
LR: 1.0000e-07
valLoss: 1.1712e-02
Better valLoss: 1.1712e-02, Saving models...


  2%|▏         | 23/1000 [00:23<16:40,  1.02s/it]

--- Epoch 24/1000 ---
trainLoss: 1.1711e-02
LR: 1.0000e-07
valLoss: 1.1708e-02
Better valLoss: 1.1708e-02, Saving models...


  2%|▏         | 24/1000 [00:24<16:54,  1.04s/it]

--- Epoch 25/1000 ---
trainLoss: 1.1707e-02
LR: 1.0000e-07
valLoss: 1.1704e-02
Better valLoss: 1.1704e-02, Saving models...


  2%|▎         | 25/1000 [00:25<16:42,  1.03s/it]

--- Epoch 26/1000 ---
trainLoss: 1.1703e-02
LR: 1.0000e-07
valLoss: 1.1700e-02
Better valLoss: 1.1700e-02, Saving models...


  3%|▎         | 26/1000 [00:26<16:19,  1.01s/it]

--- Epoch 27/1000 ---
trainLoss: 1.1699e-02
LR: 1.0000e-07
valLoss: 1.1695e-02
Better valLoss: 1.1695e-02, Saving models...


  3%|▎         | 27/1000 [00:27<16:20,  1.01s/it]

--- Epoch 28/1000 ---
trainLoss: 1.1694e-02
LR: 1.0000e-07
valLoss: 1.1691e-02
Better valLoss: 1.1691e-02, Saving models...


  3%|▎         | 28/1000 [00:28<15:39,  1.03it/s]

--- Epoch 29/1000 ---
trainLoss: 1.1690e-02
LR: 1.0000e-07
valLoss: 1.1687e-02
Better valLoss: 1.1687e-02, Saving models...


  3%|▎         | 29/1000 [00:29<15:49,  1.02it/s]

--- Epoch 30/1000 ---
trainLoss: 1.1686e-02
LR: 1.0000e-07
valLoss: 1.1683e-02
Better valLoss: 1.1683e-02, Saving models...


  3%|▎         | 30/1000 [00:30<15:50,  1.02it/s]

--- Epoch 31/1000 ---
trainLoss: 1.1682e-02
LR: 1.0000e-07
valLoss: 1.1679e-02
Better valLoss: 1.1679e-02, Saving models...


  3%|▎         | 31/1000 [00:31<16:01,  1.01it/s]

--- Epoch 32/1000 ---
trainLoss: 1.1678e-02
LR: 1.0000e-07
valLoss: 1.1674e-02
Better valLoss: 1.1674e-02, Saving models...


  3%|▎         | 32/1000 [00:32<15:45,  1.02it/s]

--- Epoch 33/1000 ---
trainLoss: 1.1673e-02
LR: 1.0000e-07
valLoss: 1.1670e-02
Better valLoss: 1.1670e-02, Saving models...


  3%|▎         | 33/1000 [00:33<15:51,  1.02it/s]

--- Epoch 34/1000 ---
trainLoss: 1.1669e-02
LR: 1.0000e-07
valLoss: 1.1666e-02
Better valLoss: 1.1666e-02, Saving models...


  3%|▎         | 34/1000 [00:34<16:20,  1.01s/it]

--- Epoch 35/1000 ---
trainLoss: 1.1665e-02
LR: 1.0000e-07
valLoss: 1.1662e-02
Better valLoss: 1.1662e-02, Saving models...


  4%|▎         | 35/1000 [00:35<16:07,  1.00s/it]

--- Epoch 36/1000 ---
trainLoss: 1.1661e-02
LR: 1.0000e-07
valLoss: 1.1658e-02
Better valLoss: 1.1658e-02, Saving models...


  4%|▎         | 36/1000 [00:36<16:22,  1.02s/it]

--- Epoch 37/1000 ---
trainLoss: 1.1657e-02
LR: 1.0000e-07
valLoss: 1.1653e-02
Better valLoss: 1.1653e-02, Saving models...


  4%|▎         | 37/1000 [00:37<16:35,  1.03s/it]

--- Epoch 38/1000 ---
trainLoss: 1.1652e-02
LR: 1.0000e-07
valLoss: 1.1649e-02
Better valLoss: 1.1649e-02, Saving models...


  4%|▍         | 38/1000 [00:38<16:09,  1.01s/it]

--- Epoch 39/1000 ---
trainLoss: 1.1648e-02
LR: 1.0000e-07
valLoss: 1.1645e-02
Better valLoss: 1.1645e-02, Saving models...


  4%|▍         | 39/1000 [00:39<16:17,  1.02s/it]

--- Epoch 40/1000 ---
trainLoss: 1.1644e-02
LR: 1.0000e-07
valLoss: 1.1641e-02
Better valLoss: 1.1641e-02, Saving models...


  4%|▍         | 40/1000 [00:40<16:27,  1.03s/it]

--- Epoch 41/1000 ---
trainLoss: 1.1640e-02
LR: 1.0000e-07
valLoss: 1.1637e-02
Better valLoss: 1.1637e-02, Saving models...


  4%|▍         | 41/1000 [00:41<16:33,  1.04s/it]

--- Epoch 42/1000 ---
trainLoss: 1.1636e-02
LR: 1.0000e-07
valLoss: 1.1632e-02
Better valLoss: 1.1632e-02, Saving models...


  4%|▍         | 42/1000 [00:42<16:19,  1.02s/it]

--- Epoch 43/1000 ---
trainLoss: 1.1631e-02
LR: 1.0000e-07
valLoss: 1.1628e-02
Better valLoss: 1.1628e-02, Saving models...


  4%|▍         | 43/1000 [00:43<16:10,  1.01s/it]

--- Epoch 44/1000 ---
trainLoss: 1.1627e-02
LR: 1.0000e-07
valLoss: 1.1624e-02
Better valLoss: 1.1624e-02, Saving models...


  4%|▍         | 44/1000 [00:44<16:09,  1.01s/it]

--- Epoch 45/1000 ---
trainLoss: 1.1623e-02
LR: 1.0000e-07
valLoss: 1.1620e-02
Better valLoss: 1.1620e-02, Saving models...


  4%|▍         | 45/1000 [00:45<15:54,  1.00it/s]

--- Epoch 46/1000 ---
trainLoss: 1.1619e-02
LR: 1.0000e-07
valLoss: 1.1616e-02
Better valLoss: 1.1616e-02, Saving models...


  5%|▍         | 46/1000 [00:46<16:14,  1.02s/it]

--- Epoch 47/1000 ---
trainLoss: 1.1615e-02
LR: 1.0000e-07
valLoss: 1.1611e-02
Better valLoss: 1.1611e-02, Saving models...


  5%|▍         | 47/1000 [00:47<16:01,  1.01s/it]

--- Epoch 48/1000 ---
trainLoss: 1.1610e-02
LR: 1.0000e-07
valLoss: 1.1607e-02
Better valLoss: 1.1607e-02, Saving models...


  5%|▍         | 48/1000 [00:48<15:50,  1.00it/s]

--- Epoch 49/1000 ---
trainLoss: 1.1606e-02
LR: 1.0000e-07
valLoss: 1.1603e-02
Better valLoss: 1.1603e-02, Saving models...


  5%|▍         | 49/1000 [00:49<15:54,  1.00s/it]

--- Epoch 50/1000 ---
trainLoss: 1.1602e-02
LR: 1.0000e-07
valLoss: 1.1599e-02
Better valLoss: 1.1599e-02, Saving models...


  5%|▌         | 50/1000 [00:50<15:56,  1.01s/it]

--- Epoch 51/1000 ---
trainLoss: 1.1598e-02
LR: 1.0000e-07
valLoss: 1.1595e-02
Better valLoss: 1.1595e-02, Saving models...


  5%|▌         | 51/1000 [00:51<15:55,  1.01s/it]

--- Epoch 52/1000 ---
trainLoss: 1.1594e-02
LR: 1.0000e-07
valLoss: 1.1590e-02
Better valLoss: 1.1590e-02, Saving models...


  5%|▌         | 52/1000 [00:52<15:46,  1.00it/s]

--- Epoch 53/1000 ---
trainLoss: 1.1589e-02
LR: 1.0000e-07
valLoss: 1.1586e-02
Better valLoss: 1.1586e-02, Saving models...


  5%|▌         | 53/1000 [00:53<15:47,  1.00s/it]

--- Epoch 54/1000 ---
trainLoss: 1.1585e-02
LR: 1.0000e-07
valLoss: 1.1582e-02
Better valLoss: 1.1582e-02, Saving models...


  5%|▌         | 54/1000 [00:54<16:02,  1.02s/it]

--- Epoch 55/1000 ---
trainLoss: 1.1581e-02
LR: 1.0000e-07
valLoss: 1.1578e-02
Better valLoss: 1.1578e-02, Saving models...


  6%|▌         | 55/1000 [00:55<15:40,  1.00it/s]

--- Epoch 56/1000 ---
trainLoss: 1.1577e-02
LR: 1.0000e-07
valLoss: 1.1574e-02
Better valLoss: 1.1574e-02, Saving models...


  6%|▌         | 56/1000 [00:56<15:53,  1.01s/it]

--- Epoch 57/1000 ---
trainLoss: 1.1573e-02
LR: 1.0000e-07
valLoss: 1.1569e-02
Better valLoss: 1.1569e-02, Saving models...


  6%|▌         | 57/1000 [00:57<15:40,  1.00it/s]

--- Epoch 58/1000 ---
trainLoss: 1.1568e-02
LR: 1.0000e-07
valLoss: 1.1565e-02
Better valLoss: 1.1565e-02, Saving models...


  6%|▌         | 58/1000 [00:58<15:24,  1.02it/s]

--- Epoch 59/1000 ---
trainLoss: 1.1564e-02
LR: 1.0000e-07
valLoss: 1.1561e-02
Better valLoss: 1.1561e-02, Saving models...


  6%|▌         | 59/1000 [00:59<15:26,  1.02it/s]

--- Epoch 60/1000 ---
trainLoss: 1.1560e-02
LR: 1.0000e-07
valLoss: 1.1557e-02
Better valLoss: 1.1557e-02, Saving models...


  6%|▌         | 60/1000 [01:00<15:47,  1.01s/it]

--- Epoch 61/1000 ---
trainLoss: 1.1556e-02
LR: 1.0000e-07
valLoss: 1.1553e-02
Better valLoss: 1.1553e-02, Saving models...


  6%|▌         | 61/1000 [01:01<15:32,  1.01it/s]

--- Epoch 62/1000 ---
trainLoss: 1.1551e-02
LR: 1.0000e-07
valLoss: 1.1548e-02
Better valLoss: 1.1548e-02, Saving models...


  6%|▌         | 62/1000 [01:02<15:05,  1.04it/s]

--- Epoch 63/1000 ---
trainLoss: 1.1547e-02
LR: 1.0000e-07
valLoss: 1.1544e-02
Better valLoss: 1.1544e-02, Saving models...


  6%|▋         | 63/1000 [01:03<15:08,  1.03it/s]

--- Epoch 64/1000 ---
trainLoss: 1.1543e-02
LR: 1.0000e-07
valLoss: 1.1540e-02
Better valLoss: 1.1540e-02, Saving models...


  6%|▋         | 64/1000 [01:04<15:25,  1.01it/s]

--- Epoch 65/1000 ---
trainLoss: 1.1539e-02
LR: 1.0000e-07


  6%|▋         | 65/1000 [01:05<15:01,  1.04it/s]

valLoss: 1.1536e-02
Better valLoss: 1.1536e-02, Saving models...
--- Epoch 66/1000 ---
trainLoss: 1.1535e-02
LR: 1.0000e-07
valLoss: 1.1531e-02
Better valLoss: 1.1531e-02, Saving models...


  7%|▋         | 66/1000 [01:06<15:04,  1.03it/s]

--- Epoch 67/1000 ---
trainLoss: 1.1530e-02
LR: 1.0000e-07
valLoss: 1.1527e-02
Better valLoss: 1.1527e-02, Saving models...


  7%|▋         | 67/1000 [01:07<15:30,  1.00it/s]

--- Epoch 68/1000 ---
trainLoss: 1.1526e-02
LR: 1.0000e-07
valLoss: 1.1523e-02
Better valLoss: 1.1523e-02, Saving models...


  7%|▋         | 68/1000 [01:08<15:27,  1.01it/s]

--- Epoch 69/1000 ---
trainLoss: 1.1522e-02
LR: 1.0000e-07
valLoss: 1.1519e-02
Better valLoss: 1.1519e-02, Saving models...


  7%|▋         | 69/1000 [01:09<15:35,  1.00s/it]

--- Epoch 70/1000 ---
trainLoss: 1.1518e-02
LR: 1.0000e-07
valLoss: 1.1515e-02
Better valLoss: 1.1515e-02, Saving models...


  7%|▋         | 70/1000 [01:10<15:44,  1.02s/it]

--- Epoch 71/1000 ---
trainLoss: 1.1513e-02
LR: 1.0000e-07
valLoss: 1.1510e-02
Better valLoss: 1.1510e-02, Saving models...


  7%|▋         | 71/1000 [01:11<15:34,  1.01s/it]

--- Epoch 72/1000 ---
trainLoss: 1.1509e-02
LR: 1.0000e-07
valLoss: 1.1506e-02
Better valLoss: 1.1506e-02, Saving models...


  7%|▋         | 72/1000 [01:12<15:31,  1.00s/it]

--- Epoch 73/1000 ---
trainLoss: 1.1505e-02
LR: 1.0000e-07
valLoss: 1.1502e-02
Better valLoss: 1.1502e-02, Saving models...


  7%|▋         | 73/1000 [01:13<15:16,  1.01it/s]

--- Epoch 74/1000 ---
trainLoss: 1.1501e-02
LR: 1.0000e-07
valLoss: 1.1498e-02
Better valLoss: 1.1498e-02, Saving models...


  7%|▋         | 74/1000 [01:14<15:17,  1.01it/s]

--- Epoch 75/1000 ---
trainLoss: 1.1497e-02
LR: 1.0000e-07
valLoss: 1.1493e-02
Better valLoss: 1.1493e-02, Saving models...


  8%|▊         | 75/1000 [01:15<15:20,  1.01it/s]

--- Epoch 76/1000 ---
trainLoss: 1.1492e-02
LR: 1.0000e-07
valLoss: 1.1489e-02
Better valLoss: 1.1489e-02, Saving models...


  8%|▊         | 76/1000 [01:16<15:11,  1.01it/s]

--- Epoch 77/1000 ---
trainLoss: 1.1488e-02
LR: 1.0000e-07
valLoss: 1.1485e-02
Better valLoss: 1.1485e-02, Saving models...


  8%|▊         | 77/1000 [01:17<15:14,  1.01it/s]

--- Epoch 78/1000 ---
trainLoss: 1.1484e-02
LR: 1.0000e-07
valLoss: 1.1481e-02
Better valLoss: 1.1481e-02, Saving models...


  8%|▊         | 78/1000 [01:18<14:53,  1.03it/s]

--- Epoch 79/1000 ---
trainLoss: 1.1480e-02
LR: 1.0000e-07
valLoss: 1.1477e-02
Better valLoss: 1.1477e-02, Saving models...


  8%|▊         | 79/1000 [01:19<15:03,  1.02it/s]

--- Epoch 80/1000 ---
trainLoss: 1.1476e-02
LR: 1.0000e-07
valLoss: 1.1472e-02
Better valLoss: 1.1472e-02, Saving models...


  8%|▊         | 80/1000 [01:20<15:22,  1.00s/it]

--- Epoch 81/1000 ---
trainLoss: 1.1471e-02
LR: 1.0000e-07
valLoss: 1.1468e-02
Better valLoss: 1.1468e-02, Saving models...


  8%|▊         | 81/1000 [01:21<15:22,  1.00s/it]

--- Epoch 82/1000 ---
trainLoss: 1.1467e-02
LR: 1.0000e-07
valLoss: 1.1464e-02
Better valLoss: 1.1464e-02, Saving models...


  8%|▊         | 82/1000 [01:22<15:07,  1.01it/s]

--- Epoch 83/1000 ---
trainLoss: 1.1463e-02
LR: 1.0000e-07
valLoss: 1.1460e-02
Better valLoss: 1.1460e-02, Saving models...


  8%|▊         | 83/1000 [01:23<15:07,  1.01it/s]

--- Epoch 84/1000 ---
trainLoss: 1.1459e-02
LR: 1.0000e-07
valLoss: 1.1456e-02
Better valLoss: 1.1456e-02, Saving models...


  8%|▊         | 84/1000 [01:24<15:10,  1.01it/s]

--- Epoch 85/1000 ---
trainLoss: 1.1455e-02
LR: 1.0000e-07
valLoss: 1.1451e-02
Better valLoss: 1.1451e-02, Saving models...


  8%|▊         | 85/1000 [01:25<14:54,  1.02it/s]

--- Epoch 86/1000 ---
trainLoss: 1.1450e-02
LR: 1.0000e-07
valLoss: 1.1447e-02
Better valLoss: 1.1447e-02, Saving models...


  9%|▊         | 86/1000 [01:26<15:09,  1.00it/s]

--- Epoch 87/1000 ---
trainLoss: 1.1446e-02
LR: 1.0000e-07
valLoss: 1.1443e-02
Better valLoss: 1.1443e-02, Saving models...


  9%|▊         | 87/1000 [01:27<14:48,  1.03it/s]

--- Epoch 88/1000 ---
trainLoss: 1.1442e-02
LR: 1.0000e-07
valLoss: 1.1439e-02
Better valLoss: 1.1439e-02, Saving models...


  9%|▉         | 88/1000 [01:28<14:40,  1.04it/s]

--- Epoch 89/1000 ---
trainLoss: 1.1438e-02
LR: 1.0000e-07
valLoss: 1.1435e-02
Better valLoss: 1.1435e-02, Saving models...


  9%|▉         | 89/1000 [01:29<15:03,  1.01it/s]

--- Epoch 90/1000 ---
trainLoss: 1.1434e-02
LR: 1.0000e-07
valLoss: 1.1430e-02
Better valLoss: 1.1430e-02, Saving models...


  9%|▉         | 90/1000 [01:30<15:18,  1.01s/it]

--- Epoch 91/1000 ---
trainLoss: 1.1429e-02
LR: 1.0000e-07
valLoss: 1.1426e-02
Better valLoss: 1.1426e-02, Saving models...


  9%|▉         | 91/1000 [01:31<15:30,  1.02s/it]

--- Epoch 92/1000 ---
trainLoss: 1.1425e-02
LR: 1.0000e-07
valLoss: 1.1422e-02
Better valLoss: 1.1422e-02, Saving models...


  9%|▉         | 92/1000 [01:32<15:35,  1.03s/it]

--- Epoch 93/1000 ---
trainLoss: 1.1421e-02
LR: 1.0000e-07
valLoss: 1.1418e-02
Better valLoss: 1.1418e-02, Saving models...


  9%|▉         | 93/1000 [01:33<15:29,  1.02s/it]

--- Epoch 94/1000 ---
trainLoss: 1.1417e-02
LR: 1.0000e-07
valLoss: 1.1414e-02
Better valLoss: 1.1414e-02, Saving models...


  9%|▉         | 94/1000 [01:34<15:22,  1.02s/it]

--- Epoch 95/1000 ---
trainLoss: 1.1413e-02
LR: 1.0000e-07
valLoss: 1.1409e-02
Better valLoss: 1.1409e-02, Saving models...


 10%|▉         | 95/1000 [01:35<15:03,  1.00it/s]

--- Epoch 96/1000 ---
trainLoss: 1.1408e-02
LR: 1.0000e-07
valLoss: 1.1405e-02
Better valLoss: 1.1405e-02, Saving models...


 10%|▉         | 96/1000 [01:36<14:59,  1.00it/s]

--- Epoch 97/1000 ---
trainLoss: 1.1404e-02
LR: 1.0000e-07
valLoss: 1.1401e-02
Better valLoss: 1.1401e-02, Saving models...


 10%|▉         | 97/1000 [01:37<15:14,  1.01s/it]

--- Epoch 98/1000 ---
trainLoss: 1.1400e-02
LR: 1.0000e-07
valLoss: 1.1397e-02
Better valLoss: 1.1397e-02, Saving models...


 10%|▉         | 98/1000 [01:38<15:09,  1.01s/it]

--- Epoch 99/1000 ---
trainLoss: 1.1396e-02
LR: 1.0000e-07
valLoss: 1.1393e-02
Better valLoss: 1.1393e-02, Saving models...


 10%|▉         | 99/1000 [01:39<15:15,  1.02s/it]

--- Epoch 100/1000 ---
trainLoss: 1.1392e-02
LR: 1.0000e-07
valLoss: 1.1388e-02
Better valLoss: 1.1388e-02, Saving models...


 10%|█         | 100/1000 [01:40<15:23,  1.03s/it]

--- Epoch 101/1000 ---
trainLoss: 1.1387e-02
LR: 1.0000e-07
valLoss: 1.1384e-02
Better valLoss: 1.1384e-02, Saving models...


 10%|█         | 101/1000 [01:41<15:15,  1.02s/it]

--- Epoch 102/1000 ---
trainLoss: 1.1383e-02
LR: 1.0000e-07
valLoss: 1.1380e-02
Better valLoss: 1.1380e-02, Saving models...


 10%|█         | 102/1000 [01:42<14:56,  1.00it/s]

--- Epoch 103/1000 ---
trainLoss: 1.1379e-02
LR: 1.0000e-07
valLoss: 1.1376e-02
Better valLoss: 1.1376e-02, Saving models...


 10%|█         | 103/1000 [01:43<14:58,  1.00s/it]

--- Epoch 104/1000 ---
trainLoss: 1.1375e-02
LR: 1.0000e-07
valLoss: 1.1372e-02
Better valLoss: 1.1372e-02, Saving models...


 10%|█         | 104/1000 [01:44<15:05,  1.01s/it]

--- Epoch 105/1000 ---
trainLoss: 1.1371e-02
LR: 1.0000e-07
valLoss: 1.1367e-02
Better valLoss: 1.1367e-02, Saving models...


 10%|█         | 105/1000 [01:45<14:57,  1.00s/it]

--- Epoch 106/1000 ---
trainLoss: 1.1366e-02
LR: 1.0000e-07
valLoss: 1.1363e-02
Better valLoss: 1.1363e-02, Saving models...


 11%|█         | 106/1000 [01:46<14:48,  1.01it/s]

--- Epoch 107/1000 ---
trainLoss: 1.1362e-02
LR: 1.0000e-07
valLoss: 1.1359e-02
Better valLoss: 1.1359e-02, Saving models...


 11%|█         | 107/1000 [01:47<15:08,  1.02s/it]

--- Epoch 108/1000 ---
trainLoss: 1.1358e-02
LR: 1.0000e-07
valLoss: 1.1355e-02
Better valLoss: 1.1355e-02, Saving models...


 11%|█         | 108/1000 [01:48<15:18,  1.03s/it]

--- Epoch 109/1000 ---
trainLoss: 1.1354e-02
LR: 1.0000e-07
valLoss: 1.1351e-02
Better valLoss: 1.1351e-02, Saving models...


 11%|█         | 109/1000 [01:49<15:10,  1.02s/it]

--- Epoch 110/1000 ---
trainLoss: 1.1350e-02
LR: 1.0000e-07
valLoss: 1.1347e-02
Better valLoss: 1.1347e-02, Saving models...


 11%|█         | 110/1000 [01:50<15:05,  1.02s/it]

--- Epoch 111/1000 ---
trainLoss: 1.1346e-02
LR: 1.0000e-07
valLoss: 1.1342e-02
Better valLoss: 1.1342e-02, Saving models...


 11%|█         | 111/1000 [01:51<14:57,  1.01s/it]

--- Epoch 112/1000 ---
trainLoss: 1.1341e-02
LR: 1.0000e-07
valLoss: 1.1338e-02
Better valLoss: 1.1338e-02, Saving models...


 11%|█         | 112/1000 [01:52<14:53,  1.01s/it]

--- Epoch 113/1000 ---
trainLoss: 1.1337e-02
LR: 1.0000e-07
valLoss: 1.1334e-02
Better valLoss: 1.1334e-02, Saving models...


 11%|█▏        | 113/1000 [01:53<14:43,  1.00it/s]

--- Epoch 114/1000 ---
trainLoss: 1.1333e-02
LR: 1.0000e-07
valLoss: 1.1330e-02
Better valLoss: 1.1330e-02, Saving models...


 11%|█▏        | 114/1000 [01:54<14:21,  1.03it/s]

--- Epoch 115/1000 ---
trainLoss: 1.1329e-02
LR: 1.0000e-07
valLoss: 1.1326e-02
Better valLoss: 1.1326e-02, Saving models...


 12%|█▏        | 115/1000 [01:55<14:36,  1.01it/s]

--- Epoch 116/1000 ---
trainLoss: 1.1325e-02
LR: 1.0000e-07
valLoss: 1.1322e-02
Better valLoss: 1.1322e-02, Saving models...


 12%|█▏        | 116/1000 [01:56<14:38,  1.01it/s]

--- Epoch 117/1000 ---
trainLoss: 1.1320e-02
LR: 1.0000e-07
valLoss: 1.1317e-02
Better valLoss: 1.1317e-02, Saving models...


 12%|█▏        | 117/1000 [01:57<14:40,  1.00it/s]

--- Epoch 118/1000 ---
trainLoss: 1.1316e-02
LR: 1.0000e-07
valLoss: 1.1313e-02
Better valLoss: 1.1313e-02, Saving models...


 12%|█▏        | 118/1000 [01:58<14:45,  1.00s/it]

--- Epoch 119/1000 ---
trainLoss: 1.1312e-02
LR: 1.0000e-07
valLoss: 1.1309e-02
Better valLoss: 1.1309e-02, Saving models...


 12%|█▏        | 119/1000 [01:59<14:36,  1.01it/s]

--- Epoch 120/1000 ---
trainLoss: 1.1308e-02
LR: 1.0000e-07
valLoss: 1.1305e-02
Better valLoss: 1.1305e-02, Saving models...


 12%|█▏        | 120/1000 [02:00<14:38,  1.00it/s]

--- Epoch 121/1000 ---
trainLoss: 1.1304e-02
LR: 1.0000e-07
valLoss: 1.1301e-02
Better valLoss: 1.1301e-02, Saving models...


 12%|█▏        | 121/1000 [02:01<14:31,  1.01it/s]

--- Epoch 122/1000 ---
trainLoss: 1.1300e-02
LR: 1.0000e-07
valLoss: 1.1296e-02
Better valLoss: 1.1296e-02, Saving models...


 12%|█▏        | 122/1000 [02:02<14:40,  1.00s/it]

--- Epoch 123/1000 ---
trainLoss: 1.1295e-02
LR: 1.0000e-07
valLoss: 1.1292e-02
Better valLoss: 1.1292e-02, Saving models...


 12%|█▏        | 123/1000 [02:03<14:22,  1.02it/s]

--- Epoch 124/1000 ---
trainLoss: 1.1291e-02
LR: 1.0000e-07
valLoss: 1.1288e-02
Better valLoss: 1.1288e-02, Saving models...


 12%|█▏        | 124/1000 [02:04<14:22,  1.02it/s]

--- Epoch 125/1000 ---
trainLoss: 1.1287e-02
LR: 1.0000e-07
valLoss: 1.1284e-02
Better valLoss: 1.1284e-02, Saving models...


 12%|█▎        | 125/1000 [02:05<14:24,  1.01it/s]

--- Epoch 126/1000 ---
trainLoss: 1.1283e-02
LR: 1.0000e-07
valLoss: 1.1280e-02
Better valLoss: 1.1280e-02, Saving models...


 13%|█▎        | 126/1000 [02:06<14:34,  1.00s/it]

--- Epoch 127/1000 ---
trainLoss: 1.1279e-02
LR: 1.0000e-07
valLoss: 1.1275e-02
Better valLoss: 1.1275e-02, Saving models...


 13%|█▎        | 127/1000 [02:07<14:32,  1.00it/s]

--- Epoch 128/1000 ---
trainLoss: 1.1274e-02
LR: 1.0000e-07
valLoss: 1.1271e-02
Better valLoss: 1.1271e-02, Saving models...


 13%|█▎        | 128/1000 [02:08<14:30,  1.00it/s]

--- Epoch 129/1000 ---
trainLoss: 1.1270e-02
LR: 1.0000e-07
valLoss: 1.1267e-02
Better valLoss: 1.1267e-02, Saving models...


 13%|█▎        | 129/1000 [02:09<14:26,  1.00it/s]

--- Epoch 130/1000 ---
trainLoss: 1.1266e-02
LR: 1.0000e-07
valLoss: 1.1263e-02
Better valLoss: 1.1263e-02, Saving models...


 13%|█▎        | 130/1000 [02:10<14:17,  1.01it/s]

--- Epoch 131/1000 ---
trainLoss: 1.1262e-02
LR: 1.0000e-07
valLoss: 1.1259e-02
Better valLoss: 1.1259e-02, Saving models...


 13%|█▎        | 131/1000 [02:11<14:21,  1.01it/s]

--- Epoch 132/1000 ---
trainLoss: 1.1258e-02
LR: 1.0000e-07
valLoss: 1.1254e-02
Better valLoss: 1.1254e-02, Saving models...


 13%|█▎        | 132/1000 [02:12<14:21,  1.01it/s]

--- Epoch 133/1000 ---
trainLoss: 1.1253e-02
LR: 1.0000e-07
valLoss: 1.1250e-02
Better valLoss: 1.1250e-02, Saving models...


 13%|█▎        | 133/1000 [02:13<14:27,  1.00s/it]

--- Epoch 134/1000 ---
trainLoss: 1.1249e-02
LR: 1.0000e-07
valLoss: 1.1246e-02
Better valLoss: 1.1246e-02, Saving models...


 13%|█▎        | 134/1000 [02:14<14:14,  1.01it/s]

--- Epoch 135/1000 ---
trainLoss: 1.1245e-02
LR: 1.0000e-07
valLoss: 1.1242e-02
Better valLoss: 1.1242e-02, Saving models...


 14%|█▎        | 135/1000 [02:15<14:34,  1.01s/it]

--- Epoch 136/1000 ---
trainLoss: 1.1241e-02
LR: 1.0000e-07
valLoss: 1.1238e-02
Better valLoss: 1.1238e-02, Saving models...


 14%|█▎        | 136/1000 [02:16<14:28,  1.00s/it]

--- Epoch 137/1000 ---
trainLoss: 1.1237e-02
LR: 1.0000e-07
valLoss: 1.1234e-02
Better valLoss: 1.1234e-02, Saving models...


 14%|█▎        | 137/1000 [02:17<14:25,  1.00s/it]

--- Epoch 138/1000 ---
trainLoss: 1.1232e-02
LR: 1.0000e-07
valLoss: 1.1229e-02
Better valLoss: 1.1229e-02, Saving models...


 14%|█▍        | 138/1000 [02:18<14:41,  1.02s/it]

--- Epoch 139/1000 ---
trainLoss: 1.1228e-02
LR: 1.0000e-07
valLoss: 1.1225e-02
Better valLoss: 1.1225e-02, Saving models...


 14%|█▍        | 139/1000 [02:19<14:29,  1.01s/it]

--- Epoch 140/1000 ---
trainLoss: 1.1224e-02
LR: 1.0000e-07
valLoss: 1.1221e-02
Better valLoss: 1.1221e-02, Saving models...


 14%|█▍        | 140/1000 [02:20<14:31,  1.01s/it]

--- Epoch 141/1000 ---
trainLoss: 1.1220e-02
LR: 1.0000e-07
valLoss: 1.1217e-02
Better valLoss: 1.1217e-02, Saving models...


 14%|█▍        | 141/1000 [02:21<14:27,  1.01s/it]

--- Epoch 142/1000 ---
trainLoss: 1.1216e-02
LR: 1.0000e-07
valLoss: 1.1213e-02
Better valLoss: 1.1213e-02, Saving models...


 14%|█▍        | 142/1000 [02:22<14:25,  1.01s/it]

--- Epoch 143/1000 ---
trainLoss: 1.1211e-02
LR: 1.0000e-07
valLoss: 1.1208e-02
Better valLoss: 1.1208e-02, Saving models...


 14%|█▍        | 143/1000 [02:23<13:59,  1.02it/s]

--- Epoch 144/1000 ---
trainLoss: 1.1207e-02
LR: 1.0000e-07
valLoss: 1.1204e-02
Better valLoss: 1.1204e-02, Saving models...


 14%|█▍        | 144/1000 [02:24<13:56,  1.02it/s]

--- Epoch 145/1000 ---
trainLoss: 1.1203e-02
LR: 1.0000e-07
valLoss: 1.1200e-02
Better valLoss: 1.1200e-02, Saving models...


 14%|█▍        | 145/1000 [02:25<13:58,  1.02it/s]

--- Epoch 146/1000 ---
trainLoss: 1.1199e-02
LR: 1.0000e-07
valLoss: 1.1196e-02
Better valLoss: 1.1196e-02, Saving models...


 15%|█▍        | 146/1000 [02:26<13:55,  1.02it/s]

--- Epoch 147/1000 ---
trainLoss: 1.1195e-02
LR: 1.0000e-07
valLoss: 1.1192e-02
Better valLoss: 1.1192e-02, Saving models...


 15%|█▍        | 147/1000 [02:27<14:08,  1.01it/s]

--- Epoch 148/1000 ---
trainLoss: 1.1191e-02
LR: 1.0000e-07
valLoss: 1.1187e-02
Better valLoss: 1.1187e-02, Saving models...


 15%|█▍        | 148/1000 [02:28<14:21,  1.01s/it]

--- Epoch 149/1000 ---
trainLoss: 1.1186e-02
LR: 1.0000e-07
valLoss: 1.1183e-02
Better valLoss: 1.1183e-02, Saving models...


 15%|█▍        | 149/1000 [02:29<14:16,  1.01s/it]

--- Epoch 150/1000 ---
trainLoss: 1.1182e-02
LR: 1.0000e-07
valLoss: 1.1179e-02
Better valLoss: 1.1179e-02, Saving models...


 15%|█▌        | 150/1000 [02:30<14:44,  1.04s/it]

--- Epoch 151/1000 ---
trainLoss: 1.1178e-02
LR: 1.0000e-07
valLoss: 1.1175e-02
Better valLoss: 1.1175e-02, Saving models...


 15%|█▌        | 151/1000 [02:31<14:36,  1.03s/it]

--- Epoch 152/1000 ---
trainLoss: 1.1174e-02
LR: 1.0000e-07
valLoss: 1.1171e-02
Better valLoss: 1.1171e-02, Saving models...


 15%|█▌        | 152/1000 [02:32<14:24,  1.02s/it]

--- Epoch 153/1000 ---
trainLoss: 1.1170e-02
LR: 1.0000e-07
valLoss: 1.1167e-02
Better valLoss: 1.1167e-02, Saving models...


 15%|█▌        | 153/1000 [02:33<14:16,  1.01s/it]

--- Epoch 154/1000 ---
trainLoss: 1.1166e-02
LR: 1.0000e-07
valLoss: 1.1162e-02
Better valLoss: 1.1162e-02, Saving models...


 15%|█▌        | 154/1000 [02:34<14:09,  1.00s/it]

--- Epoch 155/1000 ---
trainLoss: 1.1161e-02
LR: 1.0000e-07
valLoss: 1.1158e-02
Better valLoss: 1.1158e-02, Saving models...


 16%|█▌        | 155/1000 [02:35<13:53,  1.01it/s]

--- Epoch 156/1000 ---
trainLoss: 1.1157e-02
LR: 1.0000e-07
valLoss: 1.1154e-02
Better valLoss: 1.1154e-02, Saving models...


 16%|█▌        | 156/1000 [02:36<13:57,  1.01it/s]

--- Epoch 157/1000 ---
trainLoss: 1.1153e-02
LR: 1.0000e-07
valLoss: 1.1150e-02
Better valLoss: 1.1150e-02, Saving models...


 16%|█▌        | 157/1000 [02:37<14:16,  1.02s/it]

--- Epoch 158/1000 ---
trainLoss: 1.1149e-02
LR: 1.0000e-07
valLoss: 1.1146e-02
Better valLoss: 1.1146e-02, Saving models...


 16%|█▌        | 158/1000 [02:38<14:31,  1.04s/it]

--- Epoch 159/1000 ---
trainLoss: 1.1145e-02
LR: 1.0000e-07
valLoss: 1.1142e-02
Better valLoss: 1.1142e-02, Saving models...


 16%|█▌        | 159/1000 [02:39<14:15,  1.02s/it]

--- Epoch 160/1000 ---
trainLoss: 1.1140e-02
LR: 1.0000e-07
valLoss: 1.1137e-02
Better valLoss: 1.1137e-02, Saving models...


 16%|█▌        | 160/1000 [02:40<14:12,  1.01s/it]

--- Epoch 161/1000 ---
trainLoss: 1.1136e-02
LR: 1.0000e-07
valLoss: 1.1133e-02
Better valLoss: 1.1133e-02, Saving models...


 16%|█▌        | 161/1000 [02:41<13:59,  1.00s/it]

--- Epoch 162/1000 ---
trainLoss: 1.1132e-02
LR: 1.0000e-07
valLoss: 1.1129e-02
Better valLoss: 1.1129e-02, Saving models...


 16%|█▌        | 162/1000 [02:42<14:07,  1.01s/it]

--- Epoch 163/1000 ---
trainLoss: 1.1128e-02
LR: 1.0000e-07
valLoss: 1.1125e-02
Better valLoss: 1.1125e-02, Saving models...


 16%|█▋        | 163/1000 [02:43<14:06,  1.01s/it]

--- Epoch 164/1000 ---
trainLoss: 1.1124e-02
LR: 1.0000e-07
valLoss: 1.1121e-02
Better valLoss: 1.1121e-02, Saving models...


 16%|█▋        | 164/1000 [02:44<14:14,  1.02s/it]

--- Epoch 165/1000 ---
trainLoss: 1.1120e-02
LR: 1.0000e-07
valLoss: 1.1116e-02
Better valLoss: 1.1116e-02, Saving models...


 16%|█▋        | 165/1000 [02:45<14:13,  1.02s/it]

--- Epoch 166/1000 ---
trainLoss: 1.1115e-02
LR: 1.0000e-07
valLoss: 1.1112e-02
Better valLoss: 1.1112e-02, Saving models...


 17%|█▋        | 166/1000 [02:46<13:48,  1.01it/s]

--- Epoch 167/1000 ---
trainLoss: 1.1111e-02
LR: 1.0000e-07
valLoss: 1.1108e-02
Better valLoss: 1.1108e-02, Saving models...


 17%|█▋        | 167/1000 [02:47<13:55,  1.00s/it]

--- Epoch 168/1000 ---
trainLoss: 1.1107e-02
LR: 1.0000e-07
valLoss: 1.1104e-02
Better valLoss: 1.1104e-02, Saving models...


 17%|█▋        | 168/1000 [02:48<14:02,  1.01s/it]

--- Epoch 169/1000 ---
trainLoss: 1.1103e-02
LR: 1.0000e-07
valLoss: 1.1100e-02
Better valLoss: 1.1100e-02, Saving models...


 17%|█▋        | 169/1000 [02:49<13:56,  1.01s/it]

--- Epoch 170/1000 ---
trainLoss: 1.1099e-02
LR: 1.0000e-07
valLoss: 1.1096e-02
Better valLoss: 1.1096e-02, Saving models...


 17%|█▋        | 170/1000 [02:50<13:46,  1.00it/s]

--- Epoch 171/1000 ---
trainLoss: 1.1095e-02
LR: 1.0000e-07
valLoss: 1.1091e-02
Better valLoss: 1.1091e-02, Saving models...


 17%|█▋        | 171/1000 [02:51<14:05,  1.02s/it]

--- Epoch 172/1000 ---
trainLoss: 1.1090e-02
LR: 1.0000e-07
valLoss: 1.1087e-02
Better valLoss: 1.1087e-02, Saving models...


 17%|█▋        | 172/1000 [02:52<13:53,  1.01s/it]

--- Epoch 173/1000 ---
trainLoss: 1.1086e-02
LR: 1.0000e-07
valLoss: 1.1083e-02
Better valLoss: 1.1083e-02, Saving models...


 17%|█▋        | 173/1000 [02:53<13:29,  1.02it/s]

--- Epoch 174/1000 ---
trainLoss: 1.1082e-02
LR: 1.0000e-07
valLoss: 1.1079e-02
Better valLoss: 1.1079e-02, Saving models...


 17%|█▋        | 174/1000 [02:54<13:25,  1.03it/s]

--- Epoch 175/1000 ---
trainLoss: 1.1078e-02
LR: 1.0000e-07
valLoss: 1.1075e-02
Better valLoss: 1.1075e-02, Saving models...


 18%|█▊        | 175/1000 [02:55<13:41,  1.00it/s]

--- Epoch 176/1000 ---
trainLoss: 1.1074e-02
LR: 1.0000e-07
valLoss: 1.1071e-02
Better valLoss: 1.1071e-02, Saving models...


 18%|█▊        | 176/1000 [02:56<13:32,  1.01it/s]

--- Epoch 177/1000 ---
trainLoss: 1.1070e-02
LR: 1.0000e-07
valLoss: 1.1066e-02
Better valLoss: 1.1066e-02, Saving models...


 18%|█▊        | 177/1000 [02:57<14:07,  1.03s/it]

--- Epoch 178/1000 ---
trainLoss: 1.1065e-02
LR: 1.0000e-07
valLoss: 1.1062e-02
Better valLoss: 1.1062e-02, Saving models...


 18%|█▊        | 178/1000 [02:58<14:03,  1.03s/it]

--- Epoch 179/1000 ---
trainLoss: 1.1061e-02
LR: 1.0000e-07
valLoss: 1.1058e-02
Better valLoss: 1.1058e-02, Saving models...


 18%|█▊        | 179/1000 [02:59<14:19,  1.05s/it]

--- Epoch 180/1000 ---
trainLoss: 1.1057e-02
LR: 1.0000e-07
valLoss: 1.1054e-02
Better valLoss: 1.1054e-02, Saving models...


 18%|█▊        | 180/1000 [03:00<14:15,  1.04s/it]

--- Epoch 181/1000 ---
trainLoss: 1.1053e-02
LR: 1.0000e-07
valLoss: 1.1050e-02
Better valLoss: 1.1050e-02, Saving models...


 18%|█▊        | 181/1000 [03:01<14:09,  1.04s/it]

--- Epoch 182/1000 ---
trainLoss: 1.1049e-02
LR: 1.0000e-07
valLoss: 1.1046e-02
Better valLoss: 1.1046e-02, Saving models...


 18%|█▊        | 182/1000 [03:02<14:00,  1.03s/it]

--- Epoch 183/1000 ---
trainLoss: 1.1045e-02
LR: 1.0000e-07
valLoss: 1.1042e-02
Better valLoss: 1.1042e-02, Saving models...


 18%|█▊        | 183/1000 [03:03<13:58,  1.03s/it]

--- Epoch 184/1000 ---
trainLoss: 1.1041e-02
LR: 1.0000e-07
valLoss: 1.1037e-02
Better valLoss: 1.1037e-02, Saving models...


 18%|█▊        | 184/1000 [03:04<13:51,  1.02s/it]

--- Epoch 185/1000 ---
trainLoss: 1.1036e-02
LR: 1.0000e-07
valLoss: 1.1033e-02
Better valLoss: 1.1033e-02, Saving models...


 18%|█▊        | 185/1000 [03:05<13:55,  1.03s/it]

--- Epoch 186/1000 ---
trainLoss: 1.1032e-02
LR: 1.0000e-07
valLoss: 1.1029e-02
Better valLoss: 1.1029e-02, Saving models...


 19%|█▊        | 186/1000 [03:06<13:40,  1.01s/it]

--- Epoch 187/1000 ---
trainLoss: 1.1028e-02
LR: 1.0000e-07
valLoss: 1.1025e-02
Better valLoss: 1.1025e-02, Saving models...


 19%|█▊        | 187/1000 [03:07<13:45,  1.01s/it]

--- Epoch 188/1000 ---
trainLoss: 1.1024e-02
LR: 1.0000e-07
valLoss: 1.1021e-02
Better valLoss: 1.1021e-02, Saving models...


 19%|█▉        | 188/1000 [03:08<13:37,  1.01s/it]

--- Epoch 189/1000 ---
trainLoss: 1.1020e-02
LR: 1.0000e-07
valLoss: 1.1017e-02
Better valLoss: 1.1017e-02, Saving models...


 19%|█▉        | 189/1000 [03:09<13:43,  1.01s/it]

--- Epoch 190/1000 ---
trainLoss: 1.1016e-02
LR: 1.0000e-07
valLoss: 1.1012e-02
Better valLoss: 1.1012e-02, Saving models...


 19%|█▉        | 190/1000 [03:10<13:36,  1.01s/it]

--- Epoch 191/1000 ---
trainLoss: 1.1011e-02
LR: 1.0000e-07
valLoss: 1.1008e-02
Better valLoss: 1.1008e-02, Saving models...


 19%|█▉        | 191/1000 [03:11<13:39,  1.01s/it]

--- Epoch 192/1000 ---
trainLoss: 1.1007e-02
LR: 1.0000e-07
valLoss: 1.1004e-02
Better valLoss: 1.1004e-02, Saving models...


 19%|█▉        | 192/1000 [03:12<13:27,  1.00it/s]

--- Epoch 193/1000 ---
trainLoss: 1.1003e-02
LR: 1.0000e-07
valLoss: 1.1000e-02
Better valLoss: 1.1000e-02, Saving models...


 19%|█▉        | 193/1000 [03:13<13:14,  1.02it/s]

--- Epoch 194/1000 ---
trainLoss: 1.0999e-02
LR: 1.0000e-07
valLoss: 1.0996e-02
Better valLoss: 1.0996e-02, Saving models...


 19%|█▉        | 194/1000 [03:14<13:26,  1.00s/it]

--- Epoch 195/1000 ---
trainLoss: 1.0995e-02
LR: 1.0000e-07
valLoss: 1.0992e-02
Better valLoss: 1.0992e-02, Saving models...


 20%|█▉        | 195/1000 [03:15<13:31,  1.01s/it]

--- Epoch 196/1000 ---
trainLoss: 1.0991e-02
LR: 1.0000e-07
valLoss: 1.0987e-02
Better valLoss: 1.0987e-02, Saving models...


 20%|█▉        | 196/1000 [03:16<13:33,  1.01s/it]

--- Epoch 197/1000 ---
trainLoss: 1.0986e-02
LR: 1.0000e-07
valLoss: 1.0983e-02
Better valLoss: 1.0983e-02, Saving models...


 20%|█▉        | 197/1000 [03:17<13:39,  1.02s/it]

--- Epoch 198/1000 ---
trainLoss: 1.0982e-02
LR: 1.0000e-07
valLoss: 1.0979e-02
Better valLoss: 1.0979e-02, Saving models...


 20%|█▉        | 198/1000 [03:18<13:40,  1.02s/it]

--- Epoch 199/1000 ---
trainLoss: 1.0978e-02
LR: 1.0000e-07
valLoss: 1.0975e-02
Better valLoss: 1.0975e-02, Saving models...


 20%|█▉        | 199/1000 [03:19<13:41,  1.03s/it]

--- Epoch 200/1000 ---
trainLoss: 1.0974e-02
LR: 1.0000e-07
valLoss: 1.0971e-02
Better valLoss: 1.0971e-02, Saving models...


 20%|██        | 200/1000 [03:20<13:35,  1.02s/it]

--- Epoch 201/1000 ---
trainLoss: 1.0970e-02
LR: 1.0000e-07
valLoss: 1.0967e-02
Better valLoss: 1.0967e-02, Saving models...


 20%|██        | 201/1000 [03:22<13:39,  1.03s/it]

--- Epoch 202/1000 ---
trainLoss: 1.0966e-02
LR: 1.0000e-07
valLoss: 1.0963e-02
Better valLoss: 1.0963e-02, Saving models...


 20%|██        | 202/1000 [03:23<13:37,  1.02s/it]

--- Epoch 203/1000 ---
trainLoss: 1.0961e-02
LR: 1.0000e-07
valLoss: 1.0958e-02
Better valLoss: 1.0958e-02, Saving models...


 20%|██        | 203/1000 [03:24<13:31,  1.02s/it]

--- Epoch 204/1000 ---
trainLoss: 1.0957e-02
LR: 1.0000e-07
valLoss: 1.0954e-02
Better valLoss: 1.0954e-02, Saving models...


 20%|██        | 204/1000 [03:25<13:41,  1.03s/it]

--- Epoch 205/1000 ---
trainLoss: 1.0953e-02
LR: 1.0000e-07
valLoss: 1.0950e-02
Better valLoss: 1.0950e-02, Saving models...


 20%|██        | 205/1000 [03:26<13:39,  1.03s/it]

--- Epoch 206/1000 ---
trainLoss: 1.0949e-02
LR: 1.0000e-07
valLoss: 1.0946e-02
Better valLoss: 1.0946e-02, Saving models...


 21%|██        | 206/1000 [03:27<13:29,  1.02s/it]

--- Epoch 207/1000 ---
trainLoss: 1.0945e-02
LR: 1.0000e-07
valLoss: 1.0942e-02
Better valLoss: 1.0942e-02, Saving models...


 21%|██        | 207/1000 [03:28<13:35,  1.03s/it]

--- Epoch 208/1000 ---
trainLoss: 1.0941e-02
LR: 1.0000e-07
valLoss: 1.0938e-02
Better valLoss: 1.0938e-02, Saving models...


 21%|██        | 208/1000 [03:29<13:28,  1.02s/it]

--- Epoch 209/1000 ---
trainLoss: 1.0937e-02
LR: 1.0000e-07
valLoss: 1.0933e-02
Better valLoss: 1.0933e-02, Saving models...


 21%|██        | 209/1000 [03:30<13:29,  1.02s/it]

--- Epoch 210/1000 ---
trainLoss: 1.0932e-02
LR: 1.0000e-07
valLoss: 1.0929e-02
Better valLoss: 1.0929e-02, Saving models...


 21%|██        | 210/1000 [03:31<13:25,  1.02s/it]

--- Epoch 211/1000 ---
trainLoss: 1.0928e-02
LR: 1.0000e-07
valLoss: 1.0925e-02
Better valLoss: 1.0925e-02, Saving models...


 21%|██        | 211/1000 [03:32<13:25,  1.02s/it]

--- Epoch 212/1000 ---
trainLoss: 1.0924e-02
LR: 1.0000e-07
valLoss: 1.0921e-02
Better valLoss: 1.0921e-02, Saving models...


 21%|██        | 212/1000 [03:33<13:16,  1.01s/it]

--- Epoch 213/1000 ---
trainLoss: 1.0920e-02
LR: 1.0000e-07
valLoss: 1.0917e-02
Better valLoss: 1.0917e-02, Saving models...


 21%|██▏       | 213/1000 [03:34<13:07,  1.00s/it]

--- Epoch 214/1000 ---
trainLoss: 1.0916e-02
LR: 1.0000e-07
valLoss: 1.0913e-02
Better valLoss: 1.0913e-02, Saving models...


 21%|██▏       | 214/1000 [03:35<13:10,  1.01s/it]

--- Epoch 215/1000 ---
trainLoss: 1.0912e-02
LR: 1.0000e-07
valLoss: 1.0909e-02
Better valLoss: 1.0909e-02, Saving models...


 22%|██▏       | 215/1000 [03:36<13:14,  1.01s/it]

--- Epoch 216/1000 ---
trainLoss: 1.0908e-02
LR: 1.0000e-07
valLoss: 1.0904e-02
Better valLoss: 1.0904e-02, Saving models...


 22%|██▏       | 216/1000 [03:37<13:04,  1.00s/it]

--- Epoch 217/1000 ---
trainLoss: 1.0903e-02
LR: 1.0000e-07
valLoss: 1.0900e-02
Better valLoss: 1.0900e-02, Saving models...


 22%|██▏       | 217/1000 [03:38<13:07,  1.01s/it]

--- Epoch 218/1000 ---
trainLoss: 1.0899e-02
LR: 1.0000e-07
valLoss: 1.0896e-02
Better valLoss: 1.0896e-02, Saving models...


 22%|██▏       | 218/1000 [03:39<12:57,  1.01it/s]

--- Epoch 219/1000 ---
trainLoss: 1.0895e-02
LR: 1.0000e-07
valLoss: 1.0892e-02
Better valLoss: 1.0892e-02, Saving models...


 22%|██▏       | 219/1000 [03:40<13:11,  1.01s/it]

--- Epoch 220/1000 ---
trainLoss: 1.0891e-02
LR: 1.0000e-07
valLoss: 1.0888e-02
Better valLoss: 1.0888e-02, Saving models...


 22%|██▏       | 220/1000 [03:41<12:59,  1.00it/s]

--- Epoch 221/1000 ---
trainLoss: 1.0887e-02
LR: 1.0000e-07
valLoss: 1.0884e-02
Better valLoss: 1.0884e-02, Saving models...


 22%|██▏       | 221/1000 [03:42<12:53,  1.01it/s]

--- Epoch 222/1000 ---
trainLoss: 1.0883e-02
LR: 1.0000e-07
valLoss: 1.0880e-02
Better valLoss: 1.0880e-02, Saving models...


 22%|██▏       | 222/1000 [03:43<12:45,  1.02it/s]

--- Epoch 223/1000 ---
trainLoss: 1.0879e-02
LR: 1.0000e-07
valLoss: 1.0875e-02
Better valLoss: 1.0875e-02, Saving models...


 22%|██▏       | 223/1000 [03:44<12:27,  1.04it/s]

--- Epoch 224/1000 ---
trainLoss: 1.0874e-02
LR: 1.0000e-07
valLoss: 1.0871e-02
Better valLoss: 1.0871e-02, Saving models...


 22%|██▏       | 224/1000 [03:45<12:32,  1.03it/s]

--- Epoch 225/1000 ---
trainLoss: 1.0870e-02
LR: 1.0000e-07
valLoss: 1.0867e-02
Better valLoss: 1.0867e-02, Saving models...


 22%|██▎       | 225/1000 [03:46<12:58,  1.01s/it]

--- Epoch 226/1000 ---
trainLoss: 1.0866e-02
LR: 1.0000e-07
valLoss: 1.0863e-02
Better valLoss: 1.0863e-02, Saving models...


 23%|██▎       | 226/1000 [03:47<12:46,  1.01it/s]

--- Epoch 227/1000 ---
trainLoss: 1.0862e-02
LR: 1.0000e-07
valLoss: 1.0859e-02
Better valLoss: 1.0859e-02, Saving models...


 23%|██▎       | 227/1000 [03:48<12:55,  1.00s/it]

--- Epoch 228/1000 ---
trainLoss: 1.0858e-02
LR: 1.0000e-07
valLoss: 1.0855e-02
Better valLoss: 1.0855e-02, Saving models...


 23%|██▎       | 228/1000 [03:49<13:03,  1.01s/it]

--- Epoch 229/1000 ---
trainLoss: 1.0854e-02
LR: 1.0000e-07
valLoss: 1.0851e-02
Better valLoss: 1.0851e-02, Saving models...


 23%|██▎       | 229/1000 [03:50<13:07,  1.02s/it]

--- Epoch 230/1000 ---
trainLoss: 1.0850e-02
LR: 1.0000e-07
valLoss: 1.0846e-02
Better valLoss: 1.0846e-02, Saving models...


 23%|██▎       | 230/1000 [03:51<13:01,  1.02s/it]

--- Epoch 231/1000 ---
trainLoss: 1.0845e-02
LR: 1.0000e-07
valLoss: 1.0842e-02
Better valLoss: 1.0842e-02, Saving models...


 23%|██▎       | 231/1000 [03:52<13:08,  1.03s/it]

--- Epoch 232/1000 ---
trainLoss: 1.0841e-02
LR: 1.0000e-07
valLoss: 1.0838e-02
Better valLoss: 1.0838e-02, Saving models...


 23%|██▎       | 232/1000 [03:53<13:09,  1.03s/it]

--- Epoch 233/1000 ---
trainLoss: 1.0837e-02
LR: 1.0000e-07
valLoss: 1.0834e-02
Better valLoss: 1.0834e-02, Saving models...


 23%|██▎       | 233/1000 [03:54<13:16,  1.04s/it]

--- Epoch 234/1000 ---
trainLoss: 1.0833e-02
LR: 1.0000e-07
valLoss: 1.0830e-02
Better valLoss: 1.0830e-02, Saving models...


 23%|██▎       | 234/1000 [03:55<13:10,  1.03s/it]

--- Epoch 235/1000 ---
trainLoss: 1.0829e-02
LR: 1.0000e-07
valLoss: 1.0826e-02
Better valLoss: 1.0826e-02, Saving models...


 24%|██▎       | 235/1000 [03:56<13:20,  1.05s/it]

--- Epoch 236/1000 ---
trainLoss: 1.0825e-02
LR: 1.0000e-07
valLoss: 1.0822e-02
Better valLoss: 1.0822e-02, Saving models...


 24%|██▎       | 236/1000 [03:57<12:54,  1.01s/it]

--- Epoch 237/1000 ---
trainLoss: 1.0821e-02
LR: 1.0000e-07
valLoss: 1.0817e-02
Better valLoss: 1.0817e-02, Saving models...


 24%|██▎       | 237/1000 [03:58<12:48,  1.01s/it]

--- Epoch 238/1000 ---
trainLoss: 1.0816e-02
LR: 1.0000e-07
valLoss: 1.0813e-02
Better valLoss: 1.0813e-02, Saving models...


 24%|██▍       | 238/1000 [03:59<13:00,  1.02s/it]

--- Epoch 239/1000 ---
trainLoss: 1.0812e-02
LR: 1.0000e-07
valLoss: 1.0809e-02
Better valLoss: 1.0809e-02, Saving models...


 24%|██▍       | 239/1000 [04:00<12:51,  1.01s/it]

--- Epoch 240/1000 ---
trainLoss: 1.0808e-02
LR: 1.0000e-07
valLoss: 1.0805e-02
Better valLoss: 1.0805e-02, Saving models...


 24%|██▍       | 240/1000 [04:01<12:44,  1.01s/it]

--- Epoch 241/1000 ---
trainLoss: 1.0804e-02
LR: 1.0000e-07
valLoss: 1.0801e-02
Better valLoss: 1.0801e-02, Saving models...


 24%|██▍       | 241/1000 [04:02<12:34,  1.01it/s]

--- Epoch 242/1000 ---
trainLoss: 1.0800e-02
LR: 1.0000e-07
valLoss: 1.0797e-02
Better valLoss: 1.0797e-02, Saving models...


 24%|██▍       | 242/1000 [04:03<12:37,  1.00it/s]

--- Epoch 243/1000 ---
trainLoss: 1.0796e-02
LR: 1.0000e-07
valLoss: 1.0793e-02
Better valLoss: 1.0793e-02, Saving models...


 24%|██▍       | 243/1000 [04:04<12:06,  1.04it/s]

--- Epoch 244/1000 ---
trainLoss: 1.0792e-02
LR: 1.0000e-07
valLoss: 1.0789e-02
Better valLoss: 1.0789e-02, Saving models...


 24%|██▍       | 244/1000 [04:05<12:17,  1.03it/s]

--- Epoch 245/1000 ---
trainLoss: 1.0788e-02
LR: 1.0000e-07
valLoss: 1.0785e-02
Better valLoss: 1.0785e-02, Saving models...


 24%|██▍       | 245/1000 [04:06<12:15,  1.03it/s]

--- Epoch 246/1000 ---
trainLoss: 1.0784e-02
LR: 1.0000e-07
valLoss: 1.0780e-02
Better valLoss: 1.0780e-02, Saving models...


 25%|██▍       | 246/1000 [04:07<12:10,  1.03it/s]

--- Epoch 247/1000 ---
trainLoss: 1.0779e-02
LR: 1.0000e-07
valLoss: 1.0776e-02
Better valLoss: 1.0776e-02, Saving models...


 25%|██▍       | 247/1000 [04:08<12:27,  1.01it/s]

--- Epoch 248/1000 ---
trainLoss: 1.0775e-02
LR: 1.0000e-07
valLoss: 1.0772e-02
Better valLoss: 1.0772e-02, Saving models...


 25%|██▍       | 248/1000 [04:09<12:36,  1.01s/it]

--- Epoch 249/1000 ---
trainLoss: 1.0771e-02
LR: 1.0000e-07
valLoss: 1.0768e-02
Better valLoss: 1.0768e-02, Saving models...


 25%|██▍       | 249/1000 [04:10<12:38,  1.01s/it]

--- Epoch 250/1000 ---
trainLoss: 1.0767e-02
LR: 1.0000e-07
valLoss: 1.0764e-02
Better valLoss: 1.0764e-02, Saving models...


 25%|██▌       | 250/1000 [04:11<12:33,  1.00s/it]

--- Epoch 251/1000 ---
trainLoss: 1.0763e-02
LR: 1.0000e-07
valLoss: 1.0760e-02
Better valLoss: 1.0760e-02, Saving models...


 25%|██▌       | 251/1000 [04:12<12:37,  1.01s/it]

--- Epoch 252/1000 ---
trainLoss: 1.0759e-02
LR: 1.0000e-07
valLoss: 1.0756e-02
Better valLoss: 1.0756e-02, Saving models...


 25%|██▌       | 252/1000 [04:13<12:45,  1.02s/it]

--- Epoch 253/1000 ---
trainLoss: 1.0755e-02
LR: 1.0000e-07
valLoss: 1.0751e-02
Better valLoss: 1.0751e-02, Saving models...


 25%|██▌       | 253/1000 [04:14<12:33,  1.01s/it]

--- Epoch 254/1000 ---
trainLoss: 1.0750e-02
LR: 1.0000e-07
valLoss: 1.0747e-02
Better valLoss: 1.0747e-02, Saving models...


 25%|██▌       | 254/1000 [04:15<12:35,  1.01s/it]

--- Epoch 255/1000 ---
trainLoss: 1.0746e-02
LR: 1.0000e-07
valLoss: 1.0743e-02
Better valLoss: 1.0743e-02, Saving models...


 26%|██▌       | 255/1000 [04:16<12:31,  1.01s/it]

--- Epoch 256/1000 ---
trainLoss: 1.0742e-02
LR: 1.0000e-07
valLoss: 1.0739e-02
Better valLoss: 1.0739e-02, Saving models...


 26%|██▌       | 256/1000 [04:17<12:41,  1.02s/it]

--- Epoch 257/1000 ---
trainLoss: 1.0738e-02
LR: 1.0000e-07
valLoss: 1.0735e-02
Better valLoss: 1.0735e-02, Saving models...


 26%|██▌       | 257/1000 [04:18<12:51,  1.04s/it]

--- Epoch 258/1000 ---
trainLoss: 1.0734e-02
LR: 1.0000e-07
valLoss: 1.0731e-02
Better valLoss: 1.0731e-02, Saving models...


 26%|██▌       | 258/1000 [04:19<12:52,  1.04s/it]

--- Epoch 259/1000 ---
trainLoss: 1.0730e-02
LR: 1.0000e-07
valLoss: 1.0727e-02
Better valLoss: 1.0727e-02, Saving models...


 26%|██▌       | 259/1000 [04:20<12:50,  1.04s/it]

--- Epoch 260/1000 ---
trainLoss: 1.0726e-02
LR: 1.0000e-07
valLoss: 1.0723e-02
Better valLoss: 1.0723e-02, Saving models...


 26%|██▌       | 260/1000 [04:21<12:33,  1.02s/it]

--- Epoch 261/1000 ---
trainLoss: 1.0722e-02
LR: 1.0000e-07
valLoss: 1.0719e-02
Better valLoss: 1.0719e-02, Saving models...


 26%|██▌       | 261/1000 [04:22<12:24,  1.01s/it]

--- Epoch 262/1000 ---
trainLoss: 1.0718e-02
LR: 1.0000e-07
valLoss: 1.0715e-02
Better valLoss: 1.0715e-02, Saving models...


 26%|██▌       | 262/1000 [04:23<12:29,  1.02s/it]

--- Epoch 263/1000 ---
trainLoss: 1.0714e-02
LR: 1.0000e-07
valLoss: 1.0711e-02
Better valLoss: 1.0711e-02, Saving models...


 26%|██▋       | 263/1000 [04:24<12:02,  1.02it/s]

--- Epoch 264/1000 ---
trainLoss: 1.0709e-02
LR: 1.0000e-07
valLoss: 1.0706e-02
Better valLoss: 1.0706e-02, Saving models...


 26%|██▋       | 264/1000 [04:25<11:58,  1.02it/s]

--- Epoch 265/1000 ---
trainLoss: 1.0705e-02
LR: 1.0000e-07
valLoss: 1.0702e-02
Better valLoss: 1.0702e-02, Saving models...


 26%|██▋       | 265/1000 [04:26<12:04,  1.01it/s]

--- Epoch 266/1000 ---
trainLoss: 1.0701e-02
LR: 1.0000e-07
valLoss: 1.0698e-02
Better valLoss: 1.0698e-02, Saving models...


 27%|██▋       | 266/1000 [04:27<12:04,  1.01it/s]

--- Epoch 267/1000 ---
trainLoss: 1.0697e-02
LR: 1.0000e-07
valLoss: 1.0694e-02
Better valLoss: 1.0694e-02, Saving models...


 27%|██▋       | 267/1000 [04:28<12:00,  1.02it/s]

--- Epoch 268/1000 ---
trainLoss: 1.0693e-02
LR: 1.0000e-07
valLoss: 1.0690e-02
Better valLoss: 1.0690e-02, Saving models...


 27%|██▋       | 268/1000 [04:29<11:57,  1.02it/s]

--- Epoch 269/1000 ---
trainLoss: 1.0689e-02
LR: 1.0000e-07
valLoss: 1.0686e-02
Better valLoss: 1.0686e-02, Saving models...


 27%|██▋       | 269/1000 [04:30<12:02,  1.01it/s]

--- Epoch 270/1000 ---
trainLoss: 1.0685e-02
LR: 1.0000e-07
valLoss: 1.0682e-02
Better valLoss: 1.0682e-02, Saving models...


 27%|██▋       | 270/1000 [04:31<11:54,  1.02it/s]

--- Epoch 271/1000 ---
trainLoss: 1.0681e-02
LR: 1.0000e-07
valLoss: 1.0678e-02
Better valLoss: 1.0678e-02, Saving models...


 27%|██▋       | 271/1000 [04:32<12:15,  1.01s/it]

--- Epoch 272/1000 ---
trainLoss: 1.0677e-02
LR: 1.0000e-07
valLoss: 1.0674e-02
Better valLoss: 1.0674e-02, Saving models...


 27%|██▋       | 272/1000 [04:33<12:12,  1.01s/it]

--- Epoch 273/1000 ---
trainLoss: 1.0673e-02
LR: 1.0000e-07
valLoss: 1.0670e-02
Better valLoss: 1.0670e-02, Saving models...


 27%|██▋       | 273/1000 [04:34<12:02,  1.01it/s]

--- Epoch 274/1000 ---
trainLoss: 1.0669e-02
LR: 1.0000e-07
valLoss: 1.0666e-02
Better valLoss: 1.0666e-02, Saving models...


 27%|██▋       | 274/1000 [04:35<12:16,  1.01s/it]

--- Epoch 275/1000 ---
trainLoss: 1.0665e-02
LR: 1.0000e-07
valLoss: 1.0662e-02
Better valLoss: 1.0662e-02, Saving models...


 28%|██▊       | 275/1000 [04:36<12:23,  1.03s/it]

--- Epoch 276/1000 ---
trainLoss: 1.0661e-02
LR: 1.0000e-07
valLoss: 1.0657e-02
Better valLoss: 1.0657e-02, Saving models...


 28%|██▊       | 276/1000 [04:37<12:24,  1.03s/it]

--- Epoch 277/1000 ---
trainLoss: 1.0656e-02
LR: 1.0000e-07
valLoss: 1.0653e-02
Better valLoss: 1.0653e-02, Saving models...


 28%|██▊       | 277/1000 [04:38<12:25,  1.03s/it]

--- Epoch 278/1000 ---
trainLoss: 1.0652e-02
LR: 1.0000e-07
valLoss: 1.0649e-02
Better valLoss: 1.0649e-02, Saving models...


 28%|██▊       | 278/1000 [04:39<12:22,  1.03s/it]

--- Epoch 279/1000 ---
trainLoss: 1.0648e-02
LR: 1.0000e-07
valLoss: 1.0645e-02
Better valLoss: 1.0645e-02, Saving models...


 28%|██▊       | 279/1000 [04:40<12:28,  1.04s/it]

--- Epoch 280/1000 ---
trainLoss: 1.0644e-02
LR: 1.0000e-07
valLoss: 1.0641e-02
Better valLoss: 1.0641e-02, Saving models...


 28%|██▊       | 280/1000 [04:41<12:14,  1.02s/it]

--- Epoch 281/1000 ---
trainLoss: 1.0640e-02
LR: 1.0000e-07
valLoss: 1.0637e-02
Better valLoss: 1.0637e-02, Saving models...


 28%|██▊       | 281/1000 [04:42<12:17,  1.03s/it]

--- Epoch 282/1000 ---
trainLoss: 1.0636e-02
LR: 1.0000e-07
valLoss: 1.0633e-02
Better valLoss: 1.0633e-02, Saving models...


 28%|██▊       | 282/1000 [04:43<12:08,  1.02s/it]

--- Epoch 283/1000 ---
trainLoss: 1.0632e-02
LR: 1.0000e-07
valLoss: 1.0629e-02
Better valLoss: 1.0629e-02, Saving models...


 28%|██▊       | 283/1000 [04:44<12:06,  1.01s/it]

--- Epoch 284/1000 ---
trainLoss: 1.0628e-02
LR: 1.0000e-07
valLoss: 1.0625e-02
Better valLoss: 1.0625e-02, Saving models...


 28%|██▊       | 284/1000 [04:45<12:20,  1.03s/it]

--- Epoch 285/1000 ---
trainLoss: 1.0624e-02
LR: 1.0000e-07
valLoss: 1.0621e-02
Better valLoss: 1.0621e-02, Saving models...


 28%|██▊       | 285/1000 [04:46<12:22,  1.04s/it]

--- Epoch 286/1000 ---
trainLoss: 1.0620e-02
LR: 1.0000e-07
valLoss: 1.0617e-02
Better valLoss: 1.0617e-02, Saving models...


 29%|██▊       | 286/1000 [04:47<11:56,  1.00s/it]

--- Epoch 287/1000 ---
trainLoss: 1.0616e-02
LR: 1.0000e-07
valLoss: 1.0613e-02
Better valLoss: 1.0613e-02, Saving models...


 29%|██▊       | 287/1000 [04:48<12:08,  1.02s/it]

--- Epoch 288/1000 ---
trainLoss: 1.0612e-02
LR: 1.0000e-07
valLoss: 1.0609e-02
Better valLoss: 1.0609e-02, Saving models...


 29%|██▉       | 288/1000 [04:49<11:59,  1.01s/it]

--- Epoch 289/1000 ---
trainLoss: 1.0608e-02
LR: 1.0000e-07
valLoss: 1.0605e-02
Better valLoss: 1.0605e-02, Saving models...


 29%|██▉       | 289/1000 [04:50<11:48,  1.00it/s]

--- Epoch 290/1000 ---
trainLoss: 1.0604e-02
LR: 1.0000e-07
valLoss: 1.0601e-02
Better valLoss: 1.0601e-02, Saving models...


 29%|██▉       | 290/1000 [04:51<11:59,  1.01s/it]

--- Epoch 291/1000 ---
trainLoss: 1.0600e-02
LR: 1.0000e-07
valLoss: 1.0597e-02
Better valLoss: 1.0597e-02, Saving models...


 29%|██▉       | 291/1000 [04:52<11:56,  1.01s/it]

--- Epoch 292/1000 ---
trainLoss: 1.0596e-02
LR: 1.0000e-07
valLoss: 1.0593e-02
Better valLoss: 1.0593e-02, Saving models...


 29%|██▉       | 292/1000 [04:53<11:56,  1.01s/it]

--- Epoch 293/1000 ---
trainLoss: 1.0592e-02
LR: 1.0000e-07
valLoss: 1.0588e-02
Better valLoss: 1.0588e-02, Saving models...


 29%|██▉       | 293/1000 [04:54<11:54,  1.01s/it]

--- Epoch 294/1000 ---
trainLoss: 1.0587e-02
LR: 1.0000e-07
valLoss: 1.0584e-02
Better valLoss: 1.0584e-02, Saving models...


 29%|██▉       | 294/1000 [04:55<11:57,  1.02s/it]

--- Epoch 295/1000 ---
trainLoss: 1.0583e-02
LR: 1.0000e-07
valLoss: 1.0580e-02
Better valLoss: 1.0580e-02, Saving models...


 30%|██▉       | 295/1000 [04:56<11:54,  1.01s/it]

--- Epoch 296/1000 ---
trainLoss: 1.0579e-02
LR: 1.0000e-07
valLoss: 1.0576e-02
Better valLoss: 1.0576e-02, Saving models...


 30%|██▉       | 296/1000 [04:57<11:43,  1.00it/s]

--- Epoch 297/1000 ---
trainLoss: 1.0575e-02
LR: 1.0000e-07
valLoss: 1.0572e-02
Better valLoss: 1.0572e-02, Saving models...


 30%|██▉       | 297/1000 [04:58<11:54,  1.02s/it]

--- Epoch 298/1000 ---
trainLoss: 1.0571e-02
LR: 1.0000e-07
valLoss: 1.0568e-02
Better valLoss: 1.0568e-02, Saving models...


 30%|██▉       | 298/1000 [04:59<11:58,  1.02s/it]

--- Epoch 299/1000 ---
trainLoss: 1.0567e-02
LR: 1.0000e-07
valLoss: 1.0564e-02
Better valLoss: 1.0564e-02, Saving models...


 30%|██▉       | 299/1000 [05:00<11:55,  1.02s/it]

--- Epoch 300/1000 ---
trainLoss: 1.0563e-02
LR: 1.0000e-07
valLoss: 1.0560e-02
Better valLoss: 1.0560e-02, Saving models...


 30%|███       | 300/1000 [05:01<11:36,  1.00it/s]

--- Epoch 301/1000 ---
trainLoss: 1.0559e-02
LR: 1.0000e-07
valLoss: 1.0556e-02
Better valLoss: 1.0556e-02, Saving models...


 30%|███       | 301/1000 [05:02<11:44,  1.01s/it]

--- Epoch 302/1000 ---
trainLoss: 1.0555e-02
LR: 1.0000e-07
valLoss: 1.0552e-02
Better valLoss: 1.0552e-02, Saving models...


 30%|███       | 302/1000 [05:03<11:39,  1.00s/it]

--- Epoch 303/1000 ---
trainLoss: 1.0551e-02
LR: 1.0000e-07
valLoss: 1.0548e-02
Better valLoss: 1.0548e-02, Saving models...


 30%|███       | 303/1000 [05:04<11:23,  1.02it/s]

--- Epoch 304/1000 ---
trainLoss: 1.0547e-02
LR: 1.0000e-07
valLoss: 1.0544e-02
Better valLoss: 1.0544e-02, Saving models...


 30%|███       | 304/1000 [05:05<11:26,  1.01it/s]

--- Epoch 305/1000 ---
trainLoss: 1.0543e-02
LR: 1.0000e-07
valLoss: 1.0540e-02
Better valLoss: 1.0540e-02, Saving models...


 30%|███       | 305/1000 [05:06<11:32,  1.00it/s]

--- Epoch 306/1000 ---
trainLoss: 1.0539e-02
LR: 1.0000e-07
valLoss: 1.0536e-02
Better valLoss: 1.0536e-02, Saving models...


 31%|███       | 306/1000 [05:07<11:38,  1.01s/it]

--- Epoch 307/1000 ---
trainLoss: 1.0535e-02
LR: 1.0000e-07
valLoss: 1.0532e-02
Better valLoss: 1.0532e-02, Saving models...


 31%|███       | 307/1000 [05:08<11:34,  1.00s/it]

--- Epoch 308/1000 ---
trainLoss: 1.0531e-02
LR: 1.0000e-07
valLoss: 1.0528e-02
Better valLoss: 1.0528e-02, Saving models...


 31%|███       | 308/1000 [05:09<11:33,  1.00s/it]

--- Epoch 309/1000 ---
trainLoss: 1.0527e-02
LR: 1.0000e-07
valLoss: 1.0524e-02
Better valLoss: 1.0524e-02, Saving models...


 31%|███       | 309/1000 [05:10<11:33,  1.00s/it]

--- Epoch 310/1000 ---
trainLoss: 1.0523e-02
LR: 1.0000e-07
valLoss: 1.0520e-02
Better valLoss: 1.0520e-02, Saving models...


 31%|███       | 310/1000 [05:11<11:19,  1.01it/s]

--- Epoch 311/1000 ---
trainLoss: 1.0519e-02
LR: 1.0000e-07
valLoss: 1.0516e-02
Better valLoss: 1.0516e-02, Saving models...


 31%|███       | 311/1000 [05:12<11:22,  1.01it/s]

--- Epoch 312/1000 ---
trainLoss: 1.0515e-02
LR: 1.0000e-07
valLoss: 1.0512e-02
Better valLoss: 1.0512e-02, Saving models...


 31%|███       | 312/1000 [05:13<11:21,  1.01it/s]

--- Epoch 313/1000 ---
trainLoss: 1.0511e-02
LR: 1.0000e-07
valLoss: 1.0508e-02
Better valLoss: 1.0508e-02, Saving models...


 31%|███▏      | 313/1000 [05:14<11:22,  1.01it/s]

--- Epoch 314/1000 ---
trainLoss: 1.0507e-02
LR: 1.0000e-07
valLoss: 1.0504e-02
Better valLoss: 1.0504e-02, Saving models...


 31%|███▏      | 314/1000 [05:15<11:25,  1.00it/s]

--- Epoch 315/1000 ---
trainLoss: 1.0503e-02
LR: 1.0000e-07
valLoss: 1.0500e-02
Better valLoss: 1.0500e-02, Saving models...


 32%|███▏      | 315/1000 [05:17<12:24,  1.09s/it]

--- Epoch 316/1000 ---
trainLoss: 1.0499e-02
LR: 1.0000e-07
valLoss: 1.0495e-02
Better valLoss: 1.0495e-02, Saving models...


 32%|███▏      | 316/1000 [05:18<12:03,  1.06s/it]

--- Epoch 317/1000 ---
trainLoss: 1.0494e-02
LR: 1.0000e-07
valLoss: 1.0491e-02
Better valLoss: 1.0491e-02, Saving models...


 32%|███▏      | 317/1000 [05:19<11:58,  1.05s/it]

--- Epoch 318/1000 ---
trainLoss: 1.0490e-02
LR: 1.0000e-07
valLoss: 1.0487e-02
Better valLoss: 1.0487e-02, Saving models...


 32%|███▏      | 318/1000 [05:20<11:58,  1.05s/it]

--- Epoch 319/1000 ---
trainLoss: 1.0486e-02
LR: 1.0000e-07
valLoss: 1.0483e-02
Better valLoss: 1.0483e-02, Saving models...


 32%|███▏      | 319/1000 [05:21<11:52,  1.05s/it]

--- Epoch 320/1000 ---
trainLoss: 1.0482e-02
LR: 1.0000e-07
valLoss: 1.0479e-02
Better valLoss: 1.0479e-02, Saving models...


 32%|███▏      | 320/1000 [05:22<11:47,  1.04s/it]

--- Epoch 321/1000 ---
trainLoss: 1.0478e-02
LR: 1.0000e-07


 32%|███▏      | 321/1000 [05:23<11:16,  1.00it/s]

valLoss: 1.0475e-02
Better valLoss: 1.0475e-02, Saving models...
--- Epoch 322/1000 ---
trainLoss: 1.0474e-02
LR: 1.0000e-07
valLoss: 1.0471e-02
Better valLoss: 1.0471e-02, Saving models...


 32%|███▏      | 322/1000 [05:24<11:19,  1.00s/it]

--- Epoch 323/1000 ---
trainLoss: 1.0470e-02
LR: 1.0000e-07
valLoss: 1.0467e-02
Better valLoss: 1.0467e-02, Saving models...


 32%|███▏      | 323/1000 [05:25<11:17,  1.00s/it]

--- Epoch 324/1000 ---
trainLoss: 1.0466e-02
LR: 1.0000e-07
valLoss: 1.0463e-02
Better valLoss: 1.0463e-02, Saving models...


 32%|███▏      | 324/1000 [05:26<11:07,  1.01it/s]

--- Epoch 325/1000 ---
trainLoss: 1.0462e-02
LR: 1.0000e-07
valLoss: 1.0459e-02
Better valLoss: 1.0459e-02, Saving models...


 32%|███▎      | 325/1000 [05:27<11:14,  1.00it/s]

--- Epoch 326/1000 ---
trainLoss: 1.0458e-02
LR: 1.0000e-07
valLoss: 1.0455e-02
Better valLoss: 1.0455e-02, Saving models...


 33%|███▎      | 326/1000 [05:28<11:11,  1.00it/s]

--- Epoch 327/1000 ---
trainLoss: 1.0454e-02
LR: 1.0000e-07
valLoss: 1.0451e-02
Better valLoss: 1.0451e-02, Saving models...


 33%|███▎      | 327/1000 [05:29<11:06,  1.01it/s]

--- Epoch 328/1000 ---
trainLoss: 1.0450e-02
LR: 1.0000e-07
valLoss: 1.0447e-02
Better valLoss: 1.0447e-02, Saving models...


 33%|███▎      | 328/1000 [05:30<11:09,  1.00it/s]

--- Epoch 329/1000 ---
trainLoss: 1.0446e-02
LR: 1.0000e-07
valLoss: 1.0443e-02
Better valLoss: 1.0443e-02, Saving models...


 33%|███▎      | 329/1000 [05:31<11:13,  1.00s/it]

--- Epoch 330/1000 ---
trainLoss: 1.0442e-02
LR: 1.0000e-07
valLoss: 1.0439e-02
Better valLoss: 1.0439e-02, Saving models...


 33%|███▎      | 330/1000 [05:32<11:02,  1.01it/s]

--- Epoch 331/1000 ---
trainLoss: 1.0438e-02
LR: 1.0000e-07
valLoss: 1.0435e-02
Better valLoss: 1.0435e-02, Saving models...


 33%|███▎      | 331/1000 [05:33<11:01,  1.01it/s]

--- Epoch 332/1000 ---
trainLoss: 1.0434e-02
LR: 1.0000e-07
valLoss: 1.0431e-02
Better valLoss: 1.0431e-02, Saving models...


 33%|███▎      | 332/1000 [05:34<11:09,  1.00s/it]

--- Epoch 333/1000 ---
trainLoss: 1.0430e-02
LR: 1.0000e-07
valLoss: 1.0427e-02
Better valLoss: 1.0427e-02, Saving models...


 33%|███▎      | 333/1000 [05:35<11:05,  1.00it/s]

--- Epoch 334/1000 ---
trainLoss: 1.0426e-02
LR: 1.0000e-07
valLoss: 1.0423e-02
Better valLoss: 1.0423e-02, Saving models...


 33%|███▎      | 334/1000 [05:36<10:48,  1.03it/s]

--- Epoch 335/1000 ---
trainLoss: 1.0422e-02
LR: 1.0000e-07
valLoss: 1.0419e-02
Better valLoss: 1.0419e-02, Saving models...


 34%|███▎      | 335/1000 [05:37<10:46,  1.03it/s]

--- Epoch 336/1000 ---
trainLoss: 1.0418e-02
LR: 1.0000e-07
valLoss: 1.0415e-02
Better valLoss: 1.0415e-02, Saving models...


 34%|███▎      | 336/1000 [05:37<10:48,  1.02it/s]

--- Epoch 337/1000 ---
trainLoss: 1.0414e-02
LR: 1.0000e-07
valLoss: 1.0411e-02
Better valLoss: 1.0411e-02, Saving models...


 34%|███▎      | 337/1000 [05:39<12:25,  1.12s/it]

--- Epoch 338/1000 ---
trainLoss: 1.0410e-02
LR: 1.0000e-07
valLoss: 1.0407e-02
Better valLoss: 1.0407e-02, Saving models...


 34%|███▍      | 338/1000 [05:40<12:00,  1.09s/it]

--- Epoch 339/1000 ---
trainLoss: 1.0406e-02
LR: 1.0000e-07
valLoss: 1.0403e-02
Better valLoss: 1.0403e-02, Saving models...


 34%|███▍      | 339/1000 [05:41<11:36,  1.05s/it]

--- Epoch 340/1000 ---
trainLoss: 1.0402e-02
LR: 1.0000e-07
valLoss: 1.0399e-02
Better valLoss: 1.0399e-02, Saving models...


 34%|███▍      | 340/1000 [05:42<11:19,  1.03s/it]

--- Epoch 341/1000 ---
trainLoss: 1.0398e-02
LR: 1.0000e-07
valLoss: 1.0395e-02
Better valLoss: 1.0395e-02, Saving models...


 34%|███▍      | 341/1000 [05:43<11:01,  1.00s/it]

--- Epoch 342/1000 ---
trainLoss: 1.0394e-02
LR: 1.0000e-07
valLoss: 1.0391e-02
Better valLoss: 1.0391e-02, Saving models...


 34%|███▍      | 342/1000 [05:44<11:15,  1.03s/it]

--- Epoch 343/1000 ---
trainLoss: 1.0390e-02
LR: 1.0000e-07
valLoss: 1.0387e-02
Better valLoss: 1.0387e-02, Saving models...


 34%|███▍      | 343/1000 [05:45<11:23,  1.04s/it]

--- Epoch 344/1000 ---
trainLoss: 1.0386e-02
LR: 1.0000e-07
valLoss: 1.0383e-02
Better valLoss: 1.0383e-02, Saving models...


 34%|███▍      | 344/1000 [05:46<11:09,  1.02s/it]

--- Epoch 345/1000 ---
trainLoss: 1.0382e-02
LR: 1.0000e-07
valLoss: 1.0379e-02
Better valLoss: 1.0379e-02, Saving models...


 34%|███▍      | 345/1000 [05:47<11:03,  1.01s/it]

--- Epoch 346/1000 ---
trainLoss: 1.0378e-02
LR: 1.0000e-07
valLoss: 1.0375e-02
Better valLoss: 1.0375e-02, Saving models...


 35%|███▍      | 346/1000 [05:48<10:57,  1.01s/it]

--- Epoch 347/1000 ---
trainLoss: 1.0374e-02
LR: 1.0000e-07
valLoss: 1.0371e-02
Better valLoss: 1.0371e-02, Saving models...


 35%|███▍      | 347/1000 [05:49<10:58,  1.01s/it]

--- Epoch 348/1000 ---
trainLoss: 1.0370e-02
LR: 1.0000e-07
valLoss: 1.0367e-02
Better valLoss: 1.0367e-02, Saving models...


 35%|███▍      | 348/1000 [05:50<10:46,  1.01it/s]

--- Epoch 349/1000 ---
trainLoss: 1.0366e-02
LR: 1.0000e-07
valLoss: 1.0363e-02
Better valLoss: 1.0363e-02, Saving models...


 35%|███▍      | 349/1000 [05:51<10:54,  1.01s/it]

--- Epoch 350/1000 ---
trainLoss: 1.0362e-02
LR: 1.0000e-07
valLoss: 1.0359e-02
Better valLoss: 1.0359e-02, Saving models...


 35%|███▌      | 350/1000 [05:52<10:50,  1.00s/it]

--- Epoch 351/1000 ---
trainLoss: 1.0358e-02
LR: 1.0000e-07
valLoss: 1.0355e-02
Better valLoss: 1.0355e-02, Saving models...


 35%|███▌      | 351/1000 [05:53<10:41,  1.01it/s]

--- Epoch 352/1000 ---
trainLoss: 1.0354e-02
LR: 1.0000e-07
valLoss: 1.0351e-02
Better valLoss: 1.0351e-02, Saving models...


 35%|███▌      | 352/1000 [05:54<10:46,  1.00it/s]

--- Epoch 353/1000 ---
trainLoss: 1.0350e-02
LR: 1.0000e-07
valLoss: 1.0347e-02
Better valLoss: 1.0347e-02, Saving models...


 35%|███▌      | 353/1000 [05:55<10:41,  1.01it/s]

--- Epoch 354/1000 ---
trainLoss: 1.0346e-02
LR: 1.0000e-07
valLoss: 1.0343e-02
Better valLoss: 1.0343e-02, Saving models...


 35%|███▌      | 354/1000 [05:56<10:38,  1.01it/s]

--- Epoch 355/1000 ---
trainLoss: 1.0342e-02
LR: 1.0000e-07
valLoss: 1.0339e-02
Better valLoss: 1.0339e-02, Saving models...


 36%|███▌      | 355/1000 [05:57<10:51,  1.01s/it]

--- Epoch 356/1000 ---
trainLoss: 1.0338e-02
LR: 1.0000e-07
valLoss: 1.0335e-02
Better valLoss: 1.0335e-02, Saving models...


 36%|███▌      | 356/1000 [05:58<10:50,  1.01s/it]

--- Epoch 357/1000 ---
trainLoss: 1.0334e-02
LR: 1.0000e-07
valLoss: 1.0331e-02
Better valLoss: 1.0331e-02, Saving models...


 36%|███▌      | 357/1000 [05:59<11:08,  1.04s/it]

--- Epoch 358/1000 ---
trainLoss: 1.0330e-02
LR: 1.0000e-07
valLoss: 1.0327e-02
Better valLoss: 1.0327e-02, Saving models...


 36%|███▌      | 358/1000 [06:00<11:00,  1.03s/it]

--- Epoch 359/1000 ---
trainLoss: 1.0326e-02
LR: 1.0000e-07
valLoss: 1.0323e-02
Better valLoss: 1.0323e-02, Saving models...


 36%|███▌      | 359/1000 [06:01<10:50,  1.01s/it]

--- Epoch 360/1000 ---
trainLoss: 1.0322e-02
LR: 1.0000e-07
valLoss: 1.0319e-02
Better valLoss: 1.0319e-02, Saving models...


 36%|███▌      | 360/1000 [06:02<10:46,  1.01s/it]

--- Epoch 361/1000 ---
trainLoss: 1.0318e-02
LR: 1.0000e-07
valLoss: 1.0315e-02
Better valLoss: 1.0315e-02, Saving models...


 36%|███▌      | 361/1000 [06:03<10:39,  1.00s/it]

--- Epoch 362/1000 ---
trainLoss: 1.0314e-02
LR: 1.0000e-07
valLoss: 1.0311e-02
Better valLoss: 1.0311e-02, Saving models...


 36%|███▌      | 362/1000 [06:04<10:47,  1.01s/it]

--- Epoch 363/1000 ---
trainLoss: 1.0310e-02
LR: 1.0000e-07
valLoss: 1.0307e-02
Better valLoss: 1.0307e-02, Saving models...


 36%|███▋      | 363/1000 [06:05<10:46,  1.02s/it]

--- Epoch 364/1000 ---
trainLoss: 1.0306e-02
LR: 1.0000e-07
valLoss: 1.0303e-02
Better valLoss: 1.0303e-02, Saving models...


 36%|███▋      | 364/1000 [06:06<10:32,  1.01it/s]

--- Epoch 365/1000 ---
trainLoss: 1.0302e-02
LR: 1.0000e-07
valLoss: 1.0299e-02
Better valLoss: 1.0299e-02, Saving models...


 36%|███▋      | 365/1000 [06:07<10:24,  1.02it/s]

--- Epoch 366/1000 ---
trainLoss: 1.0298e-02
LR: 1.0000e-07
valLoss: 1.0295e-02
Better valLoss: 1.0295e-02, Saving models...


 37%|███▋      | 366/1000 [06:08<10:19,  1.02it/s]

--- Epoch 367/1000 ---
trainLoss: 1.0294e-02
LR: 1.0000e-07
valLoss: 1.0291e-02
Better valLoss: 1.0291e-02, Saving models...


 37%|███▋      | 367/1000 [06:09<10:09,  1.04it/s]

--- Epoch 368/1000 ---
trainLoss: 1.0290e-02
LR: 1.0000e-07
valLoss: 1.0287e-02
Better valLoss: 1.0287e-02, Saving models...


 37%|███▋      | 368/1000 [06:10<10:15,  1.03it/s]

--- Epoch 369/1000 ---
trainLoss: 1.0286e-02
LR: 1.0000e-07
valLoss: 1.0283e-02
Better valLoss: 1.0283e-02, Saving models...


 37%|███▋      | 369/1000 [06:11<10:23,  1.01it/s]

--- Epoch 370/1000 ---
trainLoss: 1.0282e-02
LR: 1.0000e-07
valLoss: 1.0279e-02
Better valLoss: 1.0279e-02, Saving models...


 37%|███▋      | 370/1000 [06:12<10:31,  1.00s/it]

--- Epoch 371/1000 ---
trainLoss: 1.0278e-02
LR: 1.0000e-07
valLoss: 1.0275e-02
Better valLoss: 1.0275e-02, Saving models...


 37%|███▋      | 371/1000 [06:13<10:27,  1.00it/s]

--- Epoch 372/1000 ---
trainLoss: 1.0274e-02
LR: 1.0000e-07
valLoss: 1.0271e-02
Better valLoss: 1.0271e-02, Saving models...


 37%|███▋      | 372/1000 [06:14<10:24,  1.01it/s]

--- Epoch 373/1000 ---
trainLoss: 1.0270e-02
LR: 1.0000e-07
valLoss: 1.0267e-02
Better valLoss: 1.0267e-02, Saving models...


 37%|███▋      | 373/1000 [06:15<10:24,  1.00it/s]

--- Epoch 374/1000 ---
trainLoss: 1.0266e-02
LR: 1.0000e-07
valLoss: 1.0263e-02
Better valLoss: 1.0263e-02, Saving models...


 37%|███▋      | 374/1000 [06:16<10:16,  1.01it/s]

--- Epoch 375/1000 ---
trainLoss: 1.0262e-02
LR: 1.0000e-07
valLoss: 1.0259e-02
Better valLoss: 1.0259e-02, Saving models...


 38%|███▊      | 375/1000 [06:17<10:27,  1.00s/it]

--- Epoch 376/1000 ---
trainLoss: 1.0258e-02
LR: 1.0000e-07
valLoss: 1.0255e-02
Better valLoss: 1.0255e-02, Saving models...


 38%|███▊      | 376/1000 [06:18<10:23,  1.00it/s]

--- Epoch 377/1000 ---
trainLoss: 1.0254e-02
LR: 1.0000e-07
valLoss: 1.0251e-02
Better valLoss: 1.0251e-02, Saving models...


 38%|███▊      | 377/1000 [06:19<10:34,  1.02s/it]

--- Epoch 378/1000 ---
trainLoss: 1.0250e-02
LR: 1.0000e-07
valLoss: 1.0247e-02
Better valLoss: 1.0247e-02, Saving models...


 38%|███▊      | 378/1000 [06:20<10:13,  1.01it/s]

--- Epoch 379/1000 ---
trainLoss: 1.0246e-02
LR: 1.0000e-07
valLoss: 1.0244e-02
Better valLoss: 1.0244e-02, Saving models...


 38%|███▊      | 379/1000 [06:21<09:57,  1.04it/s]

--- Epoch 380/1000 ---
trainLoss: 1.0242e-02
LR: 1.0000e-07
valLoss: 1.0239e-02
Better valLoss: 1.0239e-02, Saving models...


 38%|███▊      | 380/1000 [06:22<09:57,  1.04it/s]

--- Epoch 381/1000 ---
trainLoss: 1.0238e-02
LR: 1.0000e-07
valLoss: 1.0235e-02
Better valLoss: 1.0235e-02, Saving models...


 38%|███▊      | 381/1000 [06:23<09:50,  1.05it/s]

--- Epoch 382/1000 ---
trainLoss: 1.0234e-02
LR: 1.0000e-07
valLoss: 1.0231e-02
Better valLoss: 1.0231e-02, Saving models...


 38%|███▊      | 382/1000 [06:24<10:01,  1.03it/s]

--- Epoch 383/1000 ---
trainLoss: 1.0230e-02
LR: 1.0000e-07
valLoss: 1.0228e-02
Better valLoss: 1.0228e-02, Saving models...


 38%|███▊      | 383/1000 [06:25<09:51,  1.04it/s]

--- Epoch 384/1000 ---
trainLoss: 1.0227e-02
LR: 1.0000e-07
valLoss: 1.0223e-02
Better valLoss: 1.0223e-02, Saving models...


 38%|███▊      | 384/1000 [06:26<09:51,  1.04it/s]

--- Epoch 385/1000 ---
trainLoss: 1.0222e-02
LR: 1.0000e-07
valLoss: 1.0219e-02
Better valLoss: 1.0219e-02, Saving models...


 38%|███▊      | 385/1000 [06:27<10:05,  1.01it/s]

--- Epoch 386/1000 ---
trainLoss: 1.0218e-02
LR: 1.0000e-07
valLoss: 1.0215e-02
Better valLoss: 1.0215e-02, Saving models...


 39%|███▊      | 386/1000 [06:28<10:13,  1.00it/s]

--- Epoch 387/1000 ---
trainLoss: 1.0214e-02
LR: 1.0000e-07
valLoss: 1.0211e-02
Better valLoss: 1.0211e-02, Saving models...


 39%|███▊      | 387/1000 [06:29<10:11,  1.00it/s]

--- Epoch 388/1000 ---
trainLoss: 1.0210e-02
LR: 1.0000e-07


 39%|███▉      | 388/1000 [06:30<10:00,  1.02it/s]

valLoss: 1.0208e-02
Better valLoss: 1.0208e-02, Saving models...
--- Epoch 389/1000 ---
trainLoss: 1.0207e-02
LR: 1.0000e-07
valLoss: 1.0204e-02
Better valLoss: 1.0204e-02, Saving models...


 39%|███▉      | 389/1000 [06:31<09:53,  1.03it/s]

--- Epoch 390/1000 ---
trainLoss: 1.0203e-02
LR: 1.0000e-07
valLoss: 1.0199e-02
Better valLoss: 1.0199e-02, Saving models...


 39%|███▉      | 390/1000 [06:32<09:46,  1.04it/s]

--- Epoch 391/1000 ---
trainLoss: 1.0198e-02
LR: 1.0000e-07
valLoss: 1.0195e-02
Better valLoss: 1.0195e-02, Saving models...


 39%|███▉      | 391/1000 [06:33<09:55,  1.02it/s]

--- Epoch 392/1000 ---
trainLoss: 1.0194e-02
LR: 1.0000e-07
valLoss: 1.0192e-02
Better valLoss: 1.0192e-02, Saving models...


 39%|███▉      | 392/1000 [06:34<10:06,  1.00it/s]

--- Epoch 393/1000 ---
trainLoss: 1.0191e-02
LR: 1.0000e-07
valLoss: 1.0188e-02
Better valLoss: 1.0188e-02, Saving models...


 39%|███▉      | 393/1000 [06:35<10:05,  1.00it/s]

--- Epoch 394/1000 ---
trainLoss: 1.0187e-02
LR: 1.0000e-07
valLoss: 1.0184e-02
Better valLoss: 1.0184e-02, Saving models...


 39%|███▉      | 394/1000 [06:36<09:56,  1.02it/s]

--- Epoch 395/1000 ---
trainLoss: 1.0183e-02
LR: 1.0000e-07
valLoss: 1.0180e-02
Better valLoss: 1.0180e-02, Saving models...


 40%|███▉      | 395/1000 [06:37<10:07,  1.00s/it]

--- Epoch 396/1000 ---
trainLoss: 1.0179e-02
LR: 1.0000e-07
valLoss: 1.0176e-02
Better valLoss: 1.0176e-02, Saving models...


 40%|███▉      | 396/1000 [06:38<10:07,  1.01s/it]

--- Epoch 397/1000 ---
trainLoss: 1.0175e-02
LR: 1.0000e-07
valLoss: 1.0172e-02
Better valLoss: 1.0172e-02, Saving models...


 40%|███▉      | 397/1000 [06:39<10:08,  1.01s/it]

--- Epoch 398/1000 ---
trainLoss: 1.0171e-02
LR: 1.0000e-07
valLoss: 1.0168e-02
Better valLoss: 1.0168e-02, Saving models...


 40%|███▉      | 398/1000 [06:40<10:13,  1.02s/it]

--- Epoch 399/1000 ---
trainLoss: 1.0167e-02
LR: 1.0000e-07
valLoss: 1.0164e-02
Better valLoss: 1.0164e-02, Saving models...


 40%|███▉      | 399/1000 [06:41<10:12,  1.02s/it]

--- Epoch 400/1000 ---
trainLoss: 1.0163e-02
LR: 1.0000e-07
valLoss: 1.0160e-02
Better valLoss: 1.0160e-02, Saving models...


 40%|████      | 400/1000 [06:42<10:16,  1.03s/it]

--- Epoch 401/1000 ---
trainLoss: 1.0159e-02
LR: 1.0000e-07
valLoss: 1.0156e-02
Better valLoss: 1.0156e-02, Saving models...


 40%|████      | 401/1000 [06:43<10:10,  1.02s/it]

--- Epoch 402/1000 ---
trainLoss: 1.0155e-02
LR: 1.0000e-07
valLoss: 1.0152e-02
Better valLoss: 1.0152e-02, Saving models...


 40%|████      | 402/1000 [06:44<10:07,  1.02s/it]

--- Epoch 403/1000 ---
trainLoss: 1.0151e-02
LR: 1.0000e-07
valLoss: 1.0148e-02
Better valLoss: 1.0148e-02, Saving models...


 40%|████      | 403/1000 [06:45<10:05,  1.01s/it]

--- Epoch 404/1000 ---
trainLoss: 1.0147e-02
LR: 1.0000e-07
valLoss: 1.0144e-02
Better valLoss: 1.0144e-02, Saving models...


 40%|████      | 404/1000 [06:46<09:49,  1.01it/s]

--- Epoch 405/1000 ---
trainLoss: 1.0143e-02
LR: 1.0000e-07
valLoss: 1.0140e-02
Better valLoss: 1.0140e-02, Saving models...


 40%|████      | 405/1000 [06:47<09:53,  1.00it/s]

--- Epoch 406/1000 ---
trainLoss: 1.0139e-02
LR: 1.0000e-07
valLoss: 1.0136e-02
Better valLoss: 1.0136e-02, Saving models...


 41%|████      | 406/1000 [06:48<09:54,  1.00s/it]

--- Epoch 407/1000 ---
trainLoss: 1.0135e-02
LR: 1.0000e-07
valLoss: 1.0132e-02
Better valLoss: 1.0132e-02, Saving models...


 41%|████      | 407/1000 [06:49<09:58,  1.01s/it]

--- Epoch 408/1000 ---
trainLoss: 1.0131e-02
LR: 1.0000e-07
valLoss: 1.0128e-02
Better valLoss: 1.0128e-02, Saving models...


 41%|████      | 408/1000 [06:50<09:50,  1.00it/s]

--- Epoch 409/1000 ---
trainLoss: 1.0127e-02
LR: 1.0000e-07
valLoss: 1.0124e-02
Better valLoss: 1.0124e-02, Saving models...


 41%|████      | 409/1000 [06:51<09:53,  1.00s/it]

--- Epoch 410/1000 ---
trainLoss: 1.0123e-02
LR: 1.0000e-07
valLoss: 1.0120e-02
Better valLoss: 1.0120e-02, Saving models...


 41%|████      | 410/1000 [06:52<09:47,  1.00it/s]

--- Epoch 411/1000 ---
trainLoss: 1.0119e-02
LR: 1.0000e-07
valLoss: 1.0116e-02
Better valLoss: 1.0116e-02, Saving models...


 41%|████      | 411/1000 [06:53<09:36,  1.02it/s]

--- Epoch 412/1000 ---
trainLoss: 1.0115e-02
LR: 1.0000e-07
valLoss: 1.0112e-02
Better valLoss: 1.0112e-02, Saving models...


 41%|████      | 412/1000 [06:54<09:54,  1.01s/it]

--- Epoch 413/1000 ---
trainLoss: 1.0111e-02
LR: 1.0000e-07
valLoss: 1.0108e-02
Better valLoss: 1.0108e-02, Saving models...


 41%|████▏     | 413/1000 [06:55<09:56,  1.02s/it]

--- Epoch 414/1000 ---
trainLoss: 1.0107e-02
LR: 1.0000e-07
valLoss: 1.0104e-02
Better valLoss: 1.0104e-02, Saving models...


 41%|████▏     | 414/1000 [06:56<09:52,  1.01s/it]

--- Epoch 415/1000 ---
trainLoss: 1.0103e-02
LR: 1.0000e-07
valLoss: 1.0100e-02
Better valLoss: 1.0100e-02, Saving models...


 42%|████▏     | 415/1000 [06:57<09:55,  1.02s/it]

--- Epoch 416/1000 ---
trainLoss: 1.0099e-02
LR: 1.0000e-07
valLoss: 1.0096e-02
Better valLoss: 1.0096e-02, Saving models...


 42%|████▏     | 416/1000 [06:58<09:50,  1.01s/it]

--- Epoch 417/1000 ---
trainLoss: 1.0095e-02
LR: 1.0000e-07
valLoss: 1.0092e-02
Better valLoss: 1.0092e-02, Saving models...


 42%|████▏     | 417/1000 [06:59<09:45,  1.00s/it]

--- Epoch 418/1000 ---
trainLoss: 1.0091e-02
LR: 1.0000e-07
valLoss: 1.0088e-02
Better valLoss: 1.0088e-02, Saving models...


 42%|████▏     | 418/1000 [07:00<09:40,  1.00it/s]

--- Epoch 419/1000 ---
trainLoss: 1.0087e-02
LR: 1.0000e-07
valLoss: 1.0084e-02
Better valLoss: 1.0084e-02, Saving models...


 42%|████▏     | 419/1000 [07:01<09:38,  1.00it/s]

--- Epoch 420/1000 ---
trainLoss: 1.0083e-02
LR: 1.0000e-07
valLoss: 1.0080e-02
Better valLoss: 1.0080e-02, Saving models...


 42%|████▏     | 420/1000 [07:02<09:36,  1.01it/s]

--- Epoch 421/1000 ---
trainLoss: 1.0079e-02
LR: 1.0000e-07
valLoss: 1.0076e-02
Better valLoss: 1.0076e-02, Saving models...


 42%|████▏     | 421/1000 [07:03<09:28,  1.02it/s]

--- Epoch 422/1000 ---
trainLoss: 1.0076e-02
LR: 1.0000e-07
valLoss: 1.0073e-02
Better valLoss: 1.0073e-02, Saving models...


 42%|████▏     | 422/1000 [07:04<09:25,  1.02it/s]

--- Epoch 423/1000 ---
trainLoss: 1.0072e-02
LR: 1.0000e-07
valLoss: 1.0069e-02
Better valLoss: 1.0069e-02, Saving models...


 42%|████▏     | 423/1000 [07:05<09:08,  1.05it/s]

--- Epoch 424/1000 ---
trainLoss: 1.0068e-02
LR: 1.0000e-07
valLoss: 1.0065e-02
Better valLoss: 1.0065e-02, Saving models...


 42%|████▏     | 424/1000 [07:05<08:58,  1.07it/s]

--- Epoch 425/1000 ---
trainLoss: 1.0064e-02
LR: 1.0000e-07
valLoss: 1.0061e-02
Better valLoss: 1.0061e-02, Saving models...


 42%|████▎     | 425/1000 [07:06<08:53,  1.08it/s]

--- Epoch 426/1000 ---
trainLoss: 1.0060e-02
LR: 1.0000e-07
valLoss: 1.0057e-02
Better valLoss: 1.0057e-02, Saving models...


 43%|████▎     | 426/1000 [07:07<09:10,  1.04it/s]

--- Epoch 427/1000 ---
trainLoss: 1.0056e-02
LR: 1.0000e-07
valLoss: 1.0053e-02
Better valLoss: 1.0053e-02, Saving models...


 43%|████▎     | 427/1000 [07:08<09:15,  1.03it/s]

--- Epoch 428/1000 ---
trainLoss: 1.0052e-02
LR: 1.0000e-07
valLoss: 1.0049e-02
Better valLoss: 1.0049e-02, Saving models...


 43%|████▎     | 428/1000 [07:09<09:15,  1.03it/s]

--- Epoch 429/1000 ---
trainLoss: 1.0048e-02
LR: 1.0000e-07
valLoss: 1.0045e-02
Better valLoss: 1.0045e-02, Saving models...


 43%|████▎     | 429/1000 [07:10<09:21,  1.02it/s]

--- Epoch 430/1000 ---
trainLoss: 1.0044e-02
LR: 1.0000e-07
valLoss: 1.0041e-02
Better valLoss: 1.0041e-02, Saving models...


 43%|████▎     | 430/1000 [07:11<09:24,  1.01it/s]

--- Epoch 431/1000 ---
trainLoss: 1.0040e-02
LR: 1.0000e-07
valLoss: 1.0037e-02
Better valLoss: 1.0037e-02, Saving models...


 43%|████▎     | 431/1000 [07:12<09:13,  1.03it/s]

--- Epoch 432/1000 ---
trainLoss: 1.0036e-02
LR: 1.0000e-07
valLoss: 1.0033e-02
Better valLoss: 1.0033e-02, Saving models...


 43%|████▎     | 432/1000 [07:13<09:16,  1.02it/s]

--- Epoch 433/1000 ---
trainLoss: 1.0032e-02
LR: 1.0000e-07
valLoss: 1.0029e-02
Better valLoss: 1.0029e-02, Saving models...


 43%|████▎     | 433/1000 [07:14<09:06,  1.04it/s]

--- Epoch 434/1000 ---
trainLoss: 1.0028e-02
LR: 1.0000e-07
valLoss: 1.0025e-02
Better valLoss: 1.0025e-02, Saving models...


 43%|████▎     | 434/1000 [07:15<08:54,  1.06it/s]

--- Epoch 435/1000 ---
trainLoss: 1.0024e-02
LR: 1.0000e-07
valLoss: 1.0021e-02
Better valLoss: 1.0021e-02, Saving models...


 44%|████▎     | 435/1000 [07:16<08:43,  1.08it/s]

--- Epoch 436/1000 ---
trainLoss: 1.0020e-02
LR: 1.0000e-07
valLoss: 1.0017e-02
Better valLoss: 1.0017e-02, Saving models...


 44%|████▎     | 436/1000 [07:17<08:43,  1.08it/s]

--- Epoch 437/1000 ---
trainLoss: 1.0016e-02
LR: 1.0000e-07
valLoss: 1.0013e-02
Better valLoss: 1.0013e-02, Saving models...


 44%|████▎     | 437/1000 [07:18<08:57,  1.05it/s]

--- Epoch 438/1000 ---
trainLoss: 1.0012e-02
LR: 1.0000e-07
valLoss: 1.0009e-02
Better valLoss: 1.0009e-02, Saving models...


 44%|████▍     | 438/1000 [07:19<09:01,  1.04it/s]

--- Epoch 439/1000 ---
trainLoss: 1.0008e-02
LR: 1.0000e-07
valLoss: 1.0005e-02
Better valLoss: 1.0005e-02, Saving models...


 44%|████▍     | 439/1000 [07:20<09:21,  1.00s/it]

--- Epoch 440/1000 ---
trainLoss: 1.0004e-02
LR: 1.0000e-07
valLoss: 1.0001e-02
Better valLoss: 1.0001e-02, Saving models...


 44%|████▍     | 440/1000 [07:21<09:20,  1.00s/it]

--- Epoch 441/1000 ---
trainLoss: 1.0000e-02
LR: 1.0000e-07
valLoss: 9.9973e-03
Better valLoss: 9.9973e-03, Saving models...


 44%|████▍     | 441/1000 [07:22<09:10,  1.02it/s]

--- Epoch 442/1000 ---
trainLoss: 9.9964e-03
LR: 1.0000e-07
valLoss: 9.9935e-03
Better valLoss: 9.9935e-03, Saving models...


 44%|████▍     | 442/1000 [07:23<09:16,  1.00it/s]

--- Epoch 443/1000 ---


 44%|████▍     | 442/1000 [07:23<09:20,  1.00s/it]


KeyboardInterrupt: 

In [None]:
def relative_error_info(model, data, tensorboard_writer, tensorboard_recorder_step, quiet=False):
    """Roll `model` out over every batch in `data` and report its relative error.

    Each batch must unpack into four tensors ``(U_x, U_y, p_x, p_y)``. The model
    is called as ``model(U_x, p_x, p_y, window=simLen-1)`` under
    ``torch.no_grad()`` and its output is compared against ``U_y``.

    Relies on the notebook globals ``device``, ``simLen``, ``versionName`` and
    ``writeMessage``. The model's train/eval mode is not changed here; call
    ``model.eval()`` beforehand if that matters.

    Args:
        model: callable surrogate, invoked as shown above.
        data: iterable of batches (e.g. a DataLoader).
        tensorboard_writer: writer used for ``add_scalar`` / ``add_figure``;
            only touched when ``quiet`` is False, so it may be None otherwise.
        tensorboard_recorder_step: ``global_step`` passed to the writer.
        quiet: when True, skip all TensorBoard output. The relative error is
            still reported through ``writeMessage`` and the per-frame figure is
            still created (and left open).

    Returns:
        ``(sample_sim, sample_GT, rel_error)``: the prediction and ground truth
        for the first sample of the last batch (on the CPU), and the overall
        relative error ``||U - U_hat|| / ||U||`` as a scalar tensor.

    Raises:
        ValueError: if `data` yields no batches.
    """
    U_hats = []
    Us = []
    with torch.no_grad():
        for sampleBatch in data:
            U_x, U_y, p_x, p_y = sampleBatch
            U_x = U_x.to(device)
            p_x = p_x.to(device)
            U_y = U_y.to(device)
            p_y = p_y.to(device)
            Us.append(U_y.detach().cpu())
            U_hat = model(U_x, p_x, p_y, window=simLen-1)
            U_hats.append(U_hat.detach().cpu())

    if not Us:
        raise ValueError("relative_error_info: `data` yielded no batches")

    # Concatenate along the batch axis rather than stacking: torch.stack needs
    # every batch to have the same size and fails on a smaller final batch.
    # The norm runs over all elements, so the result is the same either way.
    Real_U = torch.cat(Us, dim=0)
    Surr_U = torch.cat(U_hats, dim=0)
    rel_error = torch.norm(Real_U - Surr_U)/torch.norm(Real_U)
    writeMessage("Relative_Error: {:.4e}".format(rel_error),versionName)
    if not quiet:
        tensorboard_writer.add_scalar(tag="Relative_Error", scalar_value=rel_error,
                                      global_step=tensorboard_recorder_step)

    # Per-frame relative error for the first sample of the last batch.
    sample_sim = U_hat[0].cpu()
    sample_GT = U_y[0].cpu()
    fig = plt.figure()
    # Flatten each of the simLen-1 frames to a vector and take one norm per frame.
    frame_diff = (sample_GT - sample_sim).reshape(simLen-1, -1)
    frame_ref = sample_GT.reshape(simLen-1, -1)
    frame_error = torch.norm(frame_diff, dim=1)/torch.norm(frame_ref, dim=1)
    plt.plot(range(1,simLen),frame_error)
    plt.title("Relative Error over Time for One Test Simulation")
    if not quiet:
        # close=True lets the writer close the figure once it has been recorded.
        tensorboard_writer.add_figure('test_rel_error_by_frame', fig, global_step=tensorboard_recorder_step,
                                      close=True, walltime=None)

    return sample_sim, sample_GT, rel_error

In [None]:
# Evaluate the surrogate on the training loader without writing to TensorBoard
# (quiet=True, so no writer or step is needed).
sample_sim, sample_GT, rel_error = relative_error_info(
    model=surrogate,
    data=trainDataLoader,
    tensorboard_writer=None,
    tensorboard_recorder_step=None,
    quiet=True,
)

In [None]:
import matplotlib.animation as animation
import matplotlib.gridspec as gridspec
import matplotlib.animation as manimati
from matplotlib import animation, rc
from IPython.display import Video
def create_1_channel_movie(im,outfile='sim.mp4',title='surrogate            simulation', fps=1):
    """Render a sequence of single-channel frames to a video file.

    NOTE(review): this definition shadows the `create_1_channel_movie` imported
    from `surrogates4sims.utils` in the imports cell.

    Args:
        im: indexable sequence of frames; `im[i].squeeze()` must be a 2-D image.
            Frames are drawn on a fixed 0-255 colour scale with the viridis
            colormap.
        outfile: path of the video to write.
        title: title shown above the image.
        fps: frames per second of the saved video (default 1, the previously
            hard-coded value).

    The figure created here is closed before returning, so it is not left
    open after the video has been written.
    """
    u_mx = 255  # upper end of the fixed colour scale
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)
        ax.set_title(title)
        # One AxesImage is created up front and only its data is swapped per
        # frame; calling imshow per frame would pile a new image onto the axes
        # for every frame of the movie.
        img = ax.imshow(im[0].squeeze(), cmap=plt.cm.viridis, vmin=0, vmax=u_mx)

        # initialization function: draw the first frame
        def init():
            img.set_data(im[0].squeeze())
            return (img,)

        # animation function, called once per frame index
        def animate(i):
            img.set_data(im[i].squeeze())
            return (img,)

        # blit=True expects the callbacks to return the artists they changed.
        anim = animation.FuncAnimation(fig, animate, init_func=init,
                                       frames=len(im), interval=100, blit=True)
        anim.save(outfile, fps=fps, extra_args=['-vcodec', 'libx264'])
    finally:
        plt.close(fig)
    
def get_img(X):
    """Min-max scale a tensor to the range 0-255 and cast it to uint8.

    The minimum and maximum are taken over the whole tensor, so the smallest
    element maps to 0 and the largest to 255; the cast truncates fractions.

    Args:
        X: a non-empty torch tensor.

    Returns:
        A uint8 tensor with the same shape as `X`. If every element of `X` is
        equal, an all-zero tensor is returned.
    """
    M = 255
    mn = X.min()
    span = X.max() - mn
    if span == 0:
        # Constant input: (X - mn)/span would be 0/0 = NaN, and casting NaN to
        # uint8 is undefined. Return a well-defined blank image instead.
        return torch.zeros_like(X, dtype=torch.uint8)
    return (M * ((X - mn) / span)).type(torch.uint8)

def make_movie(sample_sim, sample_GT, ID = ''):
    """Write an mp4 that shows the surrogate rollout next to the ground truth.

    Args:
        sample_sim: predicted sequence, e.g. as returned by relative_error_info.
        sample_GT: matching ground-truth sequence.
        ID: suffix appended to the global `versionName` to form the file name
            `<versionName><ID>.mp4`.

    Both inputs must be 4-D tensors that agree on every axis except possibly
    the last (presumably (frames, channels, height, width) - confirm against
    the data loader).
    """
    # get_img rescales each sequence to uint8 on its own min/max.
    frames_sim = get_img(sample_sim)
    frames_gt = get_img(sample_GT)

    # Join the two along dim 3, then keep index 0 of dim 1.
    side_by_side = torch.cat([frames_sim, frames_gt], dim=3)
    Xrgb = side_by_side[:, 0].detach().cpu().numpy()

    # to-do: dave says we should flip vertically before making the mp4
    outGif = '{}.mp4'.format(versionName + ID)
    create_1_channel_movie(Xrgb, outfile=outGif)

In [None]:
# Write the train-set comparison movie, then embed the resulting file in the notebook.
make_movie(sample_sim=sample_sim, sample_GT=sample_GT, ID='_train')
Video('{}.mp4'.format(versionName+'_train'))

In [44]:

# Embed the train-set movie file (<versionName>_train.mp4) in the notebook output.
Video('{}.mp4'.format(versionName+'_train'))

In [49]:
# Restore the weights stored at <cps>/<versionName>. map_location places the
# loaded tensors on `device`, so the checkpoint also loads on a machine whose
# GPU layout differs from the one it was saved on.
surrogate.load_state_dict(torch.load(os.path.join(cps, versionName), map_location=device))

<All keys matched successfully>

In [52]:
# Pull a single batch from the test loader and display the shape of each of
# its four tensors together with the number of batches in the loader.
X,Y, p_x, p_y = next(iter(testDataLoader))
X.shape,Y.shape, p_x.shape, p_y.shape, len(testDataLoader)

(torch.Size([3, 1, 128, 128]),
 torch.Size([3, 5, 1, 128, 128]),
 torch.Size([3, 2]),
 torch.Size([3, 5, 2]),
 2)

In [53]:
# Roll the surrogate out over every test batch, keeping ground truth and
# predictions on the CPU for the error computations in the cells below.
surrogate.eval()
U_hats, Us = [], []
with torch.no_grad():
    for i, sampleBatch in enumerate(testDataLoader, start=1):
        # move all four tensors of the batch to the compute device
        U_x, U_y, p_x, p_y = (t.to(device) for t in sampleBatch)
        Us.append(U_y.detach().cpu())
        U_hat = surrogate(U_x, p_x, p_y, window=simLen-1)
        U_hats.append(U_hat.detach().cpu())

# torch.stack adds a leading per-batch axis and requires equally sized batches.
Real_U = torch.stack(Us)
Surr_U = torch.stack(U_hats)

In [54]:
# Relative error of each test batch on its own: ||real - surrogate|| / ||real||.
for real_batch, surr_batch in zip(Us, U_hats):
    rel_error = (real_batch - surr_batch).norm() / real_batch.norm()
    writeMessage("Relative_Error: {:.4e}".format(rel_error), versionName)

Relative_Error: 9.9929e-03
Relative_Error: 9.9929e-03


In [55]:
# Relative error over the whole stacked test set, logged and recorded in TensorBoard.
rel_error = (Real_U - Surr_U).norm() / Real_U.norm()
writeMessage("Relative_Error: {:.4e}".format(rel_error), versionName)
test_writer.add_scalar("Relative_Error", rel_error, tensorboard_recorder_step)
test_writer.flush()

Relative_Error: 9.9926e-03
