In [1]:
# EXPORT
# --- Must haves ---
import os, sys
sys.path.append('..')

import torch
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.cuda as cuda
import torch.nn as nn
import torchvision
import torch.nn.functional as F

from surrogates4sims.mantaflowDatasets import MantaFlowDataset, getSingleSim, createMantaFlowTrainTest

from surrogates4sims.utils import create_opt, create_one_cycle, find_lr, printNumModelParams, \
                                    rmse, writeMessage, plotSampleWprediction, plotSampleWpredictionByChannel, \
                                    plotSample, curl, jacobian, stream2uv, create_movie, convertSimToImage

from surrogates4sims.models import Generator, Encoder, AE_no_P, AE_xhat_z, AE_xhat_zV2

from surrogates4sims.train import trainEpoch, validEpoch

import numpy as np
from tqdm import tqdm
from copy import deepcopy

In [2]:
# data 
eval_only=True
DEBUG = False
# model name, for tensorboard recording and checkpointing purposes.
versionName = "plateau_train"

# GPU Numbers to use. Comma seprate them for multi-GPUs.
gpu_ids = "1,2"
versionName = versionName + '_GPUs{}'.format(gpu_ids.replace(',',''))
# path to load model weights.
pretrained_path = None

# rate at which to record metrics. (number of batches to average over when recording metrics, e.g. "every 5 batches")
tensorboard_rate = 5

# number of epochs to train. This is defined here so we can use the OneCycle LR Scheduler.
epochs = 1000

# Data Directory
dataDirec = '/data/mantaFlowSim/data/smoke_pos21_size5_f200/v'
reverseXY = False 

# checkpoint directory
cps = 'cps'
tensorboard_direc = "tb"

findLRs = False  

# hyper-params
seed = 1234
np.random.seed(seed)
testSplit = .1
bz = 20
numSamplesToKeep = 400 #if not debugging
latentDim = 16
window_size = 5
filters = 128
num_conv = 4 # breaks when less than 2
simLen = 200
stack = True
simVizIndex = 0 # sim in the test set to visualize
createStreamFcn = False
doJacobian = False
repeat = 0
skip_connection = False
patience = 2
if DEBUG:
    epochs = 10000
    numSamplesToKeep = bz
    
versionName = versionName + '_latentDim{}_filters{}_bz{}_numConv{}_stream{}_jacobian{}_epochs{}_stack{}'.format(latentDim,filters,bz,num_conv,createStreamFcn,doJacobian,epochs,stack)
versionName


os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]=gpu_ids
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

trainData, testData = createMantaFlowTrainTest(dataDirec,simLen,testSplit,seed)
print((len(trainData),len(testData)))

Using device: cuda
(19000, 2000)


In [3]:
class MantaFlowDataset(Dataset):
    def __init__(self, 
                 dataDirec='/home/widemann1/carbon_capture/surrogate_nn_for_pde/deep-fluids/data/smoke_pos21_size5_f200/v',
                 numToKeep=np.infty,transform=None, reverseXY=False, preprocess=True, AE=False,
                 w = 1, simLen = 200): 
        if type(dataDirec) == list:
            self.files = dataDirec
        else:
            self.files = glob(os.path.join(dataDirec,'*.npz'))
        self.dataDirec = dataDirec
        self.numToKeep = numToKeep
        self.transform = transform
        self.reverseXY = reverseXY
        self.AE = AE
        self.w = w
        self.simLen = simLen
        self.data = []
 
        if numToKeep < len(self.files):
            self.files = self.files[:numToKeep]
        for f in tqdm(self.files):
            X,y = self.loadfile(f)
            
            if preprocess:
                X,y = self.preprocessFcn(X,y)
                
            if reverseXY:
                self.data.append((y,X))
            else:
                self.data.append((X,y))

    def loadfile(self,fn):
        A = np.load(fn)
        X = A['x'].astype('float32')
        X = np.rollaxis(X,-1)
        y = A['y'].astype('float32')
        return X,y

    def preprocessFcn(self,X,y):
        x_range = 11.953
        X /= x_range
        y_range = [[0.2, 0.8], [0.04, 0.12], [0.0, 199.0]]
        for i, ri in enumerate(y_range):
            y[i] = (y[i]-ri[0]) / (ri[1]-ri[0]) * 2 - 1
        return X,y
    
    def __len__(self):
        return len(self.data)

    def plot(self,idx,savefig=False):
        X, label  = self.data[idx]
        if self.reverseXY:
            X = label
            
        plt.figure(figsize=(20,10))
        
        plt.subplot(211)
        fn = self.files[idx].replace('.npz','')
        title = '{} channel 0'.format(fn)
        plt.title(title)
        plt.imshow(X[0][::-1])
        plt.colorbar()
        
        plt.subplot(212)
        title = '{} channel 1'.format(fn)
        plt.title(title)
        plt.imshow(X[1][::-1])
        plt.colorbar()
        
        if savefig:
            title = title.replace(' ','_') + '.png'
            plt.savefig(title, dpi=300)
            plt.close()
        else:
            plt.show()
    
    def __getitem__(self, idx):
        q = idx // self.simLen
        r_idx = np.random.randint(0,self.simLen-self.w)
        x = self.data[q*simLen + r_idx : q*simLen + r_idx + 1]
        y = self.data[q*simLen + r_idx + 1 : q*simLen + r_idx + 1 + self.w]
        # to unpack this data into X (image) and p (cfg settings) arrays, use the following code
        U_x, p_x = zip(*x)
        U_y, p_y = zip(*y)
        return np.array(U_x), np.array(U_y), np.array(p_x), np.array(p_y)

In [4]:
# datasets may be smaller because: numSamplesToKeep 
testDataset = MantaFlowDataset(testData, reverseXY=reverseXY, numToKeep=numSamplesToKeep, AE=False,
                               w=window_size, simLen=200)
trainDataset = MantaFlowDataset(trainData, reverseXY=reverseXY,numToKeep=numSamplesToKeep, AE=False,
                                w=window_size, simLen=200)
len(trainDataset), len(testDataset)

trainDataLoader = DataLoader(dataset=trainDataset, batch_size=bz, shuffle=True, drop_last=True)
testDataLoader = DataLoader(dataset=testDataset, batch_size=bz)

U_x, U_y, p_x, p_y = next(iter(trainDataLoader))
print(U_x.shape, p_x.shape)
U_x = U_x.squeeze()
p_x = p_x.squeeze()
U_x.shape, U_y.shape, p_x.shape, p_y.shape

100%|██████████| 400/400 [00:00<00:00, 573.01it/s]
100%|██████████| 400/400 [00:00<00:00, 574.33it/s]


torch.Size([20, 1, 2, 128, 96]) torch.Size([20, 1, 3])


(torch.Size([20, 2, 128, 96]),
 torch.Size([20, 5, 2, 128, 96]),
 torch.Size([20, 3]),
 torch.Size([20, 5, 3]))

In [5]:
# Encoder 

AE_model = AE_xhat_zV2(U_x, filters, latentDim, num_conv, repeat, 
                 skip_connection, stack, conv_k=3, last_k=3, 
                 act=nn.LeakyReLU(), return_z=True, stream=createStreamFcn, device='cpu')

'''
if len(gpu_ids.split(',')) > 1:
    AE_model = nn.DataParallel(AE_model)
    
'''
Xhat,z = AE_model(U_x)
Xhat.shape, z.shape



[128, 8, 6]


(torch.Size([20, 2, 128, 96]), torch.Size([20, 16]))

In [6]:
!ls '/home/widemann1/surrogates4sims/cps/'

allinOneModel_batchnorm_GPUs0_latentDim16_filters128_bz16_numConv4_streamTrue_jacobianTrue_epochs2_stackTrue_lr0.0001
allinOneModel_latentDim16_filters128_bz16_numConv4_streamTrue_jacobianTrue_epochs100_stackFalse
allinOneModel_latentDim16_filters128_bz32_numConv4_streamFalse_jacobianTrue_epochs100_stackFalse
allinOneModel_latentDim16_filters128_bz32_numConv4_streamFalse_jacobianTrue_epochs100_stackTrue
allinOneModel_latentDim16_filters128_bz32_numConv4_streamTrue_jacobianTrue_epochs100_stackFalse
allinOneModel_latentDim16_filters128_bz64_numConv2_streamTrue_jacobianTrue_epochs100_stackFalse
allinOneModel_latentDim16_filters128_bz64_numConv4_streamTrue_jacobianTrue_epochs2_stackFalse
allinOneModel_latentDim16_filters128_bz8_numConv4_streamFalse_jacobianTrue_epochs100_stackFalse
allinOneModel_latentDim16_filters128_bz8_numConv4_streamFalse_jacobianTrue_epochs100_stackTrue
allinOneModel_latentDim16_filters32_bz128_numConv2_streamFalse_jacobianFalse_epochs100_stackTrue
allinOneM

In [7]:
versionName

'plateau_train_GPUs12_latentDim16_filters128_bz20_numConv4_streamFalse_jacobianFalse_epochs1000_stackTrue'

In [8]:
AE_model.load_state_dict(torch.load(os.path.join('/home/widemann1/surrogates4sims/cps',
'plateau_train_GPUs2_latentDim16_filters128_bz16_numConv4_streamFalse_jacobianFalse_epochs1000_stackTrue_lr0.0001')))

<All keys matched successfully>

In [9]:
# LIN Model
class MLP(nn.Module):
    def __init__(self, X, hiddenLayerSizes = [1024], activation=nn.ELU()):
        super(MLP,self).__init__()
        
        self.activation = activation
        self.inputSize = X.shape[1:]
        self.modules = []
        self.modules.append(nn.Linear(np.prod(self.inputSize),hiddenLayerSizes[0]))
        self.modules.append(self.activation)
        for idx,sz in enumerate(hiddenLayerSizes[:-1]):
            self.modules.append(nn.Linear(hiddenLayerSizes[idx],hiddenLayerSizes[idx+1]))
            self.modules.append(self.activation)
                               
        self.modules.append(nn.Linear(hiddenLayerSizes[-1],np.prod(self.inputSize)))
        self.layers = nn.Sequential(*self.modules)
                                
        
    def forward(self,x):
        x = self.layers(x)
        return x
    
hiddenLayers = [128,128]
LIN_model = MLP(z, hiddenLayerSizes=hiddenLayers, activation=nn.ELU())
'''
if len(gpu_ids.split(',')) > 1:
    LIN_model = nn.DataParallel(LIN_model)
'''

"\nif len(gpu_ids.split(',')) > 1:\n    LIN_model = nn.DataParallel(LIN_model)\n"

In [10]:
# LIN_model(torch.ones((1,16),device=device)).shape

In [11]:
# surrogate class

class Surrogate(nn.Module):
    
    def __init__(self, window,
                 z_size, p_size,
                LIN, encoder, decoder):
        super(Surrogate, self).__init__()
        self.window = window
        self.z_size = z_size # this does not include the size of p
        self.p_size = p_size
        self.c_size = z_size + p_size # this does include the size of p
        self.LIN = LIN
        self.encoder = encoder
        self.decoder = decoder
        
    def encode(self, U):
        
        return self.encoder(U)
        
    def decode(self, encoding):
        
        return self.decoder(encoding)
        
    def predict_next_w_encodings(self, encoding, p_y, window = None):
        '''
        use the LIN to predict the next w encodings for each 
        encoded U in the batch
        '''
        
        if window == None:
            window = self.window
            
        predicted_encodings = []
            
        # given a batch of encodings, advance each encoding window time steps.
        # save the result at each time step
        for i in range(window):
            encoding = self.LIN(encoding) + encoding # use LIN to predict delta in encoding
            # this was encoding[:,:,-self.p_size:] in 09_manta..., why the extra dimension?
            encoding[:,-self.p_size:] = p_y[:, i]
            predicted_encodings.append(encoding)
            
            
        return torch.stack(predicted_encodings)
    
    def forward(self, U, p_y):
        
        encoding = self.encode(U)
        encoding_w = self.predict_next_w_encodings(encoding, p_y)
        # want to have this agree with U_y, which is [batch_size, window_size, channels, nx, ny]
        # right now, it's [window_size, batch_size, c_size], so transpose dimensions 0 and 1
        # print(encoding_w.shape)
        U = torch.stack([self.decode(encoding_i) for encoding_i in encoding_w])
        
        return U.transpose(0,1)
    
    
surrogate = Surrogate(window_size, latentDim - 3, 3, LIN_model, AE_model.encoder, AE_model.generator)

In [12]:
# note the important difference here
foo = torch.tensor([[1,1],[2,2],[3,3]])
foo, foo.reshape(2,3), foo.transpose(0,1)

(tensor([[1, 1],
         [2, 2],
         [3, 3]]),
 tensor([[1, 1, 2],
         [2, 3, 3]]),
 tensor([[1, 2, 3],
         [1, 2, 3]]))

In [13]:
encoding = surrogate.encode(U_x)
decoding = surrogate.decode(encoding)
assert surrogate.c_size == 16
assert surrogate.p_size == len(p_x[0])
assert encoding.shape[-1] == surrogate.c_size
assert decoding.shape == U_x.shape

In [14]:
U_hat = surrogate.forward(U_x, p_y)
U_hat.shape, U_y.shape, torch.norm(U_hat-U_y,p=1)/torch.norm(U_y)

(torch.Size([20, 5, 2, 128, 96]),
 torch.Size([20, 5, 2, 128, 96]),
 tensor(367.6299, grad_fn=<DivBackward0>))

In [15]:
del surrogate, encoding, decoding, U_hat
surrogate = Surrogate(window_size, latentDim - 3, 3, LIN_model, AE_model.encoder, AE_model.generator).to(device)

In [16]:
surrogate.encoder.conv1.weight[-1]

tensor([[[-0.0615,  0.2370,  0.0193],
         [-0.1209, -0.1067,  0.1109],
         [-0.0105,  0.0456, -0.2153]],

        [[-0.0179,  0.2263, -0.1354],
         [ 0.0473, -0.0644, -0.1862],
         [-0.0807,  0.0856,  0.0642]]], device='cuda:0',
       grad_fn=<SelectBackward>)

In [17]:
max_lr = .0001
start_lr = 5*max_lr/10
#opt = create_opt(max_lr,model)
#lr_scheduler = create_one_cycle(opt,max_lr,epochs,trainDataLoader)
opt = torch.optim.Adam(surrogate.parameters(),lr=max_lr,betas=(.5,.999))
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt,patience=patience)

In [18]:
def L1_loss(pred, target):
    return torch.mean(torch.abs(pred - target))


def jacobian_loss(pred, target, device='cpu'):
    return L1_loss(jacobian(pred, device), jacobian(target, device))


def curl_loss(pred, target, device):
    return L1_loss(curl(pred, device), curl(target, device))


L = nn.MSELoss()


def p_loss(pred, target):
    return L(pred[:, -target.shape[1]:], target)


def loss(pred, target, device):
    
    if createStreamFcn:
        pred = stream2uv(pred, device)
        
    L1 = L1_loss(pred, target)
    Lj = 0
    if doJacobian:
        Lj = jacobian_loss(pred, target, device)
        
    return L1 + Lj

In [19]:
def trainEpoch(myDataLoader, tensorboard_writer, model, opt, p_loss, loss,
               metric, lr_scheduler, tensorboard_rate, device,
               tensorboard_recorder_step, total_steps):
    running_loss = 0.0
    running_rmse = 0.0
    total_loss = 0.0
    running_ploss = 0.0
    for i, sampleBatch in enumerate(myDataLoader, start=1):

        # --- Main Training ---
        
        # gpu
        U_x, U_y, p_x, p_y = sampleBatch
        U_x = U_x.squeeze().to(device)
        p_x = p_x.squeeze().to(device)
        U_y = U_y.to(device)
        p_y = p_y.to(device)
            

        # zero the parameter gradients
        opt.zero_grad()

        U_hat = model(U_x, p_y)
        pl = 0
        ll = loss(U_hat, U_y, device)
        combined_loss = pl + ll
        combined_loss.backward()
        opt.step()
        
        # loss
        batch_loss = combined_loss.item()
        running_loss += batch_loss
        total_loss += batch_loss
        
        batch_ploss = pl
        running_ploss += batch_ploss

        # --- Metrics Recording ---

        # metrics
        r = metric(U_hat, U_y)
        running_rmse += r

        # record lr change
        total_steps += 1
        tensorboard_writer.add_scalar(tag="LR", scalar_value=opt.param_groups[0]['lr'], global_step=total_steps)
        #lr_scheduler.step()

        # tensorboard writes
        if (i % tensorboard_rate == 0):
            tensorboard_recorder_step += 1
            avg_running_loss = running_loss/tensorboard_rate
            avg_running_rmse = running_rmse/tensorboard_rate
            avg_running_ploss = running_ploss/tensorboard_rate
            tensorboard_writer.add_scalar(tag="Loss", scalar_value=avg_running_loss, global_step=tensorboard_recorder_step)
            tensorboard_writer.add_scalar(tag="p_loss", scalar_value=avg_running_ploss, global_step=tensorboard_recorder_step)
            tensorboard_writer.add_scalar(tag=metric.__name__, scalar_value=avg_running_rmse, global_step=tensorboard_recorder_step)
            # reset running_loss for the next set of batches. (tensorboard_rate number of batches)
            running_loss = 0.0
            running_rmse = 0.0
            running_ploss = 0.0

    return total_loss, tensorboard_recorder_step, total_steps

In [20]:
try:
    os.mkdir(cps)
except:
    print("checkpoints directory already exists :)")
    
# create a summary writer.
train_writer = SummaryWriter(os.path.join(tensorboard_direc, versionName,'train'))
test_writer = SummaryWriter(os.path.join(tensorboard_direc, versionName,'valid'))
tensorboard_recorder_step = 0
total_steps = 0

checkpoints directory already exists :)


In [21]:
writeMessage('---------- Started Training ----------', versionName)
bestLoss = np.infty

if not eval_only:
    for epoch in tqdm(range(1, epochs+1)):  # loop over the dataset multiple times

        writeMessage("--- Epoch {0}/{1} ---".format(epoch, epochs), versionName)

        surrogate.train()
        trainLoss, tensorboard_recorder_step, total_steps = trainEpoch(trainDataLoader, 
                                                                       train_writer, surrogate,
                                                                       opt, p_loss, loss,
                                                                       rmse, lr_scheduler, 
                                                                       tensorboard_rate, device,
                                                                       tensorboard_recorder_step, total_steps)

        writeMessage("trainLoss: {:.4e}".format(trainLoss),versionName)
        writeMessage("LR: {:.4e}".format(opt.param_groups[0]['lr']),versionName)
        if trainLoss < bestLoss:
            bestLoss = trainLoss
            writeMessage("Better trainLoss: {:.4e}, Saving models...".format(bestLoss),versionName)
            torch.save(surrogate.state_dict(), os.path.join(cps,versionName))

    #     model.eval()
    #     valLoss = validEpoch(testDataLoader, test_writer, model, p_loss, loss, rmse, device, tensorboard_recorder_step)
    #     writeMessage("valLoss: {:.4e}".format(valLoss),versionName)

        # checkpoint progress
    #     if valLoss < bestLoss:
    #         bestLoss = valLoss
    #         writeMessage("Better valLoss: {:.4e}, Saving models...".format(bestLoss),versionName)
    #         torch.save(model.state_dict(), os.path.join(cps,versionName))

        lr_scheduler.step(trainLoss)

        if opt.param_groups[0]['lr'] < 1e-8:
            break
    writeMessage('---------- Finished Training ----------', versionName)

  0%|          | 0/1000 [00:00<?, ?it/s]

---------- Started Training ----------
--- Epoch 1/1000 ---




trainLoss: 4.0941e-01
LR: 1.0000e-04
Better trainLoss: 4.0941e-01, Saving models...


  0%|          | 1/1000 [00:14<3:57:36, 14.27s/it]

--- Epoch 2/1000 ---


  0%|          | 2/1000 [00:27<3:53:14, 14.02s/it]

trainLoss: 3.5289e-01
LR: 1.0000e-04
Better trainLoss: 3.5289e-01, Saving models...
--- Epoch 3/1000 ---


  0%|          | 3/1000 [00:41<3:50:11, 13.85s/it]

trainLoss: 3.2235e-01
LR: 1.0000e-04
Better trainLoss: 3.2235e-01, Saving models...
--- Epoch 4/1000 ---


  0%|          | 4/1000 [00:54<3:48:08, 13.74s/it]

trainLoss: 2.9835e-01
LR: 1.0000e-04
Better trainLoss: 2.9835e-01, Saving models...
--- Epoch 5/1000 ---


  0%|          | 5/1000 [01:08<3:48:08, 13.76s/it]

trainLoss: 2.9435e-01
LR: 1.0000e-04
Better trainLoss: 2.9435e-01, Saving models...
--- Epoch 6/1000 ---


  1%|          | 6/1000 [01:22<3:47:05, 13.71s/it]

trainLoss: 2.8774e-01
LR: 1.0000e-04
Better trainLoss: 2.8774e-01, Saving models...
--- Epoch 7/1000 ---


  1%|          | 7/1000 [01:35<3:46:15, 13.67s/it]

trainLoss: 2.8256e-01
LR: 1.0000e-04
Better trainLoss: 2.8256e-01, Saving models...
--- Epoch 8/1000 ---


  1%|          | 8/1000 [01:49<3:45:45, 13.65s/it]

trainLoss: 2.5913e-01
LR: 1.0000e-04
Better trainLoss: 2.5913e-01, Saving models...
--- Epoch 9/1000 ---


  1%|          | 9/1000 [02:02<3:45:14, 13.64s/it]

trainLoss: 2.3177e-01
LR: 1.0000e-04
Better trainLoss: 2.3177e-01, Saving models...
--- Epoch 10/1000 ---


  1%|          | 10/1000 [02:16<3:44:11, 13.59s/it]

trainLoss: 2.3583e-01
LR: 1.0000e-04
--- Epoch 11/1000 ---


  1%|          | 11/1000 [02:29<3:43:26, 13.56s/it]

trainLoss: 2.3752e-01
LR: 1.0000e-04
--- Epoch 12/1000 ---


  1%|          | 12/1000 [02:43<3:43:30, 13.57s/it]

trainLoss: 2.1520e-01
LR: 1.0000e-04
Better trainLoss: 2.1520e-01, Saving models...
--- Epoch 13/1000 ---


  1%|▏         | 13/1000 [02:56<3:42:40, 13.54s/it]

trainLoss: 2.3048e-01
LR: 1.0000e-04
--- Epoch 14/1000 ---


  1%|▏         | 14/1000 [03:10<3:42:02, 13.51s/it]

trainLoss: 2.4872e-01
LR: 1.0000e-04
--- Epoch 15/1000 ---


  2%|▏         | 15/1000 [03:23<3:41:38, 13.50s/it]

trainLoss: 2.2120e-01
LR: 1.0000e-04
--- Epoch 16/1000 ---


  2%|▏         | 16/1000 [03:37<3:41:55, 13.53s/it]

trainLoss: 1.5677e-01
LR: 1.0000e-05
Better trainLoss: 1.5677e-01, Saving models...
--- Epoch 17/1000 ---


  2%|▏         | 17/1000 [03:51<3:42:05, 13.56s/it]

trainLoss: 1.3463e-01
LR: 1.0000e-05
Better trainLoss: 1.3463e-01, Saving models...
--- Epoch 18/1000 ---


  2%|▏         | 18/1000 [04:04<3:42:00, 13.57s/it]

trainLoss: 1.3152e-01
LR: 1.0000e-05
Better trainLoss: 1.3152e-01, Saving models...
--- Epoch 19/1000 ---


  2%|▏         | 19/1000 [04:18<3:42:00, 13.58s/it]

trainLoss: 1.2823e-01
LR: 1.0000e-05
Better trainLoss: 1.2823e-01, Saving models...
--- Epoch 20/1000 ---


  2%|▏         | 20/1000 [04:31<3:42:26, 13.62s/it]

trainLoss: 1.2785e-01
LR: 1.0000e-05
Better trainLoss: 1.2785e-01, Saving models...
--- Epoch 21/1000 ---


  2%|▏         | 21/1000 [04:45<3:42:24, 13.63s/it]

trainLoss: 1.2889e-01
LR: 1.0000e-05
--- Epoch 22/1000 ---


  2%|▏         | 22/1000 [04:59<3:42:29, 13.65s/it]

trainLoss: 1.2525e-01
LR: 1.0000e-05
Better trainLoss: 1.2525e-01, Saving models...
--- Epoch 23/1000 ---


  2%|▏         | 23/1000 [05:12<3:41:35, 13.61s/it]

trainLoss: 1.2537e-01
LR: 1.0000e-05
--- Epoch 24/1000 ---


  2%|▏         | 24/1000 [05:26<3:40:55, 13.58s/it]

trainLoss: 1.2615e-01
LR: 1.0000e-05
--- Epoch 25/1000 ---


  2%|▎         | 25/1000 [05:39<3:40:58, 13.60s/it]

trainLoss: 1.2214e-01
LR: 1.0000e-05
Better trainLoss: 1.2214e-01, Saving models...
--- Epoch 26/1000 ---


  3%|▎         | 26/1000 [05:53<3:41:05, 13.62s/it]

trainLoss: 1.2121e-01
LR: 1.0000e-05
Better trainLoss: 1.2121e-01, Saving models...
--- Epoch 27/1000 ---


  3%|▎         | 27/1000 [06:07<3:40:29, 13.60s/it]

trainLoss: 1.2324e-01
LR: 1.0000e-05
--- Epoch 28/1000 ---


  3%|▎         | 28/1000 [06:20<3:40:34, 13.62s/it]

trainLoss: 1.2010e-01
LR: 1.0000e-05
Better trainLoss: 1.2010e-01, Saving models...
--- Epoch 29/1000 ---


  3%|▎         | 29/1000 [06:34<3:40:27, 13.62s/it]

trainLoss: 1.1935e-01
LR: 1.0000e-05
Better trainLoss: 1.1935e-01, Saving models...
--- Epoch 30/1000 ---


  3%|▎         | 30/1000 [06:48<3:40:27, 13.64s/it]

trainLoss: 1.1895e-01
LR: 1.0000e-05
Better trainLoss: 1.1895e-01, Saving models...
--- Epoch 31/1000 ---


  3%|▎         | 31/1000 [07:01<3:39:46, 13.61s/it]

trainLoss: 1.1992e-01
LR: 1.0000e-05
--- Epoch 32/1000 ---


  3%|▎         | 32/1000 [07:15<3:39:07, 13.58s/it]

trainLoss: 1.1975e-01
LR: 1.0000e-05
--- Epoch 33/1000 ---


  3%|▎         | 33/1000 [07:28<3:39:23, 13.61s/it]

trainLoss: 1.1614e-01
LR: 1.0000e-05
Better trainLoss: 1.1614e-01, Saving models...
--- Epoch 34/1000 ---


  3%|▎         | 34/1000 [07:42<3:40:16, 13.68s/it]

trainLoss: 1.1505e-01
LR: 1.0000e-05
Better trainLoss: 1.1505e-01, Saving models...
--- Epoch 35/1000 ---


  4%|▎         | 35/1000 [07:56<3:39:41, 13.66s/it]

trainLoss: 1.1646e-01
LR: 1.0000e-05
--- Epoch 36/1000 ---


  4%|▎         | 36/1000 [08:09<3:38:54, 13.62s/it]

trainLoss: 1.1742e-01
LR: 1.0000e-05
--- Epoch 37/1000 ---


  4%|▎         | 37/1000 [08:23<3:38:14, 13.60s/it]

trainLoss: 1.1747e-01
LR: 1.0000e-05
--- Epoch 38/1000 ---


  4%|▍         | 38/1000 [08:37<3:38:27, 13.63s/it]

trainLoss: 1.1264e-01
LR: 1.0000e-06
Better trainLoss: 1.1264e-01, Saving models...
--- Epoch 39/1000 ---


  4%|▍         | 39/1000 [08:50<3:38:29, 13.64s/it]

trainLoss: 1.1112e-01
LR: 1.0000e-06
Better trainLoss: 1.1112e-01, Saving models...
--- Epoch 40/1000 ---


  4%|▍         | 40/1000 [09:04<3:37:48, 13.61s/it]

trainLoss: 1.1286e-01
LR: 1.0000e-06
--- Epoch 41/1000 ---


  4%|▍         | 41/1000 [09:17<3:37:05, 13.58s/it]

trainLoss: 1.1304e-01
LR: 1.0000e-06
--- Epoch 42/1000 ---


  4%|▍         | 42/1000 [09:31<3:36:37, 13.57s/it]

trainLoss: 1.1739e-01
LR: 1.0000e-06
--- Epoch 43/1000 ---


  4%|▍         | 43/1000 [09:44<3:36:04, 13.55s/it]

trainLoss: 1.1546e-01
LR: 1.0000e-07
--- Epoch 44/1000 ---


  4%|▍         | 44/1000 [09:58<3:36:16, 13.57s/it]

trainLoss: 1.0560e-01
LR: 1.0000e-07
Better trainLoss: 1.0560e-01, Saving models...
--- Epoch 45/1000 ---


  4%|▍         | 45/1000 [10:12<3:36:05, 13.58s/it]

trainLoss: 1.0742e-01
LR: 1.0000e-07
--- Epoch 46/1000 ---


  5%|▍         | 46/1000 [10:25<3:36:02, 13.59s/it]

trainLoss: 1.0842e-01
LR: 1.0000e-07
--- Epoch 47/1000 ---


  5%|▍         | 47/1000 [10:39<3:35:31, 13.57s/it]

trainLoss: 1.1227e-01
LR: 1.0000e-07
--- Epoch 48/1000 ---


  5%|▍         | 48/1000 [10:52<3:35:10, 13.56s/it]

trainLoss: 1.1495e-01
LR: 1.0000e-08
--- Epoch 49/1000 ---


  5%|▍         | 49/1000 [11:06<3:35:32, 13.60s/it]

trainLoss: 1.0987e-01
LR: 1.0000e-08
--- Epoch 50/1000 ---


  5%|▌         | 50/1000 [11:20<3:35:17, 13.60s/it]

trainLoss: 1.1391e-01
LR: 1.0000e-08
--- Epoch 51/1000 ---


  5%|▌         | 51/1000 [11:33<3:34:43, 13.58s/it]

trainLoss: 1.0849e-01
LR: 1.0000e-08
--- Epoch 52/1000 ---


  5%|▌         | 52/1000 [11:47<3:34:15, 13.56s/it]

trainLoss: 1.0655e-01
LR: 1.0000e-08
--- Epoch 53/1000 ---


  5%|▌         | 53/1000 [12:00<3:34:23, 13.58s/it]

trainLoss: 1.0518e-01
LR: 1.0000e-08
Better trainLoss: 1.0518e-01, Saving models...
--- Epoch 54/1000 ---


  5%|▌         | 54/1000 [12:14<3:34:06, 13.58s/it]

trainLoss: 1.1470e-01
LR: 1.0000e-08
--- Epoch 55/1000 ---


  6%|▌         | 55/1000 [12:27<3:33:45, 13.57s/it]

trainLoss: 1.0843e-01
LR: 1.0000e-08
--- Epoch 56/1000 ---


  6%|▌         | 56/1000 [12:41<3:33:28, 13.57s/it]

trainLoss: 1.0648e-01
LR: 1.0000e-08
--- Epoch 57/1000 ---


  6%|▌         | 57/1000 [12:54<3:33:06, 13.56s/it]

trainLoss: 1.0774e-01
LR: 1.0000e-08
--- Epoch 58/1000 ---


  6%|▌         | 58/1000 [13:08<3:32:48, 13.55s/it]

trainLoss: 1.1304e-01
LR: 1.0000e-08
--- Epoch 59/1000 ---


  6%|▌         | 59/1000 [13:21<3:32:20, 13.54s/it]

trainLoss: 1.0606e-01
LR: 1.0000e-08
--- Epoch 60/1000 ---


  6%|▌         | 60/1000 [13:35<3:32:09, 13.54s/it]

trainLoss: 1.0799e-01
LR: 1.0000e-08
--- Epoch 61/1000 ---


  6%|▌         | 61/1000 [13:49<3:31:49, 13.54s/it]

trainLoss: 1.0649e-01
LR: 1.0000e-08
--- Epoch 62/1000 ---


  6%|▌         | 62/1000 [14:02<3:31:21, 13.52s/it]

trainLoss: 1.0958e-01
LR: 1.0000e-08
--- Epoch 63/1000 ---


  6%|▋         | 63/1000 [14:16<3:31:16, 13.53s/it]

trainLoss: 1.1143e-01
LR: 1.0000e-08
--- Epoch 64/1000 ---


  6%|▋         | 64/1000 [14:29<3:31:02, 13.53s/it]

trainLoss: 1.2240e-01
LR: 1.0000e-08
--- Epoch 65/1000 ---


  6%|▋         | 65/1000 [14:43<3:31:35, 13.58s/it]

trainLoss: 1.0455e-01
LR: 1.0000e-08
Better trainLoss: 1.0455e-01, Saving models...
--- Epoch 66/1000 ---


  7%|▋         | 66/1000 [14:56<3:31:10, 13.57s/it]

trainLoss: 1.1187e-01
LR: 1.0000e-08
--- Epoch 67/1000 ---


  7%|▋         | 67/1000 [15:10<3:30:52, 13.56s/it]

trainLoss: 1.1736e-01
LR: 1.0000e-08
--- Epoch 68/1000 ---


  7%|▋         | 68/1000 [15:23<3:30:33, 13.56s/it]

trainLoss: 1.0769e-01
LR: 1.0000e-08
--- Epoch 69/1000 ---


  7%|▋         | 69/1000 [15:37<3:30:24, 13.56s/it]

trainLoss: 1.0676e-01
LR: 1.0000e-08
--- Epoch 70/1000 ---


  7%|▋         | 70/1000 [15:51<3:30:01, 13.55s/it]

trainLoss: 1.1484e-01
LR: 1.0000e-08
--- Epoch 71/1000 ---


  7%|▋         | 71/1000 [16:04<3:29:43, 13.55s/it]

trainLoss: 1.0693e-01
LR: 1.0000e-08
--- Epoch 72/1000 ---


  7%|▋         | 72/1000 [16:18<3:29:27, 13.54s/it]

trainLoss: 1.0625e-01
LR: 1.0000e-08
--- Epoch 73/1000 ---


  7%|▋         | 73/1000 [16:31<3:29:13, 13.54s/it]

trainLoss: 1.1084e-01
LR: 1.0000e-08
--- Epoch 74/1000 ---


  7%|▋         | 74/1000 [16:45<3:28:55, 13.54s/it]

trainLoss: 1.0985e-01
LR: 1.0000e-08
--- Epoch 75/1000 ---


  8%|▊         | 75/1000 [16:59<3:30:11, 13.63s/it]

trainLoss: 1.0894e-01
LR: 1.0000e-08
--- Epoch 76/1000 ---


  8%|▊         | 76/1000 [17:12<3:29:35, 13.61s/it]

trainLoss: 1.1577e-01
LR: 1.0000e-08
--- Epoch 77/1000 ---


  8%|▊         | 77/1000 [17:26<3:29:02, 13.59s/it]

trainLoss: 1.0686e-01
LR: 1.0000e-08
--- Epoch 78/1000 ---


  8%|▊         | 78/1000 [17:39<3:28:28, 13.57s/it]

trainLoss: 1.1093e-01
LR: 1.0000e-08
--- Epoch 79/1000 ---


  8%|▊         | 79/1000 [17:53<3:28:05, 13.56s/it]

trainLoss: 1.0714e-01
LR: 1.0000e-08
--- Epoch 80/1000 ---


  8%|▊         | 80/1000 [18:06<3:27:39, 13.54s/it]

trainLoss: 1.1065e-01
LR: 1.0000e-08
--- Epoch 81/1000 ---


  8%|▊         | 81/1000 [18:20<3:27:21, 13.54s/it]

trainLoss: 1.1577e-01
LR: 1.0000e-08
--- Epoch 82/1000 ---


  8%|▊         | 82/1000 [18:33<3:27:09, 13.54s/it]

trainLoss: 1.0693e-01
LR: 1.0000e-08
--- Epoch 83/1000 ---


  8%|▊         | 83/1000 [18:47<3:26:48, 13.53s/it]

trainLoss: 1.0563e-01
LR: 1.0000e-08
--- Epoch 84/1000 ---


  8%|▊         | 84/1000 [19:00<3:26:28, 13.53s/it]

trainLoss: 1.1657e-01
LR: 1.0000e-08
--- Epoch 85/1000 ---


  8%|▊         | 85/1000 [19:14<3:26:17, 13.53s/it]

trainLoss: 1.1302e-01
LR: 1.0000e-08
--- Epoch 86/1000 ---


  9%|▊         | 86/1000 [19:27<3:26:03, 13.53s/it]

trainLoss: 1.1198e-01
LR: 1.0000e-08
--- Epoch 87/1000 ---


  9%|▊         | 87/1000 [19:41<3:25:50, 13.53s/it]

trainLoss: 1.1197e-01
LR: 1.0000e-08
--- Epoch 88/1000 ---


  9%|▉         | 88/1000 [19:54<3:25:35, 13.53s/it]

trainLoss: 1.0777e-01
LR: 1.0000e-08
--- Epoch 89/1000 ---


  9%|▉         | 89/1000 [20:08<3:25:24, 13.53s/it]

trainLoss: 1.1139e-01
LR: 1.0000e-08
--- Epoch 90/1000 ---


  9%|▉         | 90/1000 [20:21<3:25:08, 13.53s/it]

trainLoss: 1.0851e-01
LR: 1.0000e-08
--- Epoch 91/1000 ---


  9%|▉         | 91/1000 [20:35<3:24:58, 13.53s/it]

trainLoss: 1.1160e-01
LR: 1.0000e-08
--- Epoch 92/1000 ---


  9%|▉         | 92/1000 [20:49<3:24:46, 13.53s/it]

trainLoss: 1.1299e-01
LR: 1.0000e-08
--- Epoch 93/1000 ---


  9%|▉         | 93/1000 [21:02<3:24:25, 13.52s/it]

trainLoss: 1.1194e-01
LR: 1.0000e-08
--- Epoch 94/1000 ---


  9%|▉         | 94/1000 [21:16<3:24:18, 13.53s/it]

trainLoss: 1.1791e-01
LR: 1.0000e-08
--- Epoch 95/1000 ---


 10%|▉         | 95/1000 [21:29<3:24:04, 13.53s/it]

trainLoss: 1.0781e-01
LR: 1.0000e-08
--- Epoch 96/1000 ---


 10%|▉         | 96/1000 [21:43<3:23:53, 13.53s/it]

trainLoss: 1.1153e-01
LR: 1.0000e-08
--- Epoch 97/1000 ---


 10%|▉         | 97/1000 [21:56<3:23:35, 13.53s/it]

trainLoss: 1.0481e-01
LR: 1.0000e-08
--- Epoch 98/1000 ---


 10%|▉         | 98/1000 [22:10<3:23:25, 13.53s/it]

trainLoss: 1.0939e-01
LR: 1.0000e-08
--- Epoch 99/1000 ---


 10%|▉         | 99/1000 [22:23<3:23:09, 13.53s/it]

trainLoss: 1.1621e-01
LR: 1.0000e-08
--- Epoch 100/1000 ---


 10%|█         | 100/1000 [22:37<3:22:50, 13.52s/it]

trainLoss: 1.0943e-01
LR: 1.0000e-08
--- Epoch 101/1000 ---


 10%|█         | 101/1000 [22:50<3:22:48, 13.54s/it]

trainLoss: 1.1363e-01
LR: 1.0000e-08
--- Epoch 102/1000 ---


 10%|█         | 102/1000 [23:04<3:22:30, 13.53s/it]

trainLoss: 1.1007e-01
LR: 1.0000e-08
--- Epoch 103/1000 ---


 10%|█         | 103/1000 [23:17<3:22:55, 13.57s/it]

trainLoss: 1.1018e-01
LR: 1.0000e-08
--- Epoch 104/1000 ---


 10%|█         | 104/1000 [23:31<3:22:33, 13.56s/it]

trainLoss: 1.1189e-01
LR: 1.0000e-08
--- Epoch 105/1000 ---


 10%|█         | 105/1000 [23:45<3:22:15, 13.56s/it]

trainLoss: 1.1167e-01
LR: 1.0000e-08
--- Epoch 106/1000 ---


 11%|█         | 106/1000 [23:58<3:21:57, 13.55s/it]

trainLoss: 1.0857e-01
LR: 1.0000e-08
--- Epoch 107/1000 ---


 11%|█         | 107/1000 [24:12<3:21:33, 13.54s/it]

trainLoss: 1.0987e-01
LR: 1.0000e-08
--- Epoch 108/1000 ---


 11%|█         | 108/1000 [24:25<3:21:23, 13.55s/it]

trainLoss: 1.1050e-01
LR: 1.0000e-08
--- Epoch 109/1000 ---


 11%|█         | 109/1000 [24:39<3:22:29, 13.64s/it]

trainLoss: 1.1201e-01
LR: 1.0000e-08
--- Epoch 110/1000 ---


 11%|█         | 110/1000 [24:53<3:22:18, 13.64s/it]

trainLoss: 1.1667e-01
LR: 1.0000e-08
--- Epoch 111/1000 ---


 11%|█         | 111/1000 [25:06<3:21:47, 13.62s/it]

trainLoss: 1.0538e-01
LR: 1.0000e-08
--- Epoch 112/1000 ---


 11%|█         | 112/1000 [25:20<3:21:10, 13.59s/it]

trainLoss: 1.0458e-01
LR: 1.0000e-08
--- Epoch 113/1000 ---


 11%|█▏        | 113/1000 [25:33<3:20:42, 13.58s/it]

trainLoss: 1.1149e-01
LR: 1.0000e-08
--- Epoch 114/1000 ---


 11%|█▏        | 114/1000 [25:47<3:20:21, 13.57s/it]

trainLoss: 1.1037e-01
LR: 1.0000e-08
--- Epoch 115/1000 ---


 12%|█▏        | 115/1000 [26:00<3:19:59, 13.56s/it]

trainLoss: 1.0935e-01
LR: 1.0000e-08
--- Epoch 116/1000 ---


 12%|█▏        | 116/1000 [26:14<3:19:37, 13.55s/it]

trainLoss: 1.1392e-01
LR: 1.0000e-08
--- Epoch 117/1000 ---


 12%|█▏        | 117/1000 [26:27<3:19:22, 13.55s/it]

trainLoss: 1.0785e-01
LR: 1.0000e-08
--- Epoch 118/1000 ---


 12%|█▏        | 118/1000 [26:41<3:19:06, 13.54s/it]

trainLoss: 1.1535e-01
LR: 1.0000e-08
--- Epoch 119/1000 ---


 12%|█▏        | 119/1000 [26:55<3:18:49, 13.54s/it]

trainLoss: 1.0964e-01
LR: 1.0000e-08
--- Epoch 120/1000 ---


 12%|█▏        | 120/1000 [27:08<3:19:02, 13.57s/it]

trainLoss: 1.0964e-01
LR: 1.0000e-08
--- Epoch 121/1000 ---


 12%|█▏        | 121/1000 [27:22<3:19:41, 13.63s/it]

trainLoss: 1.1108e-01
LR: 1.0000e-08
--- Epoch 122/1000 ---


 12%|█▏        | 122/1000 [27:36<3:19:41, 13.65s/it]

trainLoss: 1.1158e-01
LR: 1.0000e-08
--- Epoch 123/1000 ---


 12%|█▏        | 123/1000 [27:49<3:19:06, 13.62s/it]

trainLoss: 1.1609e-01
LR: 1.0000e-08
--- Epoch 124/1000 ---


 12%|█▏        | 124/1000 [28:03<3:18:34, 13.60s/it]

trainLoss: 1.0838e-01
LR: 1.0000e-08
--- Epoch 125/1000 ---


 12%|█▎        | 125/1000 [28:16<3:18:17, 13.60s/it]

trainLoss: 1.0633e-01
LR: 1.0000e-08
--- Epoch 126/1000 ---


 13%|█▎        | 126/1000 [28:30<3:18:03, 13.60s/it]

trainLoss: 1.1077e-01
LR: 1.0000e-08
--- Epoch 127/1000 ---


 13%|█▎        | 127/1000 [28:44<3:17:44, 13.59s/it]

trainLoss: 1.0788e-01
LR: 1.0000e-08
--- Epoch 128/1000 ---


 13%|█▎        | 128/1000 [28:57<3:17:26, 13.59s/it]

trainLoss: 1.0669e-01
LR: 1.0000e-08
--- Epoch 129/1000 ---


 13%|█▎        | 129/1000 [29:11<3:17:12, 13.59s/it]

trainLoss: 1.0609e-01
LR: 1.0000e-08
--- Epoch 130/1000 ---


 13%|█▎        | 130/1000 [29:24<3:17:03, 13.59s/it]

trainLoss: 1.0981e-01
LR: 1.0000e-08
--- Epoch 131/1000 ---


 13%|█▎        | 131/1000 [29:38<3:16:45, 13.58s/it]

trainLoss: 1.1074e-01
LR: 1.0000e-08
--- Epoch 132/1000 ---


 13%|█▎        | 132/1000 [29:52<3:17:05, 13.62s/it]

trainLoss: 9.9201e-02
LR: 1.0000e-08
Better trainLoss: 9.9201e-02, Saving models...
--- Epoch 133/1000 ---


 13%|█▎        | 133/1000 [30:05<3:16:37, 13.61s/it]

trainLoss: 1.1440e-01
LR: 1.0000e-08
--- Epoch 134/1000 ---


 13%|█▎        | 134/1000 [30:19<3:16:18, 13.60s/it]

trainLoss: 1.1169e-01
LR: 1.0000e-08
--- Epoch 135/1000 ---


 14%|█▎        | 135/1000 [30:32<3:15:58, 13.59s/it]

trainLoss: 1.0809e-01
LR: 1.0000e-08
--- Epoch 136/1000 ---


 14%|█▎        | 136/1000 [30:46<3:16:36, 13.65s/it]

trainLoss: 1.1376e-01
LR: 1.0000e-08
--- Epoch 137/1000 ---


 14%|█▎        | 137/1000 [31:00<3:15:52, 13.62s/it]

trainLoss: 1.0874e-01
LR: 1.0000e-08
--- Epoch 138/1000 ---


 14%|█▍        | 138/1000 [31:13<3:15:26, 13.60s/it]

trainLoss: 1.1162e-01
LR: 1.0000e-08
--- Epoch 139/1000 ---


 14%|█▍        | 139/1000 [31:27<3:14:58, 13.59s/it]

trainLoss: 1.1169e-01
LR: 1.0000e-08
--- Epoch 140/1000 ---


 14%|█▍        | 140/1000 [31:40<3:14:45, 13.59s/it]

trainLoss: 1.0864e-01
LR: 1.0000e-08
--- Epoch 141/1000 ---


 14%|█▍        | 141/1000 [31:54<3:14:29, 13.59s/it]

trainLoss: 1.1013e-01
LR: 1.0000e-08
--- Epoch 142/1000 ---


 14%|█▍        | 142/1000 [32:07<3:14:14, 13.58s/it]

trainLoss: 1.0900e-01
LR: 1.0000e-08
--- Epoch 143/1000 ---


 14%|█▍        | 143/1000 [32:21<3:13:49, 13.57s/it]

trainLoss: 1.0433e-01
LR: 1.0000e-08
--- Epoch 144/1000 ---


 14%|█▍        | 144/1000 [32:35<3:13:41, 13.58s/it]

trainLoss: 1.1481e-01
LR: 1.0000e-08
--- Epoch 145/1000 ---


 14%|█▍        | 145/1000 [32:48<3:13:20, 13.57s/it]

trainLoss: 1.1806e-01
LR: 1.0000e-08
--- Epoch 146/1000 ---


 15%|█▍        | 146/1000 [33:02<3:13:01, 13.56s/it]

trainLoss: 1.1352e-01
LR: 1.0000e-08
--- Epoch 147/1000 ---


 15%|█▍        | 147/1000 [33:15<3:12:46, 13.56s/it]

trainLoss: 1.0595e-01
LR: 1.0000e-08
--- Epoch 148/1000 ---


 15%|█▍        | 148/1000 [33:29<3:12:25, 13.55s/it]

trainLoss: 1.1570e-01
LR: 1.0000e-08
--- Epoch 149/1000 ---


 15%|█▍        | 149/1000 [33:42<3:12:18, 13.56s/it]

trainLoss: 1.0769e-01
LR: 1.0000e-08
--- Epoch 150/1000 ---


 15%|█▌        | 150/1000 [33:56<3:12:04, 13.56s/it]

trainLoss: 1.1212e-01
LR: 1.0000e-08
--- Epoch 151/1000 ---


 15%|█▌        | 151/1000 [34:10<3:12:51, 13.63s/it]

trainLoss: 1.0840e-01
LR: 1.0000e-08
--- Epoch 152/1000 ---


 15%|█▌        | 152/1000 [34:23<3:12:30, 13.62s/it]

trainLoss: 1.0961e-01
LR: 1.0000e-08
--- Epoch 153/1000 ---


 15%|█▌        | 153/1000 [34:37<3:12:05, 13.61s/it]

trainLoss: 1.1439e-01
LR: 1.0000e-08
--- Epoch 154/1000 ---


 15%|█▌        | 154/1000 [34:50<3:11:40, 13.59s/it]

trainLoss: 1.1293e-01
LR: 1.0000e-08
--- Epoch 155/1000 ---


 16%|█▌        | 155/1000 [35:04<3:11:19, 13.59s/it]

trainLoss: 1.1247e-01
LR: 1.0000e-08
--- Epoch 156/1000 ---


 16%|█▌        | 156/1000 [35:18<3:11:00, 13.58s/it]

trainLoss: 1.1498e-01
LR: 1.0000e-08
--- Epoch 157/1000 ---


 16%|█▌        | 157/1000 [35:31<3:10:41, 13.57s/it]

trainLoss: 1.1024e-01
LR: 1.0000e-08
--- Epoch 158/1000 ---


 16%|█▌        | 158/1000 [35:45<3:10:26, 13.57s/it]

trainLoss: 1.0838e-01
LR: 1.0000e-08
--- Epoch 159/1000 ---


 16%|█▌        | 159/1000 [35:58<3:10:06, 13.56s/it]

trainLoss: 1.0857e-01
LR: 1.0000e-08
--- Epoch 160/1000 ---


 16%|█▌        | 160/1000 [36:12<3:09:52, 13.56s/it]

trainLoss: 1.1273e-01
LR: 1.0000e-08
--- Epoch 161/1000 ---


 16%|█▌        | 161/1000 [36:25<3:09:40, 13.56s/it]

trainLoss: 1.1456e-01
LR: 1.0000e-08
--- Epoch 162/1000 ---


 16%|█▌        | 162/1000 [36:39<3:09:26, 13.56s/it]

trainLoss: 1.0871e-01
LR: 1.0000e-08
--- Epoch 163/1000 ---


 16%|█▋        | 163/1000 [36:53<3:09:05, 13.56s/it]

trainLoss: 1.1386e-01
LR: 1.0000e-08
--- Epoch 164/1000 ---


 16%|█▋        | 163/1000 [37:01<3:10:08, 13.63s/it]


KeyboardInterrupt: 

In [23]:
surrogate.load_state_dict(torch.load(os.path.join(cps,versionName)))

<All keys matched successfully>

In [26]:
surrogate.eval()
U_hats = []
Us = []
for i, sampleBatch in enumerate(testDataLoader, start=1):

    # gpu
    U_x, U_y, p_x, p_y = sampleBatch
    U_x = U_x.squeeze().to(device)
    p_x = p_x.squeeze().to(device)
    U_y = U_y.to(device)
    p_y = p_y.to(device)
    with torch.no_grad():
        Us.append(U_y.detach().cpu())
        
        U_hat = surrogate(U_x, p_y)
                    
        U_hats.append(U_hat.detach().cpu())
        
        
Real_U = torch.stack(Us)
#Real_X_img = convertSimToImage(Real_X)

Surr_U = torch.stack(U_hats)
#Surr_X_img = convertSimToImage(Surr_X)



In [30]:
rel_error = torch.norm(Real_U - Surr_U)/torch.norm(Real_U)
rel_error

tensor(0.5263)