In [1]:
# EXPORT
# --- Must haves ---
import os, sys
sys.path.append('..')

import torch
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.cuda as cuda
import torch.nn as nn
import torchvision
import torch.nn.functional as F

from surrogates4sims.mantaflowDatasets import MantaFlowDataset, getSingleSim, createMantaFlowTrainTest

from surrogates4sims.utils import create_opt, create_one_cycle, find_lr, printNumModelParams, \
                                    rmse, writeMessage, plotSampleWprediction, plotSampleWpredictionByChannel, \
                                    plotSample, curl, jacobian, stream2uv, create_movie, convertSimToImage

from surrogates4sims.models import Generator, Encoder, AE_no_P, AE_xhat_z, AE_xhat_zV2

from surrogates4sims.train import trainEpoch, validEpoch

import numpy as np
from tqdm import tqdm
from copy import deepcopy

In [2]:
# data 
eval_only=False
DEBUG = False
# model name, for tensorboard recording and checkpointing purposes.
versionName = "end_to_end_plateau_train"

# GPU Numbers to use. Comma seprate them for multi-GPUs.
gpu_ids = "2,3"
versionName = versionName + '_GPUs{}'.format(gpu_ids.replace(',',''))
# path to load model weights.
pretrained_path = None

# rate at which to record metrics. (number of batches to average over when recording metrics, e.g. "every 5 batches")
tensorboard_rate = 5

# number of epochs to train. This is defined here so we can use the OneCycle LR Scheduler.
epochs = 1000

# Data Directory
dataDirec = '/data/mantaFlowSim/data/smoke_pos21_size5_f200/v'
reverseXY = False 

# checkpoint directory
cps = 'cps'
tensorboard_direc = "tb"

findLRs = False  

# hyper-params
seed = 1234
np.random.seed(seed)
testSplit = .1
bz = 40
numSamplesToKeep = np.infty #if not debugging
latentDim = 16
window_size = 5
filters = 128
num_conv = 4 # breaks when less than 2
simLen = 200
stack = True
simVizIndex = 0 # sim in the test set to visualize
createStreamFcn = False
doJacobian = False
repeat = 0
skip_connection = False
patience = 2
if DEBUG:
    epochs = 10000
    numSamplesToKeep = bz
    
versionName = versionName + '_latentDim{}_filters{}_bz{}_numConv{}_stream{}_jacobian{}_epochs{}_stack{}'.format(latentDim,filters,bz,num_conv,createStreamFcn,doJacobian,epochs,stack)
versionName

'end_to_end_plateau_train_GPUs23_latentDim16_filters128_bz40_numConv4_streamFalse_jacobianFalse_epochs1000_stackTrue'

In [3]:
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]=gpu_ids
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

trainData, testData = createMantaFlowTrainTest(dataDirec,simLen,testSplit,seed)
print((len(trainData),len(testData)))

Using device: cuda
(19000, 2000)


In [4]:
class MantaFlowDataset(Dataset):
    def __init__(self, 
                 dataDirec='/data/mantaFlowSim/data/smoke_pos21_size5_f200/v',
                 numToKeep=np.infty,transform=None, reverseXY=False, preprocess=True, AE=False,
                 w = 1, simLen = 200): 
        if type(dataDirec) == list:
            self.files = dataDirec
        else:
            self.files = glob(os.path.join(dataDirec,'*.npz'))
        self.dataDirec = dataDirec
        self.numToKeep = numToKeep
        self.transform = transform
        self.reverseXY = reverseXY
        self.AE = AE
        self.w = w
        self.simLen = simLen
        self.data = []
 
        if numToKeep < len(self.files):
            self.files = self.files[:numToKeep]
        for f in tqdm(self.files):
            X,y = self.loadfile(f)
            
            if preprocess:
                X,y = self.preprocessFcn(X,y)
                
            if reverseXY:
                self.data.append((y,X))
            else:
                self.data.append((X,y))

    def loadfile(self,fn):
        A = np.load(fn)
        X = A['x'].astype('float32')
        X = np.rollaxis(X,-1)
        y = A['y'].astype('float32')
        return X,y

    def preprocessFcn(self,X,y):
        x_range = 11.953
        X /= x_range
        y_range = [[0.2, 0.8], [0.04, 0.12], [0.0, 199.0]]
        for i, ri in enumerate(y_range):
            y[i] = (y[i]-ri[0]) / (ri[1]-ri[0]) * 2 - 1
        return X,y
    
    def __len__(self):
        return len(self.data)

    def plot(self,idx,savefig=False):
        X, label  = self.data[idx]
        if self.reverseXY:
            X = label
            
        plt.figure(figsize=(20,10))
        
        plt.subplot(211)
        fn = self.files[idx].replace('.npz','')
        title = '{} channel 0'.format(fn)
        plt.title(title)
        plt.imshow(X[0][::-1])
        plt.colorbar()
        
        plt.subplot(212)
        title = '{} channel 1'.format(fn)
        plt.title(title)
        plt.imshow(X[1][::-1])
        plt.colorbar()
        
        if savefig:
            title = title.replace(' ','_') + '.png'
            plt.savefig(title, dpi=300)
            plt.close()
        else:
            plt.show()
    
    def __getitem__(self, idx):
        q = idx // self.simLen
        r_idx = np.random.randint(0,self.simLen-self.w)
        x = self.data[q*simLen + r_idx : q*simLen + r_idx + 1]
        y = self.data[q*simLen + r_idx + 1 : q*simLen + r_idx + 1 + self.w]
        # to unpack this data into X (image) and p (cfg settings) arrays, use the following code
        U_x, p_x = zip(*x)
        U_y, p_y = zip(*y)
        return np.array(U_x), np.array(U_y), np.array(p_x), np.array(p_y)

In [5]:
# datasets may be smaller because: numSamplesToKeep 
testDataset = MantaFlowDataset(testData, reverseXY=reverseXY, numToKeep=numSamplesToKeep, AE=False,
                               w=simLen-1, simLen=simLen)
trainDataset = MantaFlowDataset(trainData, reverseXY=reverseXY,numToKeep=numSamplesToKeep, AE=False,
                                w=window_size, simLen=simLen)
len(trainDataset), len(testDataset)

trainDataLoader = DataLoader(dataset=trainDataset, batch_size=bz, shuffle=True, drop_last=True, pin_memory = True)
testDataLoader = DataLoader(dataset=testDataset, batch_size=bz, pin_memory=True)
# only use the first frame of each simulation as the input (U_x) for the full simulation test dataloader, 
# the next 199 frames are stored in the targets (U_y).
# added this on 10/27/2020
first_frame_testDataset = torch.utils.data.Subset(testDataset, range(0, len(testDataset), simLen))
simulation_testDataLoader = DataLoader(dataset= first_frame_testDataset, batch_size=2)

U_x, U_y, p_x, p_y = next(iter(trainDataLoader))
print(U_x.shape, p_x.shape)
U_x = U_x.squeeze(1) # squeeze away the window dimension for inputs, not targets (e.g., not U_y)
p_x = p_x.squeeze(1) # squeeze away the window dimension for inputs, not targets (e.g., not U_y)
U_x.shape, U_y.shape, p_x.shape, p_y.shape

100%|██████████| 2000/2000 [00:04<00:00, 496.02it/s]
100%|██████████| 19000/19000 [00:56<00:00, 336.39it/s]


torch.Size([40, 1, 2, 128, 96]) torch.Size([40, 1, 3])


(torch.Size([40, 2, 128, 96]),
 torch.Size([40, 5, 2, 128, 96]),
 torch.Size([40, 3]),
 torch.Size([40, 5, 3]))

In [6]:
# confirm that timestep number in p_y is contiguous within a window
((p_y[:,:,2]+1)/2 * 199)[:10]

tensor([[ 65.0000,  66.0000,  67.0000,  68.0000,  69.0000],
        [151.0000, 152.0000, 153.0000, 154.0000, 155.0000],
        [185.0000, 186.0000, 187.0000, 188.0000, 189.0000],
        [137.0000, 138.0000, 139.0000, 140.0000, 141.0000],
        [173.0000, 174.0000, 175.0000, 176.0000, 177.0000],
        [153.0000, 154.0000, 155.0000, 156.0000, 157.0000],
        [ 16.0000,  17.0000,  18.0000,  19.0000,  20.0000],
        [ 73.0000,  74.0000,  75.0000,  76.0000,  77.0000],
        [131.0000, 132.0000, 133.0000, 134.0000, 135.0000],
        [145.0000, 146.0000, 147.0000, 148.0000, 149.0000]])

In [7]:
# we are now subsetting this data to only include the first frame in each sim, p_y and U_y include next 199 frames
print(len(simulation_testDataLoader), len(simulation_testDataLoader.dataset))
for batch in simulation_testDataLoader:
    test_U_x, test_U_y, test_p_x, test_p_y = batch
    assert (test_p_x[0,:,2]+1)/2 * 199 == 0
    assert (test_p_x[1,:,2]+1)/2 * 199 == 0

print(((test_p_y[0,:,2]+1)/2 * 199))
test_U_x.shape, test_U_y.shape, test_p_x.shape, test_p_y.shape

5 10
tensor([  1.0000,   2.0000,   3.0000,   4.0000,   5.0000,   6.0000,   7.0000,
          8.0000,   9.0000,  10.0000,  11.0000,  12.0000,  13.0000,  14.0000,
         15.0000,  16.0000,  17.0000,  18.0000,  19.0000,  20.0000,  21.0000,
         22.0000,  23.0000,  24.0000,  25.0000,  26.0000,  27.0000,  28.0000,
         29.0000,  30.0000,  31.0000,  32.0000,  33.0000,  34.0000,  35.0000,
         36.0000,  37.0000,  38.0000,  39.0000,  40.0000,  41.0000,  42.0000,
         43.0000,  44.0000,  45.0000,  46.0000,  47.0000,  48.0000,  49.0000,
         50.0000,  51.0000,  52.0000,  53.0000,  54.0000,  55.0000,  56.0000,
         57.0000,  58.0000,  59.0000,  60.0000,  61.0000,  62.0000,  63.0000,
         64.0000,  65.0000,  66.0000,  67.0000,  68.0000,  69.0000,  70.0000,
         71.0000,  72.0000,  73.0000,  74.0000,  75.0000,  76.0000,  77.0000,
         78.0000,  79.0000,  80.0000,  81.0000,  82.0000,  83.0000,  84.0000,
         85.0000,  86.0000,  87.0000,  88.0000,  89.0000,  

(torch.Size([2, 1, 2, 128, 96]),
 torch.Size([2, 199, 2, 128, 96]),
 torch.Size([2, 1, 3]),
 torch.Size([2, 199, 3]))

In [8]:
# Encoder 

AE_model = AE_xhat_zV2(U_x, filters, latentDim, num_conv, repeat, 
                 skip_connection, stack, conv_k=3, last_k=3, 
                 act=nn.LeakyReLU(), return_z=True, stream=createStreamFcn, device='cpu')

'''
if len(gpu_ids.split(',')) > 1:
    AE_model = nn.DataParallel(AE_model)
    
'''
Xhat,z = AE_model(U_x)
Xhat.shape, z.shape



[128, 8, 6]


(torch.Size([40, 2, 128, 96]), torch.Size([40, 16]))

In [9]:
versionName

'end_to_end_plateau_train_GPUs23_latentDim16_filters128_bz40_numConv4_streamFalse_jacobianFalse_epochs1000_stackTrue'

In [10]:
AE_model.load_state_dict(torch.load(os.path.join('/home/widemann1/surrogates4sims/cps',
'plateau_train_GPUs2_latentDim16_filters128_bz16_numConv4_streamFalse_jacobianFalse_epochs1000_stackTrue_lr0.0001')))
# for 512 dim ? :
#plateau_train_GPUs0_latentDim512_filters128_bz16_numConv4_streamFalse_jacobianFalse_epochs10000_stackTrue_lr0.0001

<All keys matched successfully>

In [11]:
# LIN Model
class MLP(nn.Module):
    def __init__(self, X, hiddenLayerSizes = [1024], activation=nn.ELU()):
        super(MLP,self).__init__()
        
        self.activation = activation
        self.inputSize = X.shape[1:]
        self.modules = []
        self.modules.append(nn.Linear(np.prod(self.inputSize),hiddenLayerSizes[0]))
        self.modules.append(self.activation)
        for idx,sz in enumerate(hiddenLayerSizes[:-1]):
            self.modules.append(nn.Linear(hiddenLayerSizes[idx],hiddenLayerSizes[idx+1]))
            self.modules.append(self.activation)
                               
        self.modules.append(nn.Linear(hiddenLayerSizes[-1],np.prod(self.inputSize)))
        self.layers = nn.Sequential(*self.modules)
                                
        
    def forward(self,x):
        x = self.layers(x)
        return x
    
hiddenLayers = [128,128]
LIN_model = MLP(z, hiddenLayerSizes=hiddenLayers, activation=nn.ELU())
'''
if len(gpu_ids.split(',')) > 1:
    LIN_model = nn.DataParallel(LIN_model)
'''

"\nif len(gpu_ids.split(',')) > 1:\n    LIN_model = nn.DataParallel(LIN_model)\n"

In [12]:
# surrogate class

class Surrogate(nn.Module):
    
    def __init__(self, window,
                 z_size, p_size,
                LIN, encoder, decoder):
        super(Surrogate, self).__init__()
        self.window = window
        self.z_size = z_size # this does not include the size of p
        self.p_size = p_size
        self.c_size = z_size + p_size # this does include the size of p
        self.LIN = LIN
        self.encoder = encoder
        self.decoder = decoder
        
    def encode(self, U):
        
        return self.encoder(U)
        
    def decode(self, encoding):
        
        return self.decoder(encoding)
        
    def predict_next_w_encodings(self, encoding, p_y, window):
        '''
        use the LIN to predict the next w encodings for each 
        encoded U in the batch
        '''
            
        predicted_encodings = []
            
        # given a batch of encodings, advance each encoding window time steps.
        # save the result at each time step
        for i in range(window):
            encoding = self.LIN(encoding) + encoding # use LIN to predict delta in encoding
            # this was encoding[:,:,-self.p_size:] in 09_manta..., why the extra dimension?
            encoding[:,-self.p_size:] = p_y[:, i]
            predicted_encodings.append(encoding)
            
            
        return torch.stack(predicted_encodings)
    
    def forward(self, U, p_x, p_y, window = None):
        
        if window == None:
            window = self.window
        assert p_y.size(1) == window
            
        encoding = self.encode(U)
        encoding[:,-self.p_size:] = p_x # added this on 10/27/2020
        encoding_w = self.predict_next_w_encodings(encoding, p_y, window)
        # want to have this agree with U_y, which is [batch_size, window_size, channels, nx, ny]
        # right now, it's [window_size, batch_size, c_size], so transpose dimensions 0 and 1
        # print(encoding_w.shape)
        U = torch.stack([self.decode(encoding_i) for encoding_i in encoding_w])
        
        return U.transpose(0,1)
    
    
surrogate = Surrogate(window_size, latentDim - 3, 3, LIN_model, AE_model.encoder, AE_model.generator)

In [13]:
# note the important difference here
foo = torch.tensor([[1,1],[2,2],[3,3]])
foo, foo.reshape(2,3), foo.transpose(0,1)

(tensor([[1, 1],
         [2, 2],
         [3, 3]]),
 tensor([[1, 1, 2],
         [2, 3, 3]]),
 tensor([[1, 2, 3],
         [1, 2, 3]]))

In [14]:
encoding = surrogate.encode(U_x)
decoding = surrogate.decode(encoding)
assert surrogate.c_size == latentDim
assert surrogate.p_size == len(p_x[0])
assert encoding.shape[-1] == surrogate.c_size
assert decoding.shape == U_x.shape

In [15]:
U_hat = surrogate.forward(U_x, p_x, p_y)
U_hat.shape, U_y.shape, torch.norm(U_hat-U_y,p=1)/torch.norm(U_y)

(torch.Size([40, 5, 2, 128, 96]),
 torch.Size([40, 5, 2, 128, 96]),
 tensor(395.5565, grad_fn=<DivBackward0>))

In [16]:
del surrogate, encoding, decoding, U_hat

surrogate = Surrogate(window_size, latentDim - 3, 3, LIN_model, AE_model.encoder, AE_model.generator).to(device)

if len(gpu_ids.split(',')) > 1:
    surrogate = nn.DataParallel(surrogate)

In [17]:
surrogate.module.encoder.conv1.weight[-1]

tensor([[[-0.0615,  0.2370,  0.0193],
         [-0.1209, -0.1067,  0.1109],
         [-0.0105,  0.0456, -0.2153]],

        [[-0.0179,  0.2263, -0.1354],
         [ 0.0473, -0.0644, -0.1862],
         [-0.0807,  0.0856,  0.0642]]], device='cuda:0',
       grad_fn=<SelectBackward>)

In [18]:
max_lr = .0001
start_lr = 5*max_lr/10
#opt = create_opt(max_lr,model)
#lr_scheduler = create_one_cycle(opt,max_lr,epochs,trainDataLoader)
opt = torch.optim.Adam(surrogate.parameters(),lr=max_lr,betas=(.5,.999))
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt,patience=patience)

In [19]:
def L1_loss(pred, target):
    return torch.mean(torch.abs(pred - target))


def jacobian_loss(pred, target, device='cpu'):
    return L1_loss(jacobian(pred, device), jacobian(target, device))


def curl_loss(pred, target, device):
    return L1_loss(curl(pred, device), curl(target, device))


L = nn.MSELoss()


def p_loss(pred, target):
    return L(pred[:, -target.shape[1]:], target)


def loss(pred, target, device):
    
    if createStreamFcn:
        pred = stream2uv(pred, device)
        
    L1 = L1_loss(pred, target)
    Lj = 0
    if doJacobian:
        Lj = jacobian_loss(pred, target, device)
        
    return L1 + Lj

In [20]:
def trainEpoch(myDataLoader, tensorboard_writer, model, opt, p_loss, loss,
               metric, lr_scheduler, tensorboard_rate, device,
               tensorboard_recorder_step, total_steps):
    running_loss = 0.0
    running_rmse = 0.0
    total_loss = 0.0
    running_ploss = 0.0
    for i, sampleBatch in enumerate(myDataLoader, start=1):

        # --- Main Training ---
        
        # gpu
        U_x, U_y, p_x, p_y = sampleBatch
        U_x = U_x.squeeze(1).to(device)
        p_x = p_x.squeeze(1).to(device)
        U_y = U_y.to(device)
        p_y = p_y.to(device)
            

        # zero the parameter gradients
        opt.zero_grad()

        U_hat = model(U_x, p_x, p_y)
        pl = 0
        ll = loss(U_hat, U_y, device)
        combined_loss = pl + ll
        combined_loss.backward()
        opt.step()
        
        # loss
        batch_loss = combined_loss.item()
        running_loss += batch_loss
        total_loss += batch_loss
        
        batch_ploss = pl
        running_ploss += batch_ploss

        # --- Metrics Recording ---

        # metrics
        r = metric(U_hat, U_y)
        running_rmse += r

        # record lr change
        total_steps += 1
        tensorboard_writer.add_scalar(tag="LR", scalar_value=opt.param_groups[0]['lr'], global_step=total_steps)
        #lr_scheduler.step()

        # tensorboard writes
        if (i % tensorboard_rate == 0):
            tensorboard_recorder_step += 1
            avg_running_loss = running_loss/tensorboard_rate
            avg_running_rmse = running_rmse/tensorboard_rate
            avg_running_ploss = running_ploss/tensorboard_rate
            tensorboard_writer.add_scalar(tag="Loss", scalar_value=avg_running_loss, global_step=tensorboard_recorder_step)
            tensorboard_writer.add_scalar(tag="p_loss", scalar_value=avg_running_ploss, global_step=tensorboard_recorder_step)
            tensorboard_writer.add_scalar(tag=metric.__name__, scalar_value=avg_running_rmse, global_step=tensorboard_recorder_step)
            # reset running_loss for the next set of batches. (tensorboard_rate number of batches)
            running_loss = 0.0
            running_rmse = 0.0
            running_ploss = 0.0
            tensorboard_writer.flush()

    return total_loss/len(myDataLoader), tensorboard_recorder_step, total_steps

In [21]:
def validEpoch(myDataLoader, tensorboard_writer, model, p_loss, loss, metric,
               device, tensorboard_recorder_step):
    running_loss = 0.0
    running_rmse = 0.0
    for i, sampleBatch in enumerate(myDataLoader, start=1):

        # --- Metrics Recording ---

        # gpu
        U_x, U_y, p_x, p_y = sampleBatch
        U_x = U_x.squeeze(1).to(device) # only squeeze away the window dimension (because batch size = 1)
        p_x = p_x.squeeze(1).to(device) # only squeeze away the window dimension (because batch size = 1)
        U_y = U_y.to(device)
        p_y = p_y.to(device)
        
        perc = len(U_x)/len(myDataLoader.dataset)

        # forward, no gradient calculations
        with torch.no_grad():
            U_hat = model(U_x, p_x, p_y, window = simLen-1)

        # loss
        combined_loss = loss(U_hat, U_y, device)
        
        running_loss += perc*(combined_loss.item())

        # metrics
        r = metric(U_hat, U_y)
        running_rmse += perc*r

    avg_running_loss = running_loss
    avg_running_rmse = running_rmse
    tensorboard_writer.add_scalar(tag="Loss", scalar_value=avg_running_loss, global_step=tensorboard_recorder_step)
    tensorboard_writer.add_scalar(tag=metric.__name__, scalar_value=avg_running_rmse, global_step=tensorboard_recorder_step)
    tensorboard_writer.flush()
    
    return running_loss

In [22]:
try:
    os.mkdir(cps)
except:
    print("checkpoints directory already exists :)")
    
# create a summary writer.
train_writer = SummaryWriter(os.path.join(tensorboard_direc, versionName,'train'))
test_writer = SummaryWriter(os.path.join(tensorboard_direc, versionName,'valid'))
tensorboard_recorder_step = 0
total_steps = 0

checkpoints directory already exists :)


In [None]:
writeMessage('---------- Started Training ----------', versionName)
bestLoss = np.infty

if not eval_only:
    for epoch in tqdm(range(1, epochs+1)):  # loop over the dataset multiple times

        writeMessage("--- Epoch {0}/{1} ---".format(epoch, epochs), versionName)

        surrogate.train()
        trainLoss, tensorboard_recorder_step, total_steps = trainEpoch(trainDataLoader, 
                                                                       train_writer, surrogate,
                                                                       opt, p_loss, loss,
                                                                       rmse, lr_scheduler, 
                                                                       tensorboard_rate, device,
                                                                       tensorboard_recorder_step, total_steps)

        writeMessage("trainLoss: {:.4e}".format(trainLoss),versionName)
        writeMessage("LR: {:.4e}".format(opt.param_groups[0]['lr']),versionName)
#         if trainLoss < bestLoss:
#             bestLoss = trainLoss
#             writeMessage("Better trainLoss: {:.4e}, Saving models...".format(bestLoss),versionName)
#             torch.save(surrogate.state_dict(), os.path.join(cps,versionName))

        surrogate.eval()
        valLoss = validEpoch(testDataLoader, test_writer, surrogate, p_loss, loss, rmse, device, tensorboard_recorder_step)
        writeMessage("valLoss: {:.4e}".format(valLoss),versionName)

        # checkpoint progress
        if valLoss < bestLoss:
            bestLoss = valLoss
            writeMessage("Better valLoss: {:.4e}, Saving models...".format(bestLoss),versionName)
            torch.save(surrogate.state_dict(), os.path.join(cps,versionName))

        lr_scheduler.step(trainLoss)

        if opt.param_groups[0]['lr'] < 5e-8:
            break
    writeMessage('---------- Finished Training ----------', versionName)

  0%|          | 0/1000 [00:00<?, ?it/s]

---------- Started Training ----------
--- Epoch 1/1000 ---




trainLoss: 1.4681e-02
LR: 1.0000e-04


In [None]:
surrogate.load_state_dict(torch.load(os.path.join(cps,versionName)))

In [None]:
surrogate.eval()
U_hats = []
Us = []
for i, sampleBatch in enumerate(simulation_testDataLoader, start=1):

    # gpu
    U_x, U_y, p_x, p_y = sampleBatch
    U_x = U_x.squeeze(1).to(device)
    p_x = p_x.squeeze(1).to(device)
    U_y = U_y.to(device)
    p_y = p_y.to(device)
    with torch.no_grad():
        Us.append(U_y.detach().cpu())
        
        U_hat = surrogate(U_x, p_x, p_y, window=simLen-1)
                    
        U_hats.append(U_hat.detach().cpu())
        
        
Real_U = torch.stack(Us)
#Real_X_img = convertSimToImage(Real_X)

Surr_U = torch.stack(U_hats)
#Surr_X_img = convertSimToImage(Surr_X)

In [None]:
rel_error = torch.norm(Real_U - Surr_U)/torch.norm(Real_U)
writeMessage("Relative_Error: {:.4e}".format(rel_error),versionName)
test_writer.add_scalar(tag="Relative_Error", scalar_value=rel_error, global_step=tensorboard_recorder_step)
test_writer.flush()