**Feb 20, 2024**: Only change is use of Conv PNODE

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# %%
import pandas as pd

# %%
import re
import os
import time

# %%
import matplotlib.pyplot as plt
import argparse
import logging

In [2]:
# %%
# SPECIFY ARGS
# Generative model for noisy data based on ODE
parser = argparse.ArgumentParser('PNODE')

# parser.add_argument('-n',  type=int, default=1, help="Size of the dataset")
# parser.add_argument('-n',  type=int, default=100, help="Size of the dataset")

parser.add_argument('--niters', type=int, 
                    default=50)
parser.add_argument('--lr',  type=float, default=1e-2, help="Starting learning rate.")
parser.add_argument('-bs', '--batch-size', type=int, default=4)
parser.add_argument('-bt', '--batch-time', type=int, default=5)
parser.add_argument('-nspb', '--nsims_per_batch', type=int, default=1)
parser.add_argument('-epch', '--nepochs', type=int, default=300)


parser.add_argument('--save', type=str, default='experiments/', 
                    help="Path for save checkpoints")
parser.add_argument('--load', type=str, 
                    default=None, 
#                     default=86364,
                    help="ID of the experiment to load for evaluation. If None, run a new experiment.")
parser.add_argument('-r', '--random-seed', type=int, default=1123, help="Random_seed")


parser.add_argument('--pnode', 
                    action='store_false', 
                    help="RUN parameterized neural ode")
parser.add_argument('--node-layers', 
                    type=int, 
                    default=3, 
                    help="number of layers in NODE")

parser.add_argument('-u', '--units', 
                    type=int, 
                    default=90, 
                    help="Number of units per layer in ODE func")

parser.add_argument('--nParamsToUse',
                    type=int,
                    default=9,
                    help="Number of CME params to use")

# parser.add_argument('--normalize', type=bool, default=True)

parser.add_argument('-ds',
                    type=int,
                    default=1,
                    help="Coarsening factor for position angles")

# args = parser.parse_args(args=())
args = parser.parse_args(args=())

# %%
vars(args)

{'niters': 50,
 'lr': 0.01,
 'batch_size': 4,
 'batch_time': 5,
 'nsims_per_batch': 1,
 'nepochs': 300,
 'save': 'experiments/',
 'load': None,
 'random_seed': 1123,
 'pnode': True,
 'node_layers': 3,
 'units': 90,
 'nParamsToUse': 9,
 'ds': 1}

In [3]:
import numpy as np
import scipy.linalg as la
import scipy.sparse as sparse
import matplotlib.pyplot as plt
from random import SystemRandom

In [4]:
%matplotlib inline

In [5]:
plt.rc("axes.spines", right=True, top=True)
plt.rc("figure", dpi=300, 
       figsize=(9, 3)
      )
plt.rc("font", family="serif")
plt.rc("legend", edgecolor="none", frameon=True)
plt.style.use("dark_background")

In [6]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [7]:
adjoint=True
if adjoint:
    from torchdiffeq import odeint_adjoint as odeint
else:
    from torchdiffeq import odeint

In [8]:
import edge_utils as edut
import node_1d_utils as nut

# %%
import logging

# stealing this from: 
# https://github.com/rtqichen/torchdiffeq/blob/master/examples/odenet_mnist.py#L250C1-L274C18
def get_logger(logpath, package_files=[], displaying=True, saving=True, debug=False):
    logger = logging.getLogger()
    if debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logger.setLevel(level)
    if saving:
        info_file_handler = logging.FileHandler(logpath, mode="a")
        info_file_handler.setLevel(level)
        logger.addHandler(info_file_handler)
    if displaying:
        console_handler = logging.StreamHandler()
        console_handler.setLevel(level)
        logger.addHandler(console_handler)
        #     logger.info(filepath)

        #     with open(filepath, "r") as f:
        #         logger.info(f.read())

        #     for f in package_files:
        #         logger.info(f)
        #         with open(f, "r") as package_f:
        #             logger.info(package_f.read())

    return logger


# %%
edut.makedirs(os.path.join(os.getcwd(), "logs"))
logdir = os.path.join(os.getcwd(), "logs")

In [9]:
theta_s_2161 = np.linspace(0, 360, 512)[160] + 1.2 * 180 - 360
theta_e_2161 = np.linspace(0, 360, 512)[320] + 1.2 * 180 - 360
print("Range of angles for CR2161: {} {}".format(theta_s_2161, theta_e_2161))

# %%
ed_2161, sd_2161 = edut.load_edge_data_blobfree(2161)
# crude downsample for now
ed_2161 = ed_2161[:, ::args.ds, :]

# %%
nTimes, nTheta_2161, nSims_2161 = ed_2161.shape
nTimes, nTheta_2161, nSims_2161

# %%
theta_grid = np.linspace(np.ceil(theta_s_2161), np.ceil(theta_e_2161), nTheta_2161 * args.ds)[::args.ds]
theta_grid[:5]

Range of angles for CR2161: -31.279843444227026 81.44031311154595


array([-31.        , -30.28930818, -29.57861635, -28.86792453,
       -28.1572327 ])

In [10]:
theta_grid.shape

(160,)

In [11]:
def getRValuesAllSims(edge_data_matrix, sample_freq=2):
    """
    Return r values for all sims at once so we don't lose time in training processing r values repeatedly
    """
    r_data_matrix = np.zeros(edge_data_matrix.shape)
    nsims = edge_data_matrix.shape[2]
    for i in range(nsims):
        r_vals, theta_vals = edut.getRValues(edge_data_matrix, simIdx=i, minStartIdx=0, sample_freq=sample_freq)
        r_data_matrix[:, :, i] = r_vals

    return r_data_matrix


# %%
rd_2161 = getRValuesAllSims(ed_2161, sample_freq=args.ds)
print(rd_2161.shape)
# %%
# we are removing some data where the edge detection is not necessarily super reliable.
sims_to_remove = np.array([33, 39, 63, 73, 113, 128, 131, 142, 193, 218, 253, 264, 273, 312, 313, 324])

# %%
sd_modified = np.setdiff1d(sd_2161, sims_to_remove)

(90, 160, 278)


In [12]:
from numpy.random import Generator, PCG64
# rng = Generator(PCG64())
rng = np.random.default_rng(seed=202310)

nTrain = int(np.floor(0.7 * len(sd_modified)))
nCalib = int(np.floor(0.15 * len(sd_modified)))
nTest = len(sd_modified) - nTrain - nCalib

print(nTrain, nCalib, nTest)

sd_train = np.sort(rng.choice(sd_modified, nTrain, replace=False))
sd_calib = np.sort(rng.choice(np.setdiff1d(sd_modified, sd_train), nCalib, replace=False))
sd_test = np.setdiff1d(sd_modified, np.sort(np.concatenate((sd_train, sd_calib), axis=0)))

183 39 40


In [13]:
orig_sd_train_idx = np.array([np.where(sd_2161 == i)[0][0] for i in sd_train])
orig_sd_test_idx = np.array([np.where(sd_2161 == i)[0][0] for i in sd_test])
orig_sd_calib_idx = np.array([np.where(sd_2161 == i)[0][0] for i in sd_calib])

In [40]:
len(orig_sd_train_idx)

183

In [14]:
y_train = rd_2161[:, :, orig_sd_train_idx]
y_test = rd_2161[:, :, orig_sd_test_idx]
y_calib = rd_2161[:, :, orig_sd_calib_idx]

In [15]:
tMinTrain = []
tMaxTrain = []

tMinTrainIdx=[]
tMaxTrainIdx=[]

dtTrain = []
yMinTrain = []
yMaxTrain = []

tMinTest = []
tMaxTest = []
dtTest = []

tMinCalib = []
tMaxCalib = []
dtCalib = []

for sidx in orig_sd_train_idx:
    
    r_sim = rd_2161[:, :, sidx]
    
    tMinIdx, tMin, tMaxIdx, tMax = edut.getTMinTMax(ed_2161, simIdx = sidx)

    r_sim_valid = r_sim[tMinIdx:(tMaxIdx + 1), :]
    
    tMinTrain.append(tMin)
    tMaxTrain.append(tMax)

    tMinTrainIdx.append(tMinIdx)
    tMaxTrainIdx.append(tMaxIdx)

    
    yMinTrain.append(r_sim_valid.min())
    yMaxTrain.append(r_sim_valid.max())
    
    tAllScaled = (np.arange(tMin, tMax + 2, step=2) - tMin) / (tMax - tMin)
    
    dtTrain.append(tAllScaled[1] - tAllScaled[0])
    
    
for sidx in orig_sd_test_idx:
    
    r_sim = rd_2161[:, :, sidx]
    
    tMinIdx, tMin, tMaxIdx, tMax = edut.getTMinTMax(ed_2161, simIdx = sidx)

    r_sim_valid = r_sim[tMinIdx:(tMaxIdx + 1), :]
    
    tMinTest.append(tMin)
    tMaxTest.append(tMax)
        
    tAllScaled = (np.arange(tMin, tMax + 2, step=2) - tMin) / (tMax - tMin)
    
    dtTest.append(tAllScaled[1] - tAllScaled[0])
    
    
    
for sidx in orig_sd_calib_idx:    
    r_sim = rd_2161[:, :, sidx]
    
    tMinIdx, tMin, tMaxIdx, tMax = edut.getTMinTMax(ed_2161, simIdx = sidx)

    r_sim_valid = r_sim[tMinIdx:(tMaxIdx + 1), :]
    
    tMinCalib.append(tMin)
    tMaxCalib.append(tMax)
        
    tAllScaled = (np.arange(tMin, tMax + 2, step=2) - tMin) / (tMax - tMin)
    
    dtCalib.append(tAllScaled[1] - tAllScaled[0])

In [16]:
# NOW NORMALIZE YTRAIN AND YTEST
yMinTrainAll = np.array(yMinTrain).min()
yMaxTrainAll = np.array(yMaxTrain).max()

yMinTrainAll, yMaxTrainAll

# %%
y_train_normalized = (y_train - yMinTrainAll) / (yMaxTrainAll - yMinTrainAll)
y_test_normalized = (y_test - yMinTrainAll) / (yMaxTrainAll - yMinTrainAll)

# %%
y_calib_normalized = (y_calib - yMinTrainAll) / (yMaxTrainAll - yMinTrainAll)

In [17]:
# %%
cme_params_norm = pd.read_csv("./CMEParams2161_Scaled.csv", index_col=0)
cme_params_norm

# %%
cme_params_to_augment = cme_params_norm.to_numpy()
cme_params_to_augment.shape

(278, 9)

In [18]:
input_dim = rd_2161.shape[1]
param_dim = 9
input_dim, param_dim

(160, 9)

In [19]:
augmented_r = np.zeros((rd_2161.shape[0], input_dim + param_dim, rd_2161.shape[2]))
augmented_r[:, :input_dim, orig_sd_train_idx] = y_train_normalized
augmented_r[:, :input_dim, orig_sd_test_idx] = y_test_normalized
augmented_r[:, :input_dim, orig_sd_calib_idx] = y_calib_normalized
for iii in range(rd_2161.shape[2]):
    augmented_r[:, (input_dim):, iii] = cme_params_to_augment[iii, :]
    
    
aug_y_train = augmented_r[:, :, orig_sd_train_idx]
aug_y_test = augmented_r[:, :, orig_sd_test_idx]
aug_y_calib = augmented_r[:, :, orig_sd_calib_idx]

In [20]:
def init_network_weights_xavier_normal(net):
    for m in net.modules():
        if isinstance(m, nn.Conv1d) or isinstance(m, nn.Linear):
            nn.init.xavier_normal_(m.weight)
            nn.init.constant_(m.bias, val=0)

In [21]:
# Define Encoder-Decoder PNODE Now
class ODENet(nn.Module):
    def __init__(self, 
                 latent_dim, 
                 param_dim,
                 n_layers=1,
                 n_units=100,
                 nonlinear=nn.ELU):
    
        super(ODENet, self).__init__()
        layers = [nn.Linear(latent_dim + param_dim, n_units)]
        for i in range(n_layers - 1):
            layers.append(nonlinear())
            layers.append(nn.Linear(n_units, n_units))

        layers.append(nonlinear())
        layers.append(nn.Linear(n_units, latent_dim))  
        odenet = nn.Sequential(*layers)

        init_network_weights_xavier_normal(odenet)

        self.odenet = odenet
        self.latent_dim = latent_dim
    
    def forward(self, t, y):
        
        output = torch.cat((self.odenet(y),
                    torch.zeros_like(y[:, self.latent_dim:])), 
                    -1)
        
        return output

class PNODE_Conv(nn.Module):
    def __init__(self, 
                 input_dim,
                 latent_dim,
                 param_dim, 
                 n_layers=1, 
                 n_units=100,
                 nonlinear=nn.ELU):
        super(PNODE_Conv, self).__init__()
        
        encoder = nn.Sequential(nn.ZeroPad2d((7,8,0,0)),
                                nn.Conv1d(1,8,16,stride=2,padding=0),
                                nn.ELU(),
                                nn.ZeroPad2d((3,4,0,0)),
                                nn.Conv1d(8,16,8,stride=4,padding=0),
                                nn.ELU(),
                                nn.ZeroPad2d((1,2,0,0)),
                                nn.Conv1d(16,32,4,stride=4,padding=0),
                                nn.ELU(),
                                nn.ZeroPad2d((1,2,0,0)),
                                nn.Conv1d(32,64,4,stride=4,padding=0),
                                nn.ELU(),
                                nn.Flatten(),
                                nn.Linear(128,latent_dim), 
                                nn.ELU()
                                )
        

        
        decoder_mlp = nn.Sequential(
            nn.Linear(latent_dim, 80),
            nn.ELU())
        decoder_conv = nn.Sequential(nn.ConvTranspose1d(40, 20, 4, stride=4),
                            nn.ELU(),
                            nn.ConvTranspose1d(20, 10, 4, stride=4),
                            nn.ELU(),
                            nn.ConvTranspose1d(10, 5, 3, stride=3, padding=8),
                            nn.ELU(),
                            nn.ConvTranspose1d(5, 1, 6, stride=2, padding=2))
        
        
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        
        self.encoder=encoder
        self.pnode=ODENet(latent_dim, param_dim).to(device)
        self.decoder_mlp = decoder_mlp
        self.decoder_conv = decoder_conv
        
        init_network_weights_xavier_normal(encoder)
        init_network_weights_xavier_normal(decoder_mlp)
        init_network_weights_xavier_normal(decoder_conv)
        
        
    def forward(self, t, y):
        y_init = y[:, None, :self.input_dim].reshape((-1, 1, self.input_dim))
        init_latent = self.encoder(y_init).reshape((1, -1, self.latent_dim))
        init_latent = torch.cat((init_latent, y[:, None, self.input_dim:]),-1)
        latent_states = odeint(self.pnode, init_latent[0, :, :], t)
        #         ntraj, nbatch, _  = latent_states.shape
        latent_mlp = self.decoder_mlp(latent_states[:, :, :self.latent_dim])
        latent_mlp = torch.permute(latent_mlp, (1, 0, 2))
        latent_mlp = latent_mlp[None, :, :, :]
        nbatch, ntraj, nt, _ = latent_mlp.shape
        latent_mlp = latent_mlp.reshape((-1, 40, 2))
        pred_sol = self.decoder_conv(latent_mlp)
        pred_sol = pred_sol.reshape((nbatch, ntraj, nt, 160))
        
        return pred_sol

In [22]:
pnode_conv = PNODE_Conv(input_dim=160, latent_dim=15, param_dim=9).to(device)

In [23]:
optimizer = optim.Adamax(pnode_conv.parameters(), lr=args.lr)

print(args.lr)
pEncoder = edut.count_parameters(pnode_conv.encoder)
pPNODE = edut.count_parameters(pnode_conv.pnode)
pDecoder = edut.count_parameters(pnode_conv.decoder_mlp) + edut.count_parameters(pnode_conv.decoder_conv)

print(pEncoder + pPNODE + pDecoder)

0.01
22958


In [24]:
edut.makedirs(args.save)

if args.load is not None:
    experimentID = args.load
else:
    experimentID = int(SystemRandom().random()*100000)
print(experimentID)

ckpt_path = os.path.join(args.save, "experiment_" + str(experimentID) + '.ckpt')
print(ckpt_path)

# %%
logger = get_logger(logpath=os.path.join(logdir, "expt_normalized_pnode_siam2.log"))
logger

74807
experiments/experiment_74807.ckpt


<RootLogger root (INFO)>

In [25]:
# %%
logger.info(os.getcwd())
logger.info(pnode_conv)
logger.info("Number of parameters: {}".format(pEncoder + pPNODE + pDecoder))
logger.info(vars(args))
logger.info("Train Idx")
logger.info(sd_train)
logger.info("Calib Idx")
logger.info(sd_calib)
logger.info("Test Idx")
logger.info(sd_test)
logger.info("Checkpoint Path")
logger.info(ckpt_path)
logger.info("Input Dim: ")
logger.info(input_dim)
logger.info("Param Dim: ")
logger.info(param_dim)
logger.info(device)

/home/ajivani/WLROM_new/EdgeSS
PNODE_Conv(
  (encoder): Sequential(
    (0): ZeroPad2d((7, 8, 0, 0))
    (1): Conv1d(1, 8, kernel_size=(16,), stride=(2,))
    (2): ELU(alpha=1.0)
    (3): ZeroPad2d((3, 4, 0, 0))
    (4): Conv1d(8, 16, kernel_size=(8,), stride=(4,))
    (5): ELU(alpha=1.0)
    (6): ZeroPad2d((1, 2, 0, 0))
    (7): Conv1d(16, 32, kernel_size=(4,), stride=(4,))
    (8): ELU(alpha=1.0)
    (9): ZeroPad2d((1, 2, 0, 0))
    (10): Conv1d(32, 64, kernel_size=(4,), stride=(4,))
    (11): ELU(alpha=1.0)
    (12): Flatten(start_dim=1, end_dim=-1)
    (13): Linear(in_features=128, out_features=15, bias=True)
    (14): ELU(alpha=1.0)
  )
  (pnode): ODENet(
    (odenet): Sequential(
      (0): Linear(in_features=24, out_features=100, bias=True)
      (1): ELU(alpha=1.0)
      (2): Linear(in_features=100, out_features=15, bias=True)
    )
  )
  (decoder_mlp): Sequential(
    (0): Linear(in_features=15, out_features=80, bias=True)
    (1): ELU(alpha=1.0)
  )
  (decoder_conv): Sequenti

In [26]:
time_steps = torch.Tensor(np.linspace(0, 1, rd_2161.shape[0]))
time_steps.shape

torch.Size([90])

In [27]:
# %%
def get_data_for_sim(sidx, device="cpu"):
    """
    Supply sidx from either orig_sd_train_idx or orig_sd_test_idx
    Based on that, index augmented r dataset, and return relevant training data
    as well as training time.
    """
    tMinIdx, tMin, tMaxIdx, tMax = edut.getTMinTMax(ed_2161, simIdx = sidx)
    
    #     valid_times = np.arange(tMin, tMax + 2, step=2)
        
    #     tAllScaled = (valid_times - tMin) / (tMax - tMin)
    
    time_steps = np.linspace(0, 1, rd_2161.shape[0])
    tAllScaled = time_steps[:(tMaxIdx - tMinIdx + 1)]
    
    r_sim = augmented_r[tMinIdx:(tMaxIdx + 1), :, sidx]
    
    y0_train_torch = torch.from_numpy(np.float32(r_sim[0, :])).reshape((1, len(r_sim[0, :]))).to(device)
    t_train_torch = torch.Tensor(tAllScaled).to(device)
    y_train_torch = torch.Tensor(r_sim).to(device)
    
    return y0_train_torch, t_train_torch, y_train_torch

In [28]:
ckpt_freq=25

def update_learning_rate(optimizer, decay_rate = 0.999, lowest = 1e-3):
    for param_group in optimizer.param_groups:
        lr = param_group['lr']
        lr = max(lr * decay_rate, lowest)
        param_group['lr'] = lr

In [29]:
sim_ids_all = np.array([i for i in range(len(orig_sd_train_idx))])

In [None]:
orig_sd_train_idx

In [41]:
sim_ids_all

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18

In [42]:
sim_ids_to_train[0]

165

In [30]:
# sim_ids_to_train = np.sort(rng.choice(sim_ids_all, args.nsims_per_batch, replace=False))
# y0_data, tt_data, ytt_data = get_data_for_sim(sim_ids_to_train[0], device=device)

In [31]:
# y0_data.shape

In [32]:
# il = pnode_conv.encoder(y0_data[:, None, :input_dim]).reshape((1, -1, 15))
# il2 = torch.cat((il, y0_data[:, None, input_dim:]),-1)
# il2.shape

In [33]:
# ls = odeint(pnode_conv.pnode, il2[0, :, :], tt_data)
# ls.shape

In [34]:
# latent_mlp = pnode_conv.decoder_mlp(ls[:, :, :15])
# latent_mlp.shape

In [35]:
# y0_data.reshape((1, 1, input_dim + param_dim))[:, :, :input_dim]

In [36]:
# torch.squeeze(pnode_conv(tt_data, y0_data)).shape

In [38]:
if args.load is not None:
    edut.get_ckpt_model(ckpt_path, pnode_conv, device)
else:
    for ep in range(1, args.nepochs + 1):
        sim_ids_to_train = np.sort(rng.choice(sim_ids_all, args.nsims_per_batch, replace=False))
        y0_data, tt_data, ytt_data = get_data_for_sim(sim_ids_to_train[0], device=device)
        
        best_loss = 1e20
        for itr in range(1, args.niters + 1):
            optimizer.zero_grad()
            update_learning_rate(optimizer, decay_rate = 0.999, lowest = args.lr/10)
            
            pred_y = torch.squeeze(pnode_conv(tt_data, y0_data))
            losses = torch.mean(torch.abs(pred_y - ytt_data[:, :input_dim]))
            losses.backward()
            optimizer.step()
            
            with torch.no_grad():
                val_loss = 0
                for i in range(len(sd_calib)):
                    y0_data, tt_data, ytt_data = get_data_for_sim(orig_sd_calib_idx[i], device=device)
                    val_pred_y = torch.squeeze(pnode_conv(tt_data, y0_data))
                    val_loss += torch.mean(torch.abs(val_pred_y - ytt_data[:, :input_dim]))
                
                message = "Epoch {:02d}|Iteration {:03d}|Val Losses Agg {:.4f}|Sim {:03d}|Train loss batch {:.4f}|".format(
                        ep,
                        itr, 
                        val_loss,
                        sim_ids_to_train[0],
                        losses.item())
                
                logger.info(message)
                
        if itr % ckpt_freq == 0:
            print('saving check point...')
            torch.save({'args': args,
                        'state_dict': pnode_conv.state_dict(),
                       }, ckpt_path)

Epoch 01|Iteration 001|Val Losses Agg 11.6788|Sim 117|Train loss batch 0.1958|
Epoch 01|Iteration 002|Val Losses Agg 9.6814|Sim 117|Train loss batch 0.4928|
Epoch 01|Iteration 003|Val Losses Agg 7.8701|Sim 117|Train loss batch 0.4337|
Epoch 01|Iteration 004|Val Losses Agg 7.0431|Sim 117|Train loss batch 0.3618|
Epoch 01|Iteration 005|Val Losses Agg 9.1988|Sim 117|Train loss batch 0.2825|
Epoch 01|Iteration 006|Val Losses Agg 12.9266|Sim 117|Train loss batch 0.2407|
Epoch 01|Iteration 007|Val Losses Agg 13.1310|Sim 117|Train loss batch 0.2597|
Epoch 01|Iteration 008|Val Losses Agg 10.9804|Sim 117|Train loss batch 0.2433|
Epoch 01|Iteration 009|Val Losses Agg 9.1127|Sim 117|Train loss batch 0.2035|
Epoch 01|Iteration 010|Val Losses Agg 8.2023|Sim 117|Train loss batch 0.1881|
Epoch 01|Iteration 011|Val Losses Agg 7.9266|Sim 117|Train loss batch 0.1866|
Epoch 01|Iteration 012|Val Losses Agg 8.1385|Sim 117|Train loss batch 0.1824|
Epoch 01|Iteration 013|Val Losses Agg 8.9081|Sim 117|Train l

saving check point...


Epoch 02|Iteration 001|Val Losses Agg 11.3733|Sim 138|Train loss batch 0.4376|
Epoch 02|Iteration 002|Val Losses Agg 10.9402|Sim 138|Train loss batch 0.0571|
Epoch 02|Iteration 003|Val Losses Agg 11.8333|Sim 138|Train loss batch 0.0636|
Epoch 02|Iteration 004|Val Losses Agg 14.0371|Sim 138|Train loss batch 0.0404|
Epoch 02|Iteration 005|Val Losses Agg 15.4204|Sim 138|Train loss batch 0.0204|
Epoch 02|Iteration 006|Val Losses Agg 15.0491|Sim 138|Train loss batch 0.0452|
Epoch 02|Iteration 007|Val Losses Agg 13.2435|Sim 138|Train loss batch 0.0391|
Epoch 02|Iteration 008|Val Losses Agg 11.8279|Sim 138|Train loss batch 0.0171|
Epoch 02|Iteration 009|Val Losses Agg 11.8169|Sim 138|Train loss batch 0.0332|
Epoch 02|Iteration 010|Val Losses Agg 13.0971|Sim 138|Train loss batch 0.0341|
Epoch 02|Iteration 011|Val Losses Agg 14.5377|Sim 138|Train loss batch 0.0153|
Epoch 02|Iteration 012|Val Losses Agg 14.4559|Sim 138|Train loss batch 0.0290|
Epoch 02|Iteration 013|Val Losses Agg 13.0635|Sim 13

saving check point...


Epoch 03|Iteration 001|Val Losses Agg 12.5255|Sim 035|Train loss batch 0.4197|
Epoch 03|Iteration 002|Val Losses Agg 12.4043|Sim 035|Train loss batch 0.0232|
Epoch 03|Iteration 003|Val Losses Agg 13.5908|Sim 035|Train loss batch 0.0248|
Epoch 03|Iteration 004|Val Losses Agg 14.3992|Sim 035|Train loss batch 0.0102|
Epoch 03|Iteration 005|Val Losses Agg 13.7757|Sim 035|Train loss batch 0.0203|
Epoch 03|Iteration 006|Val Losses Agg 12.5010|Sim 035|Train loss batch 0.0110|
Epoch 03|Iteration 007|Val Losses Agg 12.6084|Sim 035|Train loss batch 0.0205|
Epoch 03|Iteration 008|Val Losses Agg 13.9095|Sim 035|Train loss batch 0.0183|
Epoch 03|Iteration 009|Val Losses Agg 14.2059|Sim 035|Train loss batch 0.0134|
Epoch 03|Iteration 010|Val Losses Agg 13.2135|Sim 035|Train loss batch 0.0175|
Epoch 03|Iteration 011|Val Losses Agg 12.9172|Sim 035|Train loss batch 0.0095|
Epoch 03|Iteration 012|Val Losses Agg 13.8056|Sim 035|Train loss batch 0.0136|
Epoch 03|Iteration 013|Val Losses Agg 13.8559|Sim 03

saving check point...


Epoch 04|Iteration 001|Val Losses Agg 11.9253|Sim 165|Train loss batch 0.3973|
Epoch 04|Iteration 002|Val Losses Agg 11.6534|Sim 165|Train loss batch 0.0334|
Epoch 04|Iteration 003|Val Losses Agg 12.3190|Sim 165|Train loss batch 0.0363|
Epoch 04|Iteration 004|Val Losses Agg 13.7457|Sim 165|Train loss batch 0.0205|
Epoch 04|Iteration 005|Val Losses Agg 14.4196|Sim 165|Train loss batch 0.0168|


KeyboardInterrupt: 