In [1]:
"""──────────────────────────────────────────────────────────────────────────┐
│ Loading necessary libraries to build and train model                       │
└──────────────────────────────────────────────────────────────────────────"""
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
import os,sys,gc
import numpy as np
import pickle
import torch
import proplot as plot
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import pandas as pd
import glob
import properscoring as ps
from copy import deepcopy
plot.rc.update({'figure.facecolor':'w','axes.labelweight':'ultralight',
                'tick.labelweight':'ultralight','gridminor.linestyle':'--','title.weight':'normal','linewidth':0.5})
import random

In [2]:
import sys
sys.path.insert(1, '/work/FAC/FGSE/IDYST/tbeucler/default/freddy0218/2024_TCG_VED_WRFsen/')
from util.ml import (preproc,vae)
from util.wrf_process import (read_and_write)
import read_config

# Read files

In [3]:
class proc_X:
    def __init__(self,X,PCA):
        self.X=X
        self.PCA=PCA
        
    def myPCA_projection_sen(self,varname,toproj_flatvar,orig_flatvar):
        projvar_transformed = np.dot(toproj_flatvar-np.nanmean(orig_flatvar,axis=0),self.PCA[varname].components_.T)
        return projvar_transformed

    def create_timeseries(self,varname):
        Xtrain,Xvalid,Xtest = self.X['train'], self.X['valid'], self.X['test']
        train = self.PCA[varname].transform(Xtrain[varname])
        valid = self.myPCA_projection_sen(varname,Xvalid[varname],Xtrain[varname])
        test = self.myPCA_projection_sen(varname,Xtest[varname],Xtrain[varname])
        return {'train':train,'valid':valid,'test':test}

    def normalize_timeseries(self,timeseries=None,category='train'):
        #assert timeseries['u'].shape[-1]==26,"var shape error"
        output = np.zeros_like(timeseries[category])
        for le in range(timeseries[category].shape[1]):
            trainmean,trainstd = np.nanmean(timeseries['train'][:,le]), np.nanstd(timeseries['train'][:,le])
            output[:,le] = (timeseries[category][:,le]-trainmean)/trainstd
        return output

In [22]:
sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*pkl'))[0].split('/')[-1][int(17):].split('.')[0]
#sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*'))[17]

'0_3'

In [9]:
for i in tqdm([1,18]):#range(len(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xsmooth9100*'))))[:]):
    X = read_and_write.depickle(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xsmooth9100*'))[i])
    validindices = sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xsmooth9100*'))[i].split('/')[-1].split('.')[0][12:]
    PCA = read_and_write.depickle(sorted(glob.glob('../../storage/proc/PCA/PCAsmooth9100*'))[i])
    y = read_and_write.depickle(sorted(glob.glob('../../storage/proc/y*'))[i])
    
    LWs = proc_X(X,PCA['PCA']).create_timeseries('LW')
    SWs = proc_X(X,PCA['PCA']).create_timeseries('SW')
    LWstop = np.abs(PCA['PCA']['LW'].explained_variance_ratio_.cumsum()-0.5).argmin()
    SWstop = np.abs(PCA['PCA']['SW'].explained_variance_ratio_.cumsum()-0.8).argmin()
    
    LWs_norml = {'train':proc_X(X,PCA['PCA']).normalize_timeseries(LWs,'train')[:,:LWstop],
                 'valid':proc_X(X,PCA['PCA']).normalize_timeseries(LWs,'valid')[:,:LWstop],
                 'test':proc_X(X,PCA['PCA']).normalize_timeseries(LWs,'test')[:,:LWstop]}
    SWs_norml = {'train':proc_X(X,PCA['PCA']).normalize_timeseries(SWs,'train')[:,:SWstop],
                 'valid':proc_X(X,PCA['PCA']).normalize_timeseries(SWs,'valid')[:,:SWstop],
                 'test':proc_X(X,PCA['PCA']).normalize_timeseries(SWs,'test')[:,:SWstop]}
    Xtrain = np.concatenate([LWs_norml['train'],SWs_norml['train']],axis=1)
    Xvalid = np.concatenate([LWs_norml['valid'],SWs_norml['valid']],axis=1)
    Xtest = np.concatenate([LWs_norml['test'],SWs_norml['test']],axis=1)
    read_and_write.save_to_pickle({'train':Xtrain,'valid':Xvalid,'Xtest':Xtest},f'../../storage/proc/Xsmooth/9100/Xtimeseries_{validindices}.pkl')
    del X,PCA,y,Xtrain,Xvalid,Xtest
    gc.collect()

  0%|          | 0/2 [00:00<?, ?it/s]

# Train VED: Get best params

In [6]:
def objective(trial):
    models,losses = [],[]
    model = vae.VAE(nummem[-2],nummem[-1],1,1,1,nummem)
    #droprate = trial.suggest_float("droprate",0.05,0.45)
    lr = trial.suggest_float("lr",1e-6,1e-3)#,log=True)
    optimizer = torch.optim.Adam(model.parameters(),lr=lr)
    criterion = vae.vae_loss
    n_epochs = 10000
    scheduler2 = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=1e-8, max_lr=1e-4,cycle_momentum=False)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min',min_lr=1e-12)

    schedulerCY,schedulerLS = scheduler2,scheduler

    l2_lambda = trial.suggest_float("l2_lambda",0.01,0.02)
    #model,loss = train_model(model=model,train_data=data_loaders['train'],val_data=data_loaders['val'],optimizer=optimizer,scheduler=[scheduler,scheduler2],numepochs=num_epochs,early_stopper=None,variance_store=None,\
    #                         lossfunc=lossfuncs[0],regularization='L2',l1_lambda=0.1,l2_lambda=l2_lambda,trial=trial)
    #torch.save(model,'../tmp/bayesian/saved_model.8.'+str(trial.number)+'.pt')
    # Define Loss, Optimizer
    train_losses = []
    val_losses = []
    for epoch in range(1,n_epochs+1):
        loss = 0
        for features, labels in train_loader:
            optimizer.zero_grad() # Clears existing gradients from previous epoch
            reconX,mu1,logvar1,mu2,logvar2 = model(features)
            batch_loss,_,_ = vae.vae_loss(reconX, labels.unsqueeze(1),mu1,logvar1,mu2,logvar2,losscoeff)
            batch_loss.backward()
            optimizer.step()
            schedulerCY.step()
            loss += batch_loss.item()
        loss = loss/len(train_loader)
        train_losses.append(loss)
        criterion = vae.vae_loss
        val_loss,_,_ = vae.eval_model(model,
                              val_loader,
                              criterion,
                             l2_lambda,
                                  losscoeff)
        schedulerLS.step(val_loss)
        val_losses.append(val_loss)
        if epoch%1000 == 0:
            print('Epoch: {}/{}.............'.format(epoch, n_epochs))
            print("Loss: {:.4f}".format(loss))
        #if val_loss <= min(val_losses):
        #    torch.save(model,'best_model'+str(trial.number))
    #torch.save(model,'./tmp/bayesian/best_model.8.'+str(trial.number)+'.pt')
    return loss

In [11]:
suffix='/work/FAC/FGSE/IDYST/tbeucler/default/freddy0218/2024_TCG_VED_WRFsen/'
suffix+'proc/VEDsmooth_9100/'+str(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*pkl'))[i].split('/')[-1][22:].split('.')[0])+'/losscoeff_0/'+'bestparams.pkt'

'/work/FAC/FGSE/IDYST/tbeucler/default/freddy0218/2024_TCG_VED_WRFsen/proc/VEDsmooth_9100/0_3/losscoeff_0/bestparams.pkt'

In [15]:
#for i in range(len(sorted(glob.glob('../../storage/proc/X*pkl'))[:2])):
for i in [1]:#range(len(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*'))[1:3])):
    print(i)
    PCA = read_and_write.depickle(sorted(glob.glob('../../storage/proc/PCA/PCAsmooth9100*'))[i])
    X = read_and_write.depickle(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*'))[i])
    y = read_and_write.depickle(sorted(glob.glob('../../storage/proc/y*'))[i])
    X['test'] = X.pop('Xtest')
    print(X['train'].shape,y['train'].shape)

    print(aaaa)
    validindices = sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*pkl'))[i].split('/')[-1][22:].split('.')[0]
    LWstop = np.abs(PCA['PCA']['LW'].explained_variance_ratio_.cumsum()-0.5).argmin()
    SWstop = np.abs(PCA['PCA']['SW'].explained_variance_ratio_.cumsum()-0.8).argmin()

    train_data,val_data,test_data = preproc.prepare_tensors(X,y,'No')
    batch_size = 10
    num_workers = 2
    train_loader = torch.utils.data.DataLoader(
        dataset=train_data,
        batch_size=batch_size,
        shuffle=True)
    val_loader = torch.utils.data.DataLoader(
        dataset=val_data,
        batch_size=batch_size,
        shuffle=False)
    test_loader = torch.utils.data.DataLoader(
        dataset=test_data,
        batch_size=batch_size,
        shuffle=False)
    del PCA,X,y
    gc.collect()
    
    import optuna
    nummem = [0,LWstop,SWstop]
    losscoeff=1
    study = optuna.create_study(directions=["minimize"])
    study.optimize(objective, n_trials=6)#, timeout=300)

    suffix = '/work/FAC/FGSE/IDYST/tbeucler/default/freddy0218/2024_TCG_VED_WRFsen/storage'
    #os.makedirs(suffix+'/proc/VED/'+str(sorted(glob.glob('../../storage/proc/X*'))[i].split('/')[-1][12:].split('.')[0]))
    #os.makedirs(suffix+'/proc/VED/'+str(sorted(glob.glob('../../storage/proc/X*'))[i].split('/')[-1][12:].split('.')[0])+'/losscoeff_0/')
    if losscoeff==1.0:
        losscoeff2 = int(losscoeff)
        read_and_write.save_to_pickle(study,
                                      suffix+'/proc/VEDsmooth_9100/'+str(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*pkl'))[i].split('/')[-1][22:].split('.')[0])+
                                      '/losscoeff_0/'+'bestparams.pkt')
        for losscoeff in [0.9,0.65,0.55,0.45,0.35,0.3,0.25,0.95,0.85,0.8,0.75,0.7,0.6,0.5,0.4]:
            read_and_write.save_to_pickle(study,suffix+'/proc/VEDsmooth_9100/'+str(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*pkl'))[i].split('/')[-1][22:].split('.')[0])+
                                          '/losscoeff_'+str(losscoeff)+'/'+'bestparams.pkt')
    else:
        read_and_write.save_to_pickle(study,suffix+'/proc/VED/'+str(sorted(glob.glob('../../storage/proc/X*pkl'))[i].split('/')[-1][22:].split('.')[0])+'/losscoeff_0/'+'bestparams.pkt')

1
(461, 15) (425,)


NameError: name 'aaaa' is not defined

In [37]:
i=0
PCA = read_and_write.depickle(sorted(glob.glob('../../storage/proc/PCA/PCA*'))[i])
X = read_and_write.depickle(sorted(glob.glob('../../storage/proc/Xtimeseries*'))[i])
y = read_and_write.depickle(sorted(glob.glob('../../storage/proc/y*'))[i])
X['test'] = X.pop('Xtest')
    
validindices = sorted(glob.glob('../../storage/proc/X*pkl'))[i].split('/')[-1][12:].split('.')[0]
LWstop = np.abs(PCA['PCA']['LW'].explained_variance_ratio_.cumsum()-0.5).argmin()
SWstop = np.abs(PCA['PCA']['SW'].explained_variance_ratio_.cumsum()-0.8).argmin()

train_data,val_data,test_data = prepare_tensors(X,y,'No')
batch_size = 5
num_workers = 2
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    shuffle=True)
val_loader = torch.utils.data.DataLoader(
    dataset=val_data,
    batch_size=batch_size,
    shuffle=False)
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=batch_size,
    shuffle=False)
del PCA,X,y
gc.collect()
    
nummem = [0,LWstop,SWstop]
losscoeff=1

In [38]:
study = read_and_write.depickle(suffix+'/proc/VED/'+str(sorted(glob.glob('../../storage/proc/X*pkl'))[i].split('/')[-1][12:].split('.')[0])+'/losscoeff_0/'+'bestparams.pkt')

In [39]:
times = ['exp1a','exp1b','exp1c','exp1d','exp1e','exp1f','exp1g','exp1h','exp1i']
#times = ['exp1e','exp1f','exp1g','exp1h','exp1i']#,'exp1d','exp1e']
for i in tqdm(range(len(times))):
    models,losses = [],[]
    model = vae.VAE(nummem[-2],nummem[-1],1,1,1,nummem)
    optimizers = [torch.optim.Adam(model.parameters(), lr=study.best_params['lr'])]#, optim.AdaBound(model.parameters(),lr=1e-7)] 1e-6 [torch.optim.Adam(model.parameters(),lr=0.5e-5),torch.optim.SGD(model.parameters(),lr=0.5e-5,momentum=0.8)]
    loss = torch.nn.L1Loss()
    for optimizer in optimizers:
        scheduler2 = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.5e-8, max_lr=7e-5,cycle_momentum=False) #1e-9/1e-5
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min',min_lr=1e-12)  #1e-18
        num_epochs = 1000*40#26
        #early_stopper = ts_models.EarlyStopping(patience=250, verbose=False, delta=1e-5, path='checkpoint.pt', trace_func=print)#EarlyStopper(patience=8, min_delta=1e-3)
        early_stopper = vae.EarlyStopping(patience=2000, verbose=False, delta=1.5e-5, path='checkpoint.pt', trace_func=print)
        #variance_store = [varu,varv,varw,varth]
        #variance_store = [varu,varv,varth]
        model,loss,_ = vae.train_model(model=model,optimizer=optimizer,scheduler=[scheduler,scheduler2],numepochs=num_epochs,early_stopper=early_stopper,variance_store=None,\
                                         lossfunc=loss,train_loader=train_loader,val_loader=val_loader,test_loader=test_loader,l2_lambda=study.best_params['l2_lambda'],count=10,vaeloss_coeff=losscoeff)
        models.append(model)
        losses.append(loss)
    #torch.save(models, '../tmp/torch_try/ts/'+str(expname)+'/0/'+'models'+str(splitnum)+'_'+str(expname)+'3dnonln_1115_'+str(times[i])+'.pt')
    #read_and_proc.save_to_pickle('../tmp/torch_try/ts/'+str(expname)+'/0/'+'losses'+str(splitnum)+'_'+str(expname)+'3dnonln_1115_'+str(times[i])+'.pkt',losses,'PICKLE')
    if losscoeff==1.0:
        losscoeff2 = int(losscoeff)
        torch.save(models,suffix+'/proc/VED/'+str(sorted(glob.glob('../../storage/proc/X*pkl'))[0].split('/')[-1][12:].split('.')[0])+'/losscoeff_0/'+'modelstest_vae_'+str(times[i])+'.pk')
        read_and_write.save_to_pickle(losses,suffix+'/proc/VED/'+str(sorted(glob.glob('../../storage/proc/X*pkl'))[0].split('/')[-1][12:].split('.')[0])+'/losscoeff_0/'+'lossestest_vae_'+str(times[i])+'.pkt')
    else:
        torch.save(models,filepath+'vae/losscoeff_'+str(losscoeff)+'/'+str(splitnum)+'/modelstest'+str(splitnum)+'_vae_'+str(times[i])+'.pk')
        read_and_write.save_to_pickle(filepath+'vae/losscoeff_'+str(losscoeff)+'/'+str(splitnum)+'/lossestest'+str(splitnum)+'_vae_'+str(times[i])+'.pkt',losses,'PICKLE')        

  0%|          | 0/9 [00:00<?, ?it/s]

(9.85142297907309, 9.222853348805355)
(5.639664442701773, 5.772621237314665)
(5.068732455542142, 5.500547092694503)
(5.050715463235974, 5.640253332945017)
(4.890204999934543, 5.495082222498381)
(4.846279404041442, 5.629986299918248)
(4.709101298315958, 5.655073184233445)
(4.642548099160194, 5.430186977753272)
(4.660028381103819, 5.504338076481452)
(4.674208345738324, 5.378917817886059)
(4.6938084248792045, 5.492722066549154)
(4.619749583303928, 5.486359866765829)
(9.25029461763122, 8.617649371807392)
(5.525918946178122, 5.7201358538407545)
(5.181089271198619, 5.4639218266193685)
(4.9638013697483325, 5.531228909125695)
(4.946268291635946, 5.423445513615241)
(4.749122809280049, 5.729436076604403)
(4.768325078216466, 5.5084604277060585)
(4.779350396838378, 5.470411873780764)
(4.643695956604048, 5.4870196626736565)
(4.613083222372965, 5.562157786809481)
(4.648070892149752, 5.478477196051524)
(8.29459180005572, 7.3197731421544)
(5.588655034249479, 5.591867497334113)
(5.108101380819624, 5.55

# VED: resume training

In [35]:
config_set = read_config.read_config('../../config.ini')
startname = 17

In [32]:
shutil.copy('/work/FAC/FGSE/IDYST/tbeucler/default/freddy0218/2024_TCG_VED_WRFsen/storage/proc/VED/0_6/losscoeff_0/lossestest_vae_exp1b.pkt',
            '/work/FAC/FGSE/IDYST/tbeucler/default/freddy0218/2024_TCG_VED_WRFsen/storage/proc/VED/0_6/losscoeff_0.95/lossestest_vae_exp1b.pkt')

'/work/FAC/FGSE/IDYST/tbeucler/default/freddy0218/2024_TCG_VED_WRFsen/storage/proc/VED/0_6/losscoeff_0.95/lossestest_vae_exp1b.pkt'

In [7]:
config_set = read_config.read_config('../../config.ini')

In [38]:
suffix+'storage/proc/VEDsmooth_9100/'+str(sorted(glob.glob('../storage/proc/Xsmooth/9100/Xtimeseries*'))[1].split('/')[-1][startname:].split('.')[0])+\
'/losscoeff_'+str(0.95)+'/'+'modelstest_vae_'+str(exp)+'.pk'

IndexError: list index out of range

In [30]:
class resume_training:
    def __init__(self,splitnum=None,droprate=None,nonln_num=None,timelag=None,batch_size=None,num_workers=2):
        self.splitnum=splitnum
        self.droprate=droprate
        self.vaeloss_coeff=nonln_num
        self.timelag = timelag
        self.batch_size = batch_size
        self.num_workers=2
        
    def get_data(self,suffix='/work/FAC/FGSE/IDYST/tbeucler/default/freddy0218/2024_TCG_VED_WRFsen/',config_set=config_set):
        #PCA = read_and_write.depickle(sorted(glob.glob(suffix+'storage/proc/PCA/PCA*'))[self.splitnum])
        X = read_and_write.depickle(sorted(glob.glob(suffix+'storage/proc/Xsmooth/9100/Xtimeseries*'))[self.splitnum])
        y = read_and_write.depickle(sorted(glob.glob(suffix+'storage/proc/y*'))[self.splitnum])
        X['test'] = X.pop('Xtest')
        
        validindices = sorted(glob.glob(suffix+'storage/proc/Xsmooth/9100/Xtimeseries*'))[self.splitnum].split('/')[-1][startname:].split('.')[0]
        brchindex = read_and_write.depickle(suffix+'storage/proc/VEDsmooth_9100/'+str(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*'))[self.splitnum].split('/')[-1][startname:].split('.')[0])+\
                                        '/losscoeff_0'+'/'+'nummem.pkl')
        #LWstop = np.abs(PCA['PCA']['LW'].explained_variance_ratio_.cumsum()-float(config_set['ML_LWnumcomps'])).argmin()
        #SWstop = np.abs(PCA['PCA']['SW'].explained_variance_ratio_.cumsum()-float(config_set['ML_SWnumcomps'])).argmin()
        train_data,val_data,test_data = preproc.prepare_tensors(X,y,'No')
        train_loader = torch.utils.data.DataLoader(dataset=train_data,batch_size=self.batch_size,shuffle=True)
        val_loader = torch.utils.data.DataLoader(dataset=val_data,batch_size=self.batch_size,shuffle=False)
        test_loader = torch.utils.data.DataLoader(dataset=test_data,batch_size=self.batch_size,shuffle=False)
        return train_loader,val_loader,test_loader,brchindex#[0,LWstop,SWstop]
    
    def continue_training(self,suffix='/work/FAC/FGSE/IDYST/tbeucler/default/freddy0218/2024_TCG_VED_WRFsen/',config_set=None,exp='e',scheduler_lr=[1e-14,5e-10],early_stopper=None):
        i=self.splitnum
        train_loader,val_loader,_,brchindex = self.get_data(config_set=config_set)
        study = read_and_write.depickle(suffix+'storage/proc/VEDsmooth_9100/'+str(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*'))[self.splitnum].split('/')[-1][startname:].split('.')[0])+\
                                        '/losscoeff_'+str(self.vaeloss_coeff)+'/'+'bestparams.pkt')
        original_model = vae.VAE(brchindex[-2],brchindex[-1],1,1,1,brchindex)
        #######################################################################################################################################
        # Transfer state dict
        pretrained_model = torch.load(suffix+'storage/proc/VEDsmooth_9100/'+\
                                      str(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*'))[self.splitnum].split('/')[-1][startname:].split('.')[0])+\
                                      '/losscoeff_'+str(self.vaeloss_coeff)+'/'+'modelstest_vae_'+str(exp)+'.pk')[0]
        model_dict = original_model.state_dict()
        pretrained_dict = pretrained_model.state_dict()
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
        model_dict.update(pretrained_dict)
        original_model.load_state_dict(model_dict)
        #######################################################################################################################################
        #######################################################################################################################################
        optimizer = torch.optim.Adam(original_model.parameters(), lr=study.best_params['lr'])
        #lossfunc = torch.nn.L1Loss()
        #scheduler2 = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=1e-16, max_lr=5e-10,cycle_momentum=False) #1e-9/1e-5
        scheduler2 = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=scheduler_lr[0], max_lr=scheduler_lr[1],cycle_momentum=False) #1e-9/1e-5
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min',min_lr=1e-20)
        #######################################################################################################################################
        
        lowest_val_loss = float('inf')
        best_model = None
        schedulerCY,schedulerLS = scheduler2,scheduler
        train_losses,trainrecon_losses,trainkl_losses = [],[],[]
        val_losses,valrecon_losses,valkl_losses = [],[],[]
        
        for epoch in tqdm(range(20000)):
            original_model.train()
            train_loss = 0
            trainrecon_loss = 0
            trainkl_loss = 0
            # Training loop here
            for features, labels in train_loader:
                optimizer.zero_grad()
                reconX,mu1,logvar1,mu2,logvar2 = original_model(features)
                batch_loss,recon_loss,kl_loss = vae.vae_loss(reconX, labels.unsqueeze(1),mu1,logvar1,mu2,logvar2,self.vaeloss_coeff)
                batch_loss.backward()
                optimizer.step()
                schedulerCY.step()
                
                train_loss += batch_loss.item() 
                trainrecon_loss += recon_loss.item()
                trainkl_loss += kl_loss.item()
                
            train_loss = train_loss / len(train_loader)
            train_losses.append(train_loss)
            trainrecon_loss = trainrecon_loss / len(train_loader)
            trainrecon_losses.append(trainrecon_loss)
            trainkl_loss = trainkl_loss / len(train_loader)
            trainkl_losses.append(trainkl_loss)

            # Validation loop
            original_model.eval()
            with torch.no_grad():
                val_loss = 0
                val_reconloss = 0
                val_klloss = 0
                val_loss,val_reconloss,val_klloss = 0,0,0
                for features, labels in val_loader:
                    reconX,mu1,logvar1,mu2,logvar2 = original_model(features)
                    batch_loss,recon_loss,kl_loss = vae.vae_loss(reconX, labels.unsqueeze(1),mu1,logvar1,mu2,logvar2,self.vaeloss_coeff)
                    val_loss+=batch_loss.item()
                    val_reconloss+=recon_loss.item()
                    val_klloss+=kl_loss.item()
            
                val_loss = val_loss / len(val_loader)
                val_reconloss = val_reconloss / len(val_loader)
                val_klloss = val_klloss / len(val_loader)
                val_losses.append(val_loss)
                valrecon_losses.append(val_reconloss)
                valkl_losses.append(val_klloss)

            # Check if the current model has the lowest validation loss
            if val_loss < lowest_val_loss:
                lowest_val_loss = val_loss
                best_model = original_model#.state_dict()

            if early_stopper:
                if early_stopper.__call__(val_loss, original_model):
                    break
                
            #torch.save(best_model, savefilepath+'vae/losscoeff_'+str(losscoeff)+'/'+str(splitnum)+'/modelstest'+str(splitnum)+'_vae_'+str(times[i])+'.pk')
            torch.save(original_model.state_dict(), suffix+'storage/proc/VEDsmooth_9100/'+str(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*'))[self.splitnum].split('/')[-1][startname:].split('.')[0])+
                       '/losscoeff_'+str(self.vaeloss_coeff)+'/'+'modelstest_vae_'+str(exp)+'_best_weights.pk')
            torch.save(best_model, suffix+'storage/proc/VEDsmooth_9100/'+str(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*'))[self.splitnum].split('/')[-1][startname:].split('.')[0])+
                       '/losscoeff_'+str(self.vaeloss_coeff)+'/'+'modelstest_vae_'+str(exp)+'_best.pk')
            read_and_write.save_to_pickle({'trainALL':train_losses,'valALL':val_losses,'trainRECON':trainrecon_losses,'valRECON':valrecon_losses,'trainKL':trainkl_losses,'valKL':valkl_losses},
                                          suffix+'storage/proc/VEDsmooth_9100/'+str(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*'))[self.splitnum].split('/')[-1][startname:].split('.')[0])+
                                          '/losscoeff_'+str(self.vaeloss_coeff)+'/'+'lossestest_vae_'+str(exp)+'_best.pkt',
                                        )
        return None

In [36]:
for exp in ['exp1a','exp1b','exp1c','exp1d','exp1e','exp1f','exp1g','exp1h','exp1i']:#['a','b','c','d','e','f','g','h','i']:
    print(exp)
    early_stopper = vae.EarlyStopping(patience=1500, verbose=False, delta=1.5e-5, path='checkpoint.pt', trace_func=print)
    resume_training(1,None,0.95,None,5,2).continue_training(config_set=config_set,exp=exp,scheduler_lr=[1e-14,5e-10],early_stopper=early_stopper)
    #except:
    #    continue

exp1a


IndexError: list index out of range

In [26]:
import shutil
suffix='/work/FAC/FGSE/IDYST/tbeucler/default/freddy0218/2024_TCG_VED_WRFsen/'
for itime in tqdm([0,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]): #1,17,18,19
    fikes = glob.glob(suffix+'storage/proc/VEDsmooth_9100/'+str(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*pkl'))[itime].split('/')[-1][17:].split('.')[0])+
                      '/losscoeff_0/*')
    filkenames = [obj.split('/')[-1] for obj in fikes]
    for losscoeff in [0.9,0.65,0.55,0.45,0.35,0.3,0.25,0.95,0.85,0.8,0.75,0.7,0.6,0.5,0.4]:
        [shutil.copy(suffix+'storage/proc/VEDsmooth_9100/'+str(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*pkl'))[itime].split('/')[-1][17:].split('.')[0])+'/losscoeff_0/'+str(obsz),
                     suffix+'storage/proc/VEDsmooth_9100/'+str(sorted(glob.glob('../../storage/proc/Xsmooth/9100/Xtimeseries*pkl'))[itime].split('/')[-1][17:].split('.')[0])+'/losscoeff_'+str(losscoeff)+'/'+str(obsz)) for obsz in filkenames]

  0%|          | 0/16 [00:00<?, ?it/s]