In [1]:
import numpy as np 
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split
import torch.optim as optimizer
from collections import OrderedDict
from torch.optim.lr_scheduler import ReduceLROnPlateau

import random
import time

In [2]:
%run "C:\SBU-3\Jupyter-Research\RSCR\\2 Machine Learning\\4 CVAE Anar\\CVAE Model.ipynb""

In [3]:
class VAEDataset(Dataset):
    def __init__(self):
        
        paths = np.load('C:\SBU-3\Jupyter-Research\RSCR\Saved Data\\555\\4_Path_555.npy')
        mechs = np.load('C:\SBU-3\Jupyter-Research\RSCR\Saved Data\\555\\4_Mec_555_16.npy')
        paths = paths.reshape((paths.shape[0], paths.shape[1]*paths.shape[2]))
        
        if mechs.shape[1] == 16:
            mechs = mechs.reshape((mechs.shape[0], mechs.shape[1]))
        else:
            mechs = mechs.reshape((mechs.shape[0], mechs.shape[1]*mechs.shape[2]))
            
        self.data = np.hstack((paths, mechs))
        random.shuffle(self.data)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        path_mech = self.data[idx]
        return path_mech[:300].astype(np.float32), path_mech[300:].astype(np.float32)

In [4]:
def loss_eval_cvae(data_loader, model, loss_fn):
    model.eval()
    overall_loss = 0
    kld_loss = 0
    mse_loss = 0 
    
    data_inx = 0
    with torch.no_grad():
        for batch_inx, data in enumerate(data_loader):
            data_inx += 1
            path = data[0].to(DEVICE)
            joints = data[1].to(DEVICE)
            
            recon_joints, mu, logvar = model.forward(joints, path)
            loss, kld, mse = loss_fn(recon_joints, joints, mu, logvar)
            overall_loss += loss.item()
            kld_loss += kld.item()
            mse_loss += mse.item()
        
    overall_loss = overall_loss/data_inx
    kld_loss = kld_loss/data_inx
    mse_loss = mse_loss/data_inx
    
    return overall_loss, kld_loss, mse_loss

In [5]:
def CVAE_Train(train_loader, val_loader, BATCH_SIZE, LR, EPOCH, model):
    start_time = time.time()
    'Define loss function'
    def loss_fn(recon_x, x, mu, logvar):
        KLD = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())
        mse_loss = nn.MSELoss(reduction='mean')(recon_x, x)
        return mse_loss+10*KLD, 10*KLD, mse_loss
        
    'Define optimizer'
    optim = optimizer.Adam(model.parameters(), lr=LR)
    scheduler = ReduceLROnPlateau(optim, mode='min', factor=0.1, patience=7, threshold=1e-4, cooldown=10)
    epochs = EPOCH
    
    'Print starting loss'
    train_loss, train_kld, train_mse = loss_eval_cvae(train_loader, model, loss_fn)
    print("\tStart Train Loss:", train_loss, "\tStart KLD:", train_kld,"\tStart MSE:", train_mse)
    #wandb.log({"Train loss": train_loss, "Train KLD":train_kld, "Train MSE": train_mse})
    
    val_loss, val_kld, val_mse = loss_eval_cvae(val_loader, model, loss_fn)
    print("\tStart Val Loss:", val_loss, "\tStart KLD:", val_kld,"\tStart MSE:", val_mse)
    #wandb.log({"Val loss":val_loss, "Val KLD":val_kld, "Val MSE": val_mse})
    print('-----------------------------------------------------------------------------------------------------')
    'Train'
    for epoch in range(epochs):
        model.train()
        
        train_loss = 0
        train_kld = 0
        train_mse = 0
        
        train_inx = 0
        for batch_inc, data in enumerate(train_loader):
            train_inx += 1
            #x = torch.flatten(x[0], start_dim=1)
            path = data[0].to(DEVICE)
            joints = data[1].to(DEVICE)
            
            optim.zero_grad()
            recon_joints, mu, logvar = model.forward(joints, path)
            
            loss, kld, mse = loss_fn(recon_joints, joints, mu, logvar)
            train_loss += loss.item()
            train_kld += kld.item()
            train_mse += mse.item()
            print(train_loss)
            
            loss.backward()
            optim.step()  
        train_loss = train_loss/train_inx
        train_kld = train_kld/train_inx
        train_mse = train_mse/train_inx
        #wandb.log({"Train loss": train_loss, "Train KLD": train_kld, "Train MSE": train_mse})
      
        'Validation in Train'
        val_loss, val_kld, val_mse = loss_eval_cvae(val_loader, model, loss_fn)
        #wandb.log({"Val loss":val_loss, "Val KLD":val_kld, "Val MSE": val_mse})   
        if epoch%50 == 0:
            print('-----------------------------------------------------------------------------------------------------')
            print("\tEpoch:", epoch, "complete!", "\tTime Taken:", round((time.time() - start_time)/60,2),"mins")
            print("\tTrain Loss:", train_loss, "\tTrain KLD:", train_kld,"\tTrain MSE:", train_mse)
            print("\tVal Loss:", val_loss, "\tVal KLD:", val_kld,"\tVal MSE:", val_mse)
        
        scheduler.step(val_loss)
        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
    print("Training Complete!")
    

    #wandb.finish()
    return val_loss, val_kld, val_mse
        
        

In [6]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else "cpu")

In [7]:
mechanism_size = 16
curve_size = 300
batch_size = 1024
latent_dim = 2048

dataset = VAEDataset()

dataset_size = len(dataset)
train_size = int(0.9 * dataset_size)
val_size = dataset_size - train_size

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

In [8]:
'Login Wandb'
import wandb
wandb.login()

wandb.init(
    # set the wandb project where this run will be logged
    project="RSCR Journal",
    name = "4_555_cVAE_A",
    
    # track hyperparameters and run metadata
    config={
    "learning_rate": 1e-3,
    "architecture": "cVAE_Anar",
    "dataset": "4_Mec_555",
    "epochs": 150,
    "batchsize": 2048,
    "Weight Decay": 0,
    }
    
)

'Train'
model_1 = cVAE(mechanism_size, latent_dim, curve_size)
model_1 = model_1.to(DEVICE)
LR = 1e-3
EPOCH = 1


early_stopping = EarlyStopping('/home/xudeng/XD/data/model', name='4_555_mapper_A_256_2048*6_d0.1')
loss_temp = CVAE_Train(train_loader, val_loader, batch_size, LR, EPOCH, model_1)
LOSS.append("Train 1")
LOSS.append(loss_temp)

	Start Train Loss: 3.3565389981771157 	Start KLD: 2.61965755961635 	Start MSE: 0.736881436083311
	Start Val Loss: 3.3571300765742427 	Start KLD: 2.6193584203720093 	Start MSE: 0.7377716626809991
-----------------------------------------------------------------------------------------------------
3.3590283393859863
34.75242471694946
43.895023822784424
50.140634059906006
54.055320024490356
56.3044319152832
58.159303426742554
59.700817823410034
60.92042624950409
61.87167185544968
62.93512183427811
63.944606602191925
64.78537672758102
65.56217581033707
66.31423437595367
67.01579582691193
67.67801773548126
68.29881542921066
68.9210873246193
69.52965837717056
70.07686096429825
70.59313029050827
71.11443883180618
71.6447166800499
72.14212656021118
72.62214922904968
73.08666127920151
73.54874587059021
74.0005242228508
74.4421392083168
74.8739053606987
75.31096720695496
75.73340347409248
76.14241623878479
76.55181014537811
76.96958720684052
77.37199074029922
77.77397730946541
78.17441865801811


NameError: name 'early_stopping' is not defined