<a href="https://colab.research.google.com/github/dykim07/SSGMG/blob/master/SSGMG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install libraries, dataset, and base code


In [0]:
# for colab
!git clone https://github.com/dykim07/SSGMG.git

Cloning into 'SSGMG'...
remote: Enumerating objects: 66, done.[K
remote: Counting objects:   1% (1/66)   [Kremote: Counting objects:   3% (2/66)   [Kremote: Counting objects:   4% (3/66)   [Kremote: Counting objects:   6% (4/66)   [Kremote: Counting objects:   7% (5/66)   [Kremote: Counting objects:   9% (6/66)   [Kremote: Counting objects:  10% (7/66)   [Kremote: Counting objects:  12% (8/66)   [Kremote: Counting objects:  13% (9/66)   [Kremote: Counting objects:  15% (10/66)   [Kremote: Counting objects:  16% (11/66)   [Kremote: Counting objects:  18% (12/66)   [Kremote: Counting objects:  19% (13/66)   [Kremote: Counting objects:  21% (14/66)   [Kremote: Counting objects:  22% (15/66)   [Kremote: Counting objects:  24% (16/66)   [Kremote: Counting objects:  25% (17/66)   [Kremote: Counting objects:  27% (18/66)   [Kremote: Counting objects:  28% (19/66)   [Kremote: Counting objects:  30% (20/66)   [Kremote: Counting objects:  31% (21/66)   [

In [0]:
from sklearn.metrics import mean_squared_error as MSE
import os
from tqdm import tqdm
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader as torchDataLoader
from torch.utils.data import Dataset

In [0]:
# for colab
from SSGMG.dataLoader import dataLoader
from SSGMG.models.AE import AE as PTMODEL
from SSGMG.models.DFMNET import DFMNET as CALMODEL

# for local host
# from dataLoader import dataLoader
# from models.AE import AE as PTMODEL
# from models.DFMNET import DFMNET as CALMODEL


In [0]:
# params

# Run on the GPU when one is available, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Seed the global RNGs so that weight initialisation and DataLoader shuffling
# are repeatable from one "Restart & Run All" to the next.
# NOTE: some CUDA kernels are non-deterministic, so GPU runs may still differ slightly.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Step 1 (pre-training) hyper-parameters.
BATCH_SIZE_PT = 512
LR_PT = 0.001
N_EPOCHS_PT = 60
N_LATENT_DIM = 3  # second constructor argument of the pre-training model

# Step 2 (calibration) hyper-parameters.
BATCH_SIZE = 512
LR = 0.001
N_EPOCHS = 30

In [0]:
# load dataset
# The data loader is pointed at the cloned repository inside the working directory.
path = os.path.join(os.getcwd(), 'SSGMG')
print(path)
dataloader = dataLoader(base_path=path)

# Fetch the three splits used below: (x, y) pairs for training and testing,
# and a targets-only array for the pre-training step.
train_x, train_y = dataloader.getTrainDataSet()
test_x, test_y = dataloader.getTestDataSet()
pre_train_y = dataloader.getPretrainDataSet()

# Show the array shapes as a quick sanity check.
print(train_x.shape, train_y.shape)
print(test_x.shape, test_y.shape)
print(pre_train_y.shape)

/content/SSGMG
(2400, 120, 2) (2400, 30)
(6000, 120, 2) (6000, 30)
(6000, 30)


## Step.1 Pretraining

In [0]:
# Float tensors on DEVICE: the pre-training targets, and the test targets
# that are later used to check the reconstruction error.
train_pt_y_torch = torch.from_numpy(pre_train_y).float().to(DEVICE)
valid_pt_y_torch = torch.from_numpy(test_y).float().to(DEVICE)

# Pre-training model, built from the target width and the latent dimension,
# together with its MSE loss and Adam optimizer.
pt_model = PTMODEL(pre_train_y.shape[-1], N_LATENT_DIM).to(DEVICE)
criterion_pt = nn.MSELoss()
optimizer_pt = optim.Adam(pt_model.parameters(), lr=LR_PT)

class DiabetesDatasetPT(Dataset):
    """Dataset over a single tensor: item ``i`` is ``x_data[i]``."""

    def __init__(self, x_data):
        """Store ``x_data`` and cache the size of its first dimension."""
        self.len = x_data.size(0)
        self.x_data = x_data

    def __len__(self):
        """Return the number of samples (size of dimension 0 at construction)."""
        return self.len

    def __getitem__(self, index):
        """Return the sample at position ``index``."""
        return self.x_data[index]
    

# Shuffled mini-batches of pre-training targets; the final partial batch is kept.
torch_dataloader_pt = torchDataLoader(
    DiabetesDatasetPT(train_pt_y_torch),
    batch_size=BATCH_SIZE_PT,
    shuffle=True,
    drop_last=False,
)

In [0]:
# train
# Training mode only has to be enabled once, not on every batch.
pt_model.train()
for epoch in tqdm(range(N_EPOCHS_PT)):
    for data in torch_dataloader_pt:
        optimizer_pt.zero_grad()
        y_pred = pt_model(data)
        # Reconstruction objective: the input batch is also the target.
        loss_t = criterion_pt(y_pred, data)
        loss_t.backward()
        optimizer_pt.step()

100%|██████████| 60/60 [00:03<00:00, 17.19it/s]


In [0]:
# check error
# Switch to evaluation mode (the training loop leaves the model in train mode;
# this matters if the model contains dropout / batch-norm layers) and skip
# autograd bookkeeping, since no gradients are needed here.
pt_model.eval()
with torch.no_grad():
    valid_y_pt = pt_model.transform(valid_pt_y_torch)
    recon = pt_model.inverse_transform(valid_y_pt).detach().to('cpu').numpy()

# RMSE between the test targets and their reconstruction, scaled by 1000 for display.
# Arguments follow sklearn's (y_true, y_pred) order, as in the later cells.
rmse = np.sqrt(MSE(test_y, recon)) * 1000
print("RECON error {:0.2f}".format(rmse))

RECON error 7.98


## Step.2 Calibration

In [0]:
# data
train_x_torch = torch.from_numpy(train_x).type(torch.float).to(DEVICE)

# Encode the training targets with the pre-trained model's transform().
# The encoder is used as a fixed feature map here, so put it in evaluation
# mode (relevant if it has dropout / batch-norm layers) and disable autograd.
pt_model.eval()
with torch.no_grad():
    train_y_pt_torch = pt_model.transform(
        torch.from_numpy(train_y).type(torch.float).to(DEVICE)
    ).detach()

class DiabetesDataSet(Dataset):
    """Paired dataset: item ``i`` is the tuple ``(x_data[i], y_data[i])``."""

    def __init__(self, x_data, y_data):
        """Store both tensors and cache the number of samples.

        Args:
            x_data: tensor whose first dimension indexes the samples.
            y_data: tensor with the same first-dimension size as ``x_data``.

        Raises:
            ValueError: if ``x_data`` and ``y_data`` hold a different number
                of samples.
        """
        # Fail early: a mismatch would otherwise either raise an IndexError in
        # the middle of an epoch or silently ignore the surplus rows.
        if x_data.size(0) != y_data.size(0):
            raise ValueError(
                "x_data and y_data must have the same number of samples, "
                "got {} and {}".format(x_data.size(0), y_data.size(0))
            )
        self.len = x_data.size(0)
        self.x_data = x_data
        self.y_data = y_data

    def __getitem__(self, index):
        """Return the ``(x, y)`` pair at position ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return self.len

# Shuffled mini-batches of (input, encoded target) pairs; the final partial batch is kept.
torch_dataloader_cal = torchDataLoader(
    DiabetesDataSet(train_x_torch, train_y_pt_torch),
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=False,
)

# model
# The calibration model is constructed from the last-dimension sizes of the
# inputs and of the encoded targets, then trained with MSE loss and Adam.
cal_model = CALMODEL(train_x_torch.shape[-1], train_y_pt_torch.shape[-1]).to(DEVICE)
criterion_cal = nn.MSELoss()
optimizer_cal = optim.Adam(cal_model.parameters(), lr=LR)


In [0]:
# Fit the calibration model to the encoded targets.
# Training mode only has to be enabled once, not on every batch.
cal_model.train()
for epoch in tqdm(range(N_EPOCHS)):
    for x, y in torch_dataloader_cal:
        optimizer_cal.zero_grad()
        pred = cal_model(x)
        loss_t = criterion_cal(pred, y)
        loss_t.backward()
        optimizer_cal.step()

100%|██████████| 30/30 [00:13<00:00,  2.10it/s]


## Step.3 Postprocessing

In [0]:
# Overall test error: predict with the calibration model, map the predictions
# back with the pre-trained model's inverse_transform(), compare with the targets.
# Both models must be in evaluation mode; the original cell only switched
# cal_model (this matters if pt_model has dropout / batch-norm layers).
cal_model.eval()
pt_model.eval()
with torch.no_grad():
    # Reload the full test split so the cell does not depend on whatever
    # test_x / test_y currently hold.
    test_x, test_y = dataloader.getTestDataSet()
    test_x_torch = torch.from_numpy(test_x).type(torch.float).to(DEVICE)
    pred = cal_model(test_x_torch)
    pred = pt_model.inverse_transform(pred).detach().to('cpu').numpy()

# RMSE scaled by 1000 for display.
rmse = np.sqrt(MSE(test_y, pred))*1000
print('RMSE: %0.2f' % rmse)

RMSE: 25.21


In [0]:
# Test error broken down by the tags exposed by the data loader.
# Both models must be in evaluation mode; the original cell only switched
# cal_model (this matters if pt_model has dropout / batch-norm layers).
cal_model.eval()
pt_model.eval()
with torch.no_grad():
    for tag in dataloader.tags:
        # Use tag-specific names so the full-split globals test_x / test_y
        # are not overwritten by the last tag's subset.
        tag_x, tag_y = dataloader.getTestDataSetTags(tag=tag)
        tag_x_torch = torch.from_numpy(tag_x).type(torch.float).to(DEVICE)
        pred = cal_model(tag_x_torch)
        pred = pt_model.inverse_transform(pred).detach().to('cpu').numpy()
        # RMSE scaled by 1000 for display.
        rmse = np.sqrt(MSE(tag_y, pred))*1000
        print(" {} RMSE: {:0.2f}".format(tag, rmse))
        

 W2 RMSE: 26.05
 W3 RMSE: 34.58
 W4 RMSE: 15.61
 W5 RMSE: 20.75
 W6 RMSE: 25.11
