In [1]:
import torch
import pandas as pd
import numpy as np
import copy
import gc
from torch.autograd import Variable
from torch.optim.lr_scheduler import ExponentialLR
from torch.utils.data import TensorDataset,  DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import optuna
import os
import random
import wandb

In [2]:
pd.set_option('mode.chained_assignment', None) # 경고 off
pd.options.display.float_format = '{:.5f}'.format
optuna.logging.set_verbosity(optuna.logging.INFO)

In [3]:
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.benchmark = False

In [4]:
def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(0)

<torch._C.Generator at 0x206316323d0>

In [5]:
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ['PYTHONHASHSEED'] = str(seed)

In [6]:
print(torch.__version__)
print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.version.cuda)
print('학습을 진행하는 기기:', torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'))
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

torch.set_printoptions(precision=4, sci_mode=False)
torch.backends.cudnn.enabled = False
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.deterministic = True

2.2.1
True
1
12.1
학습을 진행하는 기기: cuda:0


Data

In [7]:
train_dataset = torch.load("train_UrineSCr & UrineSCr_dataset.pt")
valid_dataset = torch.load("valid_UrineSCr & UrineSCr_dataset.pt")
calibration_dataset = torch.load("calibration_UrineSCr & UrineSCr_dataset.pt")
test_dataset = torch.load("test_UrineSCr & UrineSCr_dataset.pt")

In [8]:
dataloaders = []
dataset_names = ["train", "valid", "calibration", "test"]

for name in dataset_names:
    dataloader = DataLoader(eval(name + "_dataset"), batch_size=1, shuffle=True, drop_last=False)
    dataloaders.append(dataloader)

train_dataloader, valid_dataloader, calibration_dataloader, test_dataloader = dataloaders

In [9]:
for batch in train_dataloader.dataset:
    X_numeric, X_presence, Y_main, Y_sub, mask = batch.tensors
    print("X_numeric shape:", X_numeric.shape)
    print("X_presence shape:", X_presence.shape)
    print("Y_main shape:", Y_main.shape)
    print("Y_sub shape:", Y_sub.shape)
    print("mask shape:", mask.shape)
    break

X_numeric shape: torch.Size([43984, 56, 58])
X_presence shape: torch.Size([43984, 56, 83])
Y_main shape: torch.Size([43984, 8, 56])
Y_sub shape: torch.Size([43984, 4, 56])
mask shape: torch.Size([43984, 56])


# overweight for positive weight

In [10]:
train = pd.read_parquet('train_UrineSCr & UrineSCr.parquet')
valid = pd.read_parquet('valid_UrineSCr & UrineSCr.parquet')
train_id = pd.read_parquet('train_id.parquet')
valid_id = pd.read_parquet('valid_id.parquet')

In [11]:
pos_weights_main = []
pos_weights_main.append(sum(train['GT_presence_6'] == 0) / sum(train['GT_presence_6'] == 1))
pos_weights_main.append(sum(train['GT_presence_12'] == 0) / sum(train['GT_presence_12'] == 1))
pos_weights_main.append(sum(train['GT_presence_18'] == 0) / sum(train['GT_presence_18'] == 1))
pos_weights_main.append(sum(train['GT_presence_24'] == 0) / sum(train['GT_presence_24'] == 1))
pos_weights_main.append(sum(train['GT_presence_30'] == 0) / sum(train['GT_presence_30'] == 1))
pos_weights_main.append(sum(train['GT_presence_36'] == 0) / sum(train['GT_presence_36'] == 1))
pos_weights_main.append(sum(train['GT_presence_42'] == 0) / sum(train['GT_presence_42'] == 1))
pos_weights_main.append(sum(train['GT_presence_48'] == 0) / sum(train['GT_presence_48'] == 1))
pos_weights_main = torch.Tensor(pos_weights_main)
pos_weights_main_train = pos_weights_main

In [12]:
pos_weights_main = []
pos_weights_main.append(sum(valid['GT_presence_6'] == 0) / sum(valid['GT_presence_6'] == 1))
pos_weights_main.append(sum(valid['GT_presence_12'] == 0) / sum(valid['GT_presence_12'] == 1))
pos_weights_main.append(sum(valid['GT_presence_18'] == 0) / sum(valid['GT_presence_18'] == 1))
pos_weights_main.append(sum(valid['GT_presence_24'] == 0) / sum(valid['GT_presence_24'] == 1))
pos_weights_main.append(sum(valid['GT_presence_30'] == 0) / sum(valid['GT_presence_30'] == 1))
pos_weights_main.append(sum(valid['GT_presence_36'] == 0) / sum(valid['GT_presence_36'] == 1))
pos_weights_main.append(sum(valid['GT_presence_42'] == 0) / sum(valid['GT_presence_42'] == 1))
pos_weights_main.append(sum(valid['GT_presence_48'] == 0) / sum(valid['GT_presence_48'] == 1))
pos_weights_main = torch.Tensor(pos_weights_main)
pos_weights_main_valid = pos_weights_main

In [13]:
pos_weights_sub = []
pos_weights_sub.append((len(train_id)- sum(train_id['RRT'] == 1)) / sum(train_id['RRT'] == 1))
pos_weights_sub.append(sum(train['GT_stage_3'] == 0) / sum(train['GT_stage_3'] == 1))
pos_weights_sub.append(sum(train['GT_stage_2'] == 0) / sum(train['GT_stage_2'] == 1))
pos_weights_sub.append(sum(train['GT_stage_1'] == 0) / sum(train['GT_stage_1'] == 1))
pos_weights_sub = torch.Tensor(pos_weights_sub)
pos_weights_sub_train = pos_weights_sub

In [14]:
pos_weights_sub = []
pos_weights_sub.append((len(valid_id)- sum(valid_id['RRT'] == 1)) / sum(valid_id['RRT'] == 1))
pos_weights_sub.append(sum(valid['GT_stage_3'] == 0) / sum(valid['GT_stage_3'] == 1))
pos_weights_sub.append(sum(valid['GT_stage_2'] == 0) / sum(valid['GT_stage_2'] == 1))
pos_weights_sub.append(sum(valid['GT_stage_1'] == 0) / sum(valid['GT_stage_1'] == 1))
pos_weights_sub = torch.Tensor(pos_weights_sub)
pos_weights_sub_valid = pos_weights_sub

In [15]:
pos_weights_sub = []
pos_weights_sub.append(sum(valid['GT_stage_3D'] == 0) / sum(valid['GT_stage_3D'] == 1))
pos_weights_sub.append(sum(valid['GT_stage_3'] == 0) / sum(valid['GT_stage_3'] == 1))
pos_weights_sub.append(sum(valid['GT_stage_2'] == 0) / sum(valid['GT_stage_2'] == 1))
pos_weights_sub.append(sum(valid['GT_stage_1'] == 0) / sum(valid['GT_stage_1'] == 1))
pos_weights_sub = torch.Tensor(pos_weights_sub)
pos_weights_sub_valid = pos_weights_sub

Model

In [16]:
class EarlyStopping:
    def __init__(self, patience, loss_names, verbose=False, delta=0, path='Earlystopping.pt'):
        self.patience = patience
        self.loss_names = loss_names
        self.verbose = verbose
        self.counters = 0
        self.best_scores = {loss_name: None for loss_name in loss_names}
        self.early_stop = False
        self.loss_min = {loss_name: np.Inf for loss_name in loss_names}
        self.loss_min_past = {loss_name: np.Inf for loss_name in loss_names}
        self.delta = delta
        self.path = path

    def __call__(self, losses, model, epoch, num_epochs, avg_train_loss, avg_train_main_loss, avg_train_sub_loss,avg_valid_loss, avg_valid_main_loss,avg_valid_sub_loss):
        all_losses_improved = all(loss_value < self.loss_min[loss_name] for loss_name, loss_value in losses.items())
        
        if all_losses_improved:
            for loss_name, loss_value in losses.items():
                if self.best_scores[loss_name] is None:
                    self.best_scores[loss_name] = loss_value
                    self.loss_min[loss_name] = loss_value
                    self.save_checkpoint(losses, model)
                elif loss_value < self.best_scores[loss_name]:
                    self.best_scores[loss_name] = loss_value
                    self.loss_min_past[loss_name] = self.loss_min[loss_name]
                    self.loss_min[loss_name] = loss_value
                    self.save_checkpoint(losses, model)
                    self.counters = 0
            if self.verbose:
                print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {avg_train_loss:.4f} - main Loss: {avg_train_main_loss:.4f} - sub Loss: {avg_train_sub_loss:.4f} - Valid Loss: {avg_valid_loss:.4f} - main Loss: {avg_valid_main_loss:.4f} - sub Loss: {avg_valid_sub_loss:.4f}")
        else:
            self.counters += 1
            if self.counters >= self.patience:
                self.early_stop = True

    def save_checkpoint(self, losses, model):
        torch.save(model.state_dict(), self.path)
        torch.cuda.empty_cache()


In [17]:
class PCGrad():
    def __init__(self, optimizer, reduction='mean'):
        self._optim, self._reduction = optimizer, reduction
        return

    @property
    def optimizer(self):
        return self._optim

    def zero_grad(self):
        '''
        clear the gradient of the parameters
        '''

        return self._optim.zero_grad(set_to_none=True)

    def step(self):
        '''
        update the parameters with the gradient
        '''

        return self._optim.step()

    def pc_backward(self, objectives):
        '''
        calculate the gradient of the parameters

        input:
        - objectives: a list of objectives
        '''
        grads, shapes, has_grads = self._pack_grad(objectives)
        pc_grad = self._project_conflicting(grads, has_grads)
        pc_grad = self._unflatten_grad(pc_grad, shapes[0])
        self._set_grad(pc_grad)
        
        return

    def _project_conflicting(self, grads, has_grads, shapes=None):

        shared = torch.stack(has_grads).prod(0).bool()
        pc_grad  = copy.deepcopy(grads)
        for g_i in pc_grad:
            random.shuffle(grads)
            for g_j in grads:
                g_i_g_j = torch.dot(g_i, g_j)
                if g_i_g_j < 0:
                    g_i -= (g_i_g_j) * g_j / (g_j.norm()**2)

        merged_grad = torch.zeros_like(grads[0])

        if self._reduction:
            merged_grad[shared] = torch.stack([g[shared]
                                        for g in pc_grad]).mean(dim=0)
        elif self._reduction == 'sum':
            merged_grad[shared] = torch.stack([g[shared]
                                        for g in pc_grad]).sum(dim=0)
            
        else: exit('invalid reduction method')

        merged_grad[~shared] = torch.stack([g[~shared]
                                            for g in pc_grad]).sum(dim=0)

        return merged_grad.clone().detach()

    def _set_grad(self, grads):

        '''
        set the modified gradients to the network
        '''
        idx = 0
        for group in self._optim.param_groups:
            for p in group['params']:
                # if p.grad is None: continue
                p.grad = grads[idx]
                idx += 1

        return

    def _pack_grad(self, objectives):
        '''
        pack the gradient of the parameters of the network for each objective
        
        output:
        - grad: a list of the gradient of the parameters
        - shape: a list of the shape of the parameters
        - has_grad: a list of mask represent whether the parameter has gradient
        '''

        grads, shapes, has_grads = [], [], []

        self._optim.zero_grad(set_to_none=True)
        objectives[0].backward(retain_graph=True)
        grad, shape, has_grad = self._retrieve_grad()
        grads.append(self._flatten_grad(grad, shape))
        has_grads.append(self._flatten_grad(has_grad, shape))
        shapes.append(shape)
    
        self._optim.zero_grad(set_to_none=True)
        objectives[1].backward(retain_graph=False)
        grad, shape, has_grad = self._retrieve_grad()
        grads.append(self._flatten_grad(grad, shape))
        has_grads.append(self._flatten_grad(has_grad, shape))
        shapes.append(shape)

        return grads, shapes, has_grads

    def _unflatten_grad(self, grads, shapes):

        unflatten_grad, idx = [], 0
        for shape in shapes:
            length = np.prod(shape)
            unflatten_grad.append(Variable(grads[idx:(idx + length)].view(shape).clone(), requires_grad = False))
            idx += length
        
        return unflatten_grad

    def _flatten_grad(self, grads, shapes):

        flatten_grad = torch.cat([g.flatten() for g in grads])
        return flatten_grad.clone().detach()

    def _retrieve_grad(self):
        '''
        get the gradient of the parameters of the network with specific 
        objective
        
        output:
        - grad: a list of the gradient of the parameters
        - shape: a list of the shape of the parameters
        - has_grad: a list of mask represent whether the parameter has gradient
        '''
        grad, shape, has_grad = [], [], []
        for group in self._optim.param_groups:
            for p in group['params']:
                # if p.grad is None: continue
                # tackle the multi-head scenario
                
                if p.grad is None:
                    shape.append(p.shape)
                    grad.append(torch.zeros_like(p).to(p.device))
                    has_grad.append(torch.zeros_like(p).to(p.device))
                    continue

                shape.append(p.grad.shape)
                grad.append(p.grad.clone())
                has_grad.append(torch.ones_like(p).to(p.device))
    
        return grad, shape, has_grad

In [18]:
class EmbeddingModule(nn.Module):
    def __init__(self, embedding_size, embedding_num_layers, activation_type, LN, seq_len, numeric_input_size, presence_input_size, CB):
        super(EmbeddingModule, self).__init__()

        self.CB = CB
        self.LN = LN
        self.num_layers = embedding_num_layers

        # 활성화 함수 매핑
        activation_functions = {
            'ReLU': nn.ReLU(),
            'LeakyReLU': nn.LeakyReLU(),
            'Tanh': nn.Tanh(),
            'ELU': nn.ELU(),
            'SELU': nn.SELU(),
            'CELU': nn.CELU(),
            'GELU': nn.GELU(),
        }

        # 선택된 활성화 함수
        activation_function = activation_functions.get(activation_type, nn.ReLU())  # 기본값은 ReLU

        self.embedding_numeric = nn.ModuleList([
            nn.Sequential(
                nn.Linear(numeric_input_size if i == 0 else embedding_size, embedding_size),
                activation_function,
            ) for i in range(self.num_layers)
        ])

        self.embedding_presence = nn.ModuleList([
            nn.Sequential(
                nn.Linear(presence_input_size if i == 0 else embedding_size, embedding_size),
                activation_function,
            ) for i in range(self.num_layers)
        ])
        
        self.LN = nn.LayerNorm(normalized_shape=(seq_len, 2 * embedding_size if CB else embedding_size), eps=1e-05)

    def forward(self, x_numeric, x_presence):
        
        for i in range(len(self.embedding_numeric)):
            x_numeric = self.embedding_numeric[i](x_numeric)
            x_presence = self.embedding_presence[i](x_presence)

        if self.CB :
            embedded = torch.cat([x_numeric, x_presence], dim=2)

        else :
            embedded = x_numeric + x_presence

        if self.LN:
            embedded = self.LN(embedded)

        return embedded


In [19]:
class RecurrentModule(nn.Module):
    def __init__(self, hidden_size, embedding_size, recurrent_num_layers, recurrent_type, highway_network,CB):
        super(RecurrentModule, self).__init__()

        self.embedding_size = embedding_size
        self.num_layers = recurrent_num_layers
        self.highway_network = highway_network

        # 리커런트 모듈 매핑
        recurrent_modules = {
            'RNN': nn.RNN,
            'GRU': nn.GRU,
            'LSTM': nn.LSTM
        }

        # 선택된 리커런트 모듈
        recurrent_module = recurrent_modules.get(recurrent_type, nn.RNN)  # 기본값은 RNN

        input_size =  2 * self.embedding_size if CB else self.embedding_size

        self.recurrent_layers = nn.ModuleList([
            recurrent_module(input_size if i == 0 else hidden_size, hidden_size, 1, batch_first=True) for i in range(self.num_layers)
        ])

        if self.highway_network:
            # Create a list to hold the highway layers dynamically (except the final layer)
            self.highway_layers = nn.ModuleList([
                nn.Sequential(
                    nn.Linear(hidden_size, hidden_size),
                    nn.Linear(hidden_size, hidden_size)
                )
                for _ in range(self.num_layers - 1)
            ])

        self.fc_main = nn.Linear(hidden_size, 8)
        self.fc_sub = nn.Linear(hidden_size, 4)

    def forward(self, x):
        
        out = x

        if (self.num_layers != 1):
            if self.highway_network:
                for i in range(self.num_layers - 1):
                    out, _ = self.recurrent_layers[i](out)

                    # Apply the highway network
                    h = out
                    t = torch.sigmoid(self.highway_layers[i][0](h))
                    transformed = torch.relu(self.highway_layers[i][1](h))
                    out = t * transformed + (1 - t) * h
            else: 
                for i in range(self.num_layers - 1):
                    out, _ = self.recurrent_layers[i](out)
        else:
            out, _ = self.recurrent_layers[-1](out)

        out_main = self.fc_main(out)
        out_sub = self.fc_sub(out)

        out_main = out_main.transpose(1,2).contiguous()
        out_sub = out_sub.transpose(1,2).contiguous()

        return out_main, out_sub


In [20]:
class AKIPredictionModel(nn.Module):
    def __init__(self, hidden_size, embedding_size, recurrent_num_layers, embedding_num_layers, activation_type, recurrent_type, seq_len, LN, highway_network, numeric_input_size, presence_input_size, CB):
        super(AKIPredictionModel, self).__init__()

        self.embedding_module = EmbeddingModule(embedding_size, embedding_num_layers, activation_type, LN, seq_len, numeric_input_size, presence_input_size, CB)
        self.recurrent_module = RecurrentModule(hidden_size, embedding_size, recurrent_num_layers, recurrent_type, highway_network, CB)

    def forward(self, x_numeric, x_presence):

        embedded = self.embedding_module(x_numeric, x_presence)
        out_main, out_sub = self.recurrent_module(embedded)

        return out_main, out_sub

In [21]:
class CustomBCELoss(nn.Module):
    def __init__(self, pos_weight):
        super(CustomBCELoss, self).__init__()
        self.pos_weight = pos_weight

    def forward(self, input, target):
        # 각 클래스에 대한 긍정 가중치 적용
        eps = 1e-12
        input_clamped = torch.clamp(input, min=eps, max= (1 - eps))
        loss = - (self.pos_weight * target * torch.log(input_clamped) + (1 - target) * torch.log(1 - input_clamped))
        
        return loss.mean()

# Train

In [22]:
def cdf(df):
    df, _ = torch.cummax(df, axis=1)
    return df

In [23]:
def train_CDFo(model, train_dataloader, valid_dataloader, learning_rate, num_epochs, lr_decay_factor, lr_decay_steps, LD, path, batchsize) :

    criterion_main_train = [CustomBCELoss(pos_weight=pos_weight) for pos_weight in pos_weights_main_train]
    criterion_sub_train = [CustomBCELoss(pos_weight=pos_weight) for pos_weight in pos_weights_sub_train]

    criterion_main_valid = [CustomBCELoss(pos_weight=pos_weight) for pos_weight in pos_weights_main_valid]
    criterion_sub_valid = [CustomBCELoss(pos_weight=pos_weight) for pos_weight in pos_weights_sub_valid]

    losses = {
    'valid_loss': 0,
    'main_loss': 0,
    'sub_loss': 0
    }

    loss_names = ['valid_loss', 'main_loss', 'sub_loss']
    early_stopping = EarlyStopping(patience = lr_decay_steps * 2, path=path, loss_names=loss_names, verbose = False)

    optimizer = PCGrad(optim.Adam(model.parameters(), lr=learning_rate))
    scheduler = ExponentialLR(optimizer.optimizer, gamma=lr_decay_factor)

    step = 0

    for epoch in range(num_epochs):      

        train_batch_dataloader = DataLoader(train_dataloader.dataset[0], batch_size = batchsize, shuffle = True, drop_last = False, pin_memory = True, num_workers = 4)

        for data in train_batch_dataloader:

            step += 1
            model.train()

            inputs_numeric, inputs_presence, targets_main, targets_sub, mask = [d.to(device) for d in data]
            out_main, out_sub = model(inputs_numeric, inputs_presence)

            train_loss = 0.0
            train_main_loss = 0.0
            train_sub_loss = 0.0 

            out_main = F.sigmoid(out_main)
            out_sub = F.sigmoid(out_sub)

            out_main = cdf(out_main)
            out_sub = cdf(out_sub)

            train_main_loss += sum(criterion(out_main[:, j], targets_main[:, j]) for j, criterion in enumerate(criterion_main_train))
            train_sub_loss += sum(criterion(out_sub[:, j], targets_sub[:, j]) for j, criterion in enumerate(criterion_sub_train))     
            
            train_loss += (train_main_loss + train_sub_loss).item()
            optimizer.pc_backward([train_main_loss,train_sub_loss])
            optimizer.step()

            with torch.no_grad():
            
                model.eval()
                valid_loss = 0.0
                valid_main_loss = 0.0
                valid_sub_loss = 0.0

                for data in valid_dataloader.dataset:
                
                    inputs_numeric, inputs_presence, targets_main, targets_sub, mask = [d.to(device) for d in data.tensors]
                    out_main, out_sub = model(inputs_numeric, inputs_presence)

                    out_main = F.sigmoid(out_main)
                    out_sub = F.sigmoid(out_sub)

                    out_main = cdf(out_main)
                    out_sub = cdf(out_sub)

                    valid_main_loss += sum(criterion(out_main[:, j], targets_main[:, j]) for j, criterion in enumerate(criterion_main_valid))
                    valid_sub_loss += sum(criterion(out_sub[:, j], targets_sub[:, j]) for j, criterion in enumerate(criterion_sub_valid))

                valid_loss += (valid_main_loss + valid_sub_loss).item()

                losses['valid_loss'] = valid_loss
                losses['main_loss'] = valid_main_loss.item()
                losses['sub_loss'] = valid_sub_loss.item()

                early_stopping(losses, model, epoch, num_epochs, train_loss, train_main_loss, train_sub_loss, valid_loss, valid_main_loss, valid_sub_loss)

                wandb.log({'step': step,"train:":train_loss, "main_train:":train_main_loss,"sub_train:":train_sub_loss, "val:":valid_loss, "main_val:": valid_main_loss, "sub_val:": valid_sub_loss})

                gc.collect()
                torch.cuda.empty_cache()

                if early_stopping.early_stop:

                    valid_loss = 0.0
                    valid_main_loss = 0.0
                    valid_sub_loss = 0.0
     
                    model.load_state_dict(torch.load(path))
                    out_main, out_sub = model(inputs_numeric, inputs_presence)
                    
                    out_main = F.sigmoid(out_main)
                    out_sub = F.sigmoid(out_sub)
                    
                    out_main = cdf(out_main)
                    out_sub = cdf(out_sub)

                    valid_main_loss += sum(criterion(out_main[:, j], targets_main[:, j]) for j, criterion in enumerate(criterion_main_valid))
                    valid_sub_loss += sum(criterion(out_sub[:, j], targets_sub[:, j]) for j, criterion in enumerate(criterion_sub_valid))
                    valid_loss += (valid_main_loss + valid_sub_loss).item()
                    break
            
                elif (((step % lr_decay_steps) == 0) & (LD)) :
                    scheduler.step()  
                    continue

            if early_stopping.early_stop : break

        if early_stopping.early_stop : break

    return model, valid_loss

In [24]:
def train_CDFx(model, train_dataloader, valid_dataloader, learning_rate, num_epochs, lr_decay_factor, lr_decay_steps, LD, path, batchsize) :

    criterion_main_train = [nn.BCEWithLogitsLoss(pos_weight=pos_weight) for pos_weight in pos_weights_main_train]
    criterion_sub_train = [nn.BCEWithLogitsLoss(pos_weight=pos_weight) for pos_weight in pos_weights_sub_train]

    criterion_main_valid = [nn.BCEWithLogitsLoss(pos_weight=pos_weight) for pos_weight in pos_weights_main_valid]
    criterion_sub_valid = [nn.BCEWithLogitsLoss(pos_weight=pos_weight) for pos_weight in pos_weights_sub_valid]

    losses = {
    'valid_loss': 0,
    'main_loss': 0,
    'sub_loss': 0
    }

    loss_names = ['valid_loss', 'main_loss', 'sub_loss']
    early_stopping = EarlyStopping(patience = lr_decay_steps * 2, path=path, loss_names=loss_names, verbose = False)

    optimizer = PCGrad(optim.Adam(model.parameters(), lr=learning_rate))
    scheduler = ExponentialLR(optimizer.optimizer, gamma=lr_decay_factor)

    step = 0

    for epoch in range(num_epochs):      

        train_batch_dataloader = DataLoader(train_dataloader.dataset[0], batch_size = batchsize, shuffle = True, drop_last = False, pin_memory = True, num_workers = 4)

        for data in train_batch_dataloader:

            step += 1
            model.train()

            inputs_numeric, inputs_presence, targets_main, targets_sub, mask = [d.to(device) for d in data]
            out_main, out_sub = model(inputs_numeric, inputs_presence)

            train_loss = 0.0
            train_main_loss = 0.0
            train_sub_loss = 0.0 

            train_main_loss += sum(criterion(out_main[:, j], targets_main[:, j]) for j, criterion in enumerate(criterion_main_train))
            train_sub_loss += sum(criterion(out_sub[:, j], targets_sub[:, j]) for j, criterion in enumerate(criterion_sub_train))     
            
            train_loss += (train_main_loss + train_sub_loss).item()
            optimizer.pc_backward([train_main_loss,train_sub_loss])
            optimizer.step()
            
            with torch.no_grad():
            
                model.eval()

                for data in valid_dataloader.dataset:
                
                    valid_loss = 0.0
                    valid_main_loss = 0.0
                    valid_sub_loss = 0.0

                    inputs_numeric, inputs_presence, targets_main, targets_sub, mask = [d.to(device) for d in data.tensors]
                    out_main, out_sub = model(inputs_numeric, inputs_presence)

                    valid_main_loss += sum(criterion(out_main[:, j], targets_main[:, j]) for j, criterion in enumerate(criterion_main_valid))
                    valid_sub_loss += sum(criterion(out_sub[:, j], targets_sub[:, j]) for j, criterion in enumerate(criterion_sub_valid))

                valid_loss += (valid_main_loss + valid_sub_loss).item()

                losses['valid_loss'] = valid_loss
                losses['main_loss'] = valid_main_loss.item()
                losses['sub_loss'] = valid_sub_loss.item()

                early_stopping(losses, model, epoch, num_epochs, train_loss, train_main_loss, train_sub_loss, valid_loss, valid_main_loss, valid_sub_loss)

                wandb.log({'step': step,"train:":train_loss, "main_train:":train_main_loss,"sub_train:":train_sub_loss, "val:":valid_loss, "main_val:": valid_main_loss, "sub_val:": valid_sub_loss})

                gc.collect()
                torch.cuda.empty_cache()
            
                if early_stopping.early_stop:

                    valid_loss = 0.0
                    valid_main_loss = 0.0
                    valid_sub_loss = 0.0
        
                    model.load_state_dict(torch.load(path))
                    out_main, out_sub = model(inputs_numeric, inputs_presence)
                    valid_main_loss += sum(criterion(out_main[:, j], targets_main[:, j]) for j, criterion in enumerate(criterion_main_valid))
                    valid_sub_loss += sum(criterion(out_sub[:, j], targets_sub[:, j]) for j, criterion in enumerate(criterion_sub_valid))
                    valid_loss += (valid_main_loss + valid_sub_loss).item()
                    break
            
                elif (((step % lr_decay_steps) == 0) & (LD)) :
                    scheduler.step()  
                    continue

            if early_stopping.early_stop : break

        if early_stopping.early_stop : break    

    return model, valid_loss

# Optuna

In [27]:
def objective(trial):

    numeric_input_size = len(train_dataloader.dataset[0].tensors[0][0][0])
    presence_input_size = len(train_dataloader.dataset[0].tensors[1][0][0])

    seq_len = 56
    num_epochs = 1000000

    # Define hyperparameters to be optimized
    hidden_size = trial.suggest_int("hidden_size", 50 , 200, step = 50)
    embedding_size = trial.suggest_int("embedding_size", 25, 100, step = 25)

    recurrent_num_layers = trial.suggest_int("recurrent_num_layers", 1, 5)  # 1부터 3까지의 정수 값 중 하나 선택
    embedding_num_layers = trial.suggest_int("embedding_num_layers", 1, 5)  # 1부터 3까지의 정수 값 중 하나 선택

    CB = trial.suggest_categorical("CB", [0 ,1]) # 0 : sum , 1 : concat
    recurrent_type = trial.suggest_categorical("recurrent_type", ['LSTM','RNN','GRU'])
    activation_type = trial.suggest_categorical("activation_type", ['ReLU','LeakyReLU','Tanh','ELU','SELU','CELU','GELU'])
    
    batchsize = trial.suggest_categorical("Batchsize", [64, 128, 256, 512])
    learning_rate = trial.suggest_categorical("learning_rate", [1e-4, 1e-3, 1e-2])
    lr_decay_steps = trial.suggest_categorical("lr_decay_steps", [800, 400, 200, 100])
    lr_decay_factor = trial.suggest_categorical("lr_decay_factor", [0.7, 0.8, 0.85, 0.9, 0.95])
 
    HN = trial.suggest_categorical("highway_network", [0, 1])
    LD = trial.suggest_categorical("LD", [0, 1])
    LN = trial.suggest_categorical("LN", [0, 1])
    CDF = trial.suggest_categorical("CDF", [0, 1])

    wandb.init(
        project='towards better clinical applicability', name=f'A-{trial.number}', reinit=True,
        config={
        'hidden_size':hidden_size,
        'embedding_size':embedding_size,
        'recurrent_num_layers': recurrent_num_layers,
        'embedding_num_layers':embedding_num_layers,
        'Combinbation':CB,
        'recurrent_type': recurrent_type,
        'activation_type': activation_type,
        "batch_size":batchsize,
        "learning_rate":learning_rate,
        "lr_decay_factor":lr_decay_factor,
        "lr_decay_steps":lr_decay_steps,
        "highway_network":HN,
        "learning_decay":LD,
        "Layer_Normalization":LN,
        "CDF":CDF,
    })
    
    # Create and train the model with the specified hyperparameters
    model = AKIPredictionModel(hidden_size, embedding_size, recurrent_num_layers, embedding_num_layers, activation_type, recurrent_type, seq_len, LN, HN, numeric_input_size, presence_input_size, CB).to(device)
    path = f"trial_{trial.number}_model.pt"

    # Train the model using your `train` function

    if CDF : target_loss = train_CDFo(model, train_dataloader, valid_dataloader, learning_rate, num_epochs, lr_decay_factor, lr_decay_steps, LD, path, batchsize)
    else : target_loss = train_CDFx(model, train_dataloader, valid_dataloader, learning_rate, num_epochs, lr_decay_factor, lr_decay_steps, LD, path, batchsize)

    return target_loss

In [None]:
if __name__ == "__main__":
    # Create an Optuna study and optimize the objective function
    db_url = "sqlite:///DAHS_study.db"
    study = optuna.create_study(direction="minimize", storage = db_url)
    study.optimize(objective, n_trials = 30)

    # Get the best hyperparameters and corresponding loss
    best_params = study.best_params
    best_loss = study.best_value

    print("Best Hyperparameters:", best_params)
    print("Best Validation Loss:", best_loss)

[I 2024-02-26 01:48:13,845] A new study created in RDB with name: no-name-def1c71c-9b43-48d4-98f5-2406b58304f2
[I 2024-02-26 04:18:32,237] Trial 0 finished with value: 0.8709253072738647 and parameters: {'hidden_size': 50, 'embedding_size': 100, 'recurrent_num_layers': 5, 'embedding_num_layers': 4, 'CB': 0, 'recurrent_type': 'RNN', 'activation_type': 'ReLU', 'Batchsize': 128, 'learning_rate': 0.0001, 'lr_decay_steps': 800, 'lr_decay_factor': 0.95, 'highway_network': 0, 'LD': 0, 'LN': 0, 'CDF': 0}. Best is trial 0 with value: 0.8709253072738647.
[I 2024-02-26 04:29:13,915] Trial 1 finished with value: 1.0596877336502075 and parameters: {'hidden_size': 100, 'embedding_size': 50, 'recurrent_num_layers': 5, 'embedding_num_layers': 2, 'CB': 1, 'recurrent_type': 'RNN', 'activation_type': 'ReLU', 'Batchsize': 128, 'learning_rate': 0.001, 'lr_decay_steps': 100, 'lr_decay_factor': 0.95, 'highway_network': 0, 'LD': 0, 'LN': 0, 'CDF': 1}. Best is trial 0 with value: 0.8709253072738647.
[I 2024-02

Best Hyperparameters: {'hidden_size': 50, 'embedding_size': 50, 'recurrent_num_layers': 3, 'embedding_num_layers': 4, 'CB': 0, 'recurrent_type': 'GRU', 'activation_type': 'LeakyReLU', 'Batchsize': 512, 'learning_rate': 0.01, 'lr_decay_steps': 400, 'lr_decay_factor': 0.9, 'highway_network': 1, 'LD': 1, 'LN': 0, 'CDF': 1}
Best Validation Loss: 0.7603350877761841


# Ablation

In [25]:
hidden_size = 50  # Replace with the desired hidden size
num_layers = 3  # Replace with the desired number of layers
seq_len = 56
embedding_size = 50
highway_network = 1
Embedding_num_layers = 4
Recurrent_module = 'GRU'
Embedding_module = 'LeakyReLU'
learning_rate = 0.01
lr_decay_factor = 0.9
lr_decay_steps = 400
batchsize = 512
patience = lr_decay_steps * 2
LN = 0
CB = 0
LD = 1
CDF = 1
num_epochs = 10000000

In [26]:
def test(model,dataloader):

    model.eval()

    criterion =  [nn.BCELoss()]

    main_datasets_6h = []
    main_datasets_12h = []
    main_datasets_18h = []
    main_datasets_24h = []
    main_datasets_30h = []
    main_datasets_36h = []
    main_datasets_42h = []
    main_datasets_48h = []

    sub_datasets_1 = []
    sub_datasets_2 = []
    sub_datasets_3 = []
    sub_datasets_3D = []

    with torch.no_grad():

        test_loss = 0.0
      
        for data in dataloader.dataset:
                
            inputs_numeric, inputs_presence, targets_main, targets_sub, mask = [d.to(device) for d in data.tensors]
            
            out_main, out_sub = model(inputs_numeric, inputs_presence)

            test_main_loss = 0.0
            test_sub_loss = 0.0  

            out_main = F.sigmoid(out_main)
            out_sub = F.sigmoid(out_sub)    

            out_main = cdf(out_main)
            out_sub = cdf(out_sub)
            
            for i in range(mask.shape[0]):

                length = (mask[i,:] == 0).sum()
                test_main_loss += sum(criterion(out_main[i, j, :length], targets_main[i, j, :length]) for j, criterion in enumerate(criterion))
                test_sub_loss += sum(criterion(out_sub[i, j, :length], targets_sub[i, j, :length]) for j, criterion in enumerate(criterion))

            test_loss += (test_main_loss + test_sub_loss).item()

            i = (mask[0,:] == 0).sum()

            dataset = TensorDataset(out_main[:,0,:i],out_main[:,1,:i],out_main[:,2,:i],out_main[:,3,:i],out_main[:,4,:i],out_main[:,5,:i],out_main[:,6,:i],out_main[:,7,:i],
                                    out_sub[:,3,:i],out_sub[:,2,:i],out_sub[:,1,:i],out_sub[:,0,:i],
                                    targets_main[:,0,:i],targets_main[:,1,:i],targets_main[:,2,:i],targets_main[:,3,:i],targets_main[:,4,:i],targets_main[:,5,:i],targets_main[:,6,:i],targets_main[:,7,:i],
                                    targets_sub[:,3,:i],targets_sub[:,2,:i],targets_sub[:,1,:i],targets_sub[:,0,:i])
                                    
            dataloader = DataLoader(dataset, batch_size=1, shuffle=False, drop_last=False)

            for out_main_6h,out_main_12h,out_main_18h,out_main_24h,out_main_30h,out_main_36h,out_main_42h,out_main_48h,out_sub_1,out_sub_2,out_sub_3,out_sub_3D,targets_main_6h,targets_main_12h,targets_main_18h,targets_main_24h,targets_main_30h,targets_main_36h,targets_main_42h,targets_main_48h,targets_sub_1,targets_sub_2,targets_sub_3,targets_sub_3D in dataloader:
                
                main_dataset_6h = TensorDataset(out_main_6h, targets_main_6h)
                main_dataset_12h = TensorDataset(out_main_12h, targets_main_12h)
                main_dataset_18h = TensorDataset(out_main_18h, targets_main_18h)
                main_dataset_24h = TensorDataset(out_main_24h, targets_main_24h)
                main_dataset_30h = TensorDataset(out_main_30h, targets_main_30h)
                main_dataset_36h = TensorDataset(out_main_36h, targets_main_36h)
                main_dataset_42h = TensorDataset(out_main_42h, targets_main_42h)
                main_dataset_48h = TensorDataset(out_main_48h, targets_main_48h)

                main_datasets_6h.append(main_dataset_6h)
                main_datasets_12h.append(main_dataset_12h)
                main_datasets_18h.append(main_dataset_18h)
                main_datasets_24h.append(main_dataset_24h)
                main_datasets_30h.append(main_dataset_30h)
                main_datasets_36h.append(main_dataset_36h)
                main_datasets_42h.append(main_dataset_42h)
                main_datasets_48h.append(main_dataset_48h)

                sub_dataset_1 = TensorDataset(out_sub_1, targets_sub_1)
                sub_dataset_2 = TensorDataset(out_sub_2, targets_sub_2)
                sub_dataset_3 = TensorDataset(out_sub_3, targets_sub_3)
                sub_dataset_3D = TensorDataset(out_sub_3D, targets_sub_3D)

                sub_datasets_1.append(sub_dataset_1)
                sub_datasets_2.append(sub_dataset_2)
                sub_datasets_3.append(sub_dataset_3)
                sub_datasets_3D.append(sub_dataset_3D)               

    print(f"Test Loss: {test_loss:.4f}")

    main_datasets = [main_datasets_6h, main_datasets_12h, main_datasets_18h, main_datasets_24h,main_datasets_30h, main_datasets_36h, main_datasets_42h, main_datasets_48h]
    sub_datasets = [sub_datasets_1, sub_datasets_2, sub_datasets_3, sub_datasets_3D]
    
    return main_datasets, sub_datasets

In [None]:
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=False, drop_last=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=1, shuffle=False, drop_last=True)
calibration_dataloader = DataLoader(calibration_dataset, batch_size=1, shuffle=False, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, drop_last=True)

for batch in test_dataloader.dataset:
    X_numeric, X_presence, Y_main, Y_sub, mask = batch.tensors
    print("X_numeric shape:", X_numeric.shape)
    print("X_presence shape:", X_presence.shape)
    print("Y_main shape:", Y_main.shape)
    print("Y_sub shape:", Y_sub.shape)
    print("mask shape:", mask.shape)

X_numeric shape: torch.Size([3, 56, 58])
X_presence shape: torch.Size([3, 56, 83])
Y_main shape: torch.Size([3, 8, 56])
Y_sub shape: torch.Size([3, 4, 56])
mask shape: torch.Size([3, 56])
X_numeric shape: torch.Size([13, 56, 58])
X_presence shape: torch.Size([13, 56, 83])
Y_main shape: torch.Size([13, 8, 56])
Y_sub shape: torch.Size([13, 4, 56])
mask shape: torch.Size([13, 56])
X_numeric shape: torch.Size([16, 56, 58])
X_presence shape: torch.Size([16, 56, 83])
Y_main shape: torch.Size([16, 8, 56])
Y_sub shape: torch.Size([16, 4, 56])
mask shape: torch.Size([16, 56])
X_numeric shape: torch.Size([15, 56, 58])
X_presence shape: torch.Size([15, 56, 83])
Y_main shape: torch.Size([15, 8, 56])
Y_sub shape: torch.Size([15, 4, 56])
mask shape: torch.Size([15, 56])
X_numeric shape: torch.Size([805, 56, 58])
X_presence shape: torch.Size([805, 56, 83])
Y_main shape: torch.Size([805, 8, 56])
Y_sub shape: torch.Size([805, 4, 56])
mask shape: torch.Size([805, 56])
X_numeric shape: torch.Size([539, 5

# Optuna Model

In [28]:
model = AKIPredictionModel(hidden_size, embedding_size, num_layers, Embedding_num_layers, Embedding_module, Recurrent_module, seq_len, LN, highway_network,len(train_dataloader.dataset[0].tensors[0][0][0]), len(train_dataloader.dataset[0].tensors[1][0][0]),CB).to(device)
model.load_state_dict(torch.load('trial_26_model.pt'))

main_datasets, sub_datasets= test(model, test_dataloader) 

save_path = "main_dataset_3D_Optuna.pt"
torch.save(main_datasets, save_path)

save_path = "sub_dataset_3D_Optuna.pt"
torch.save(sub_datasets, save_path)

main_dataloaders = [DataLoader(dataset, batch_size=1, shuffle=False, drop_last=True) for dataset in main_datasets]
sub_dataloaders = [DataLoader(dataset, batch_size=1, shuffle=False, drop_last=True) for dataset in sub_datasets]

main_dataloader_6h ,main_dataloader_12h ,main_dataloader_18h ,main_dataloader_24h,main_dataloader_30h ,main_dataloader_36h ,main_dataloader_42h ,main_dataloader_48h = main_dataloaders
sub_dataloader_1 ,sub_dataloader_2 ,sub_dataloader_3, sub_dataloader_3D = sub_dataloaders

Test Loss: 1988.9763


In [30]:
model = AKIPredictionModel(hidden_size, embedding_size, num_layers, Embedding_num_layers, Embedding_module, Recurrent_module, seq_len, LN, highway_network,len(train_dataloader.dataset[0].tensors[0][0][0]), len(train_dataloader.dataset[0].tensors[1][0][0]),CB).to(device)
model.load_state_dict(torch.load('trial_26_model.pt'))

main_datasets, sub_datasets= test(model, calibration_dataloader) 

save_path = "main_dataset_3D_Optuna_calibration.pt"
torch.save(main_datasets, save_path)

save_path = "sub_dataset_3D_Optuna_calibration.pt"
torch.save(sub_datasets, save_path)

main_dataloaders = [DataLoader(dataset, batch_size=1, shuffle=False, drop_last=True) for dataset in main_datasets]
sub_dataloaders = [DataLoader(dataset, batch_size=1, shuffle=False, drop_last=True) for dataset in sub_datasets]

main_dataloader_6h ,main_dataloader_12h ,main_dataloader_18h ,main_dataloader_24h,main_dataloader_30h ,main_dataloader_36h ,main_dataloader_42h ,main_dataloader_48h = main_dataloaders
sub_dataloader_1 ,sub_dataloader_2 ,sub_dataloader_3, sub_dataloader_3D = sub_dataloaders

Test Loss: 1018.3242
