In [1]:
import pickle
import datetime
from pathlib import Path
import yaml
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sksurv.ensemble import RandomSurvivalForest
from sksurv.metrics import concordance_index_censored
from sksurv.util import Surv
from sklearn.inspection import permutation_importance
import optuna
import shap
from explainability import SHAP
from evaluation import evaluate_survival_model, PartialLogLikelihood
from training_survival_analysis import train_model
from models import MinimalisticNetwork
import matplotlib.pyplot as plt
import json

import torch
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Dataset


# DeepSurv 사용 위한 Dataset 클래스 정의
# 간단한 Dataset 클래스: 이미 수치형 데이터로 준비되었다고 가정. 인코딩 후 사용
class SimpleDataset(Dataset):
    def __init__(self, X, y_time, y_event):
        """
        Args:
            X: 입력 특성. Pandas DataFrame 또는 numpy array (수치형 데이터)
            y_time: 생존 시간. Pandas Series 또는 numpy array (float)
            y_event: 이벤트(실패 여부). Pandas Series 또는 numpy array (0/1 숫자)
        """
        if isinstance(X, pd.DataFrame):
            self.X = X.values.astype(np.float32)
        else:
            self.X = np.asarray(X, dtype=np.float32)
        self.y_time = np.asarray(y_time, dtype=np.float32)
        self.y_event = np.asarray(y_event, dtype=np.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        return (torch.tensor(self.X[index], dtype=torch.float32),
                torch.tensor(self.y_time[index], dtype=torch.float32),
                torch.tensor(self.y_event[index], dtype=torch.float32))

In [2]:
# ------------------------------
# 데이터 로딩 및 전처리 (공통)
# ------------------------------
df = pd.read_csv("processed_survival_data_modified.csv") # processed_survival_data_modified : Age 범주화한 데이터셋
df["survival"] = df["survival"].map({"survive": 0, "fail": 1}) # 종속변수(수술 성공 여부)를 0, 1로 변환

# 코드 통일성을 위해 생존기간, 성공여부 변수 이름 변경
time_col = "fu_total_yr"
event_col = "survival"
df.rename(columns={event_col: "vit_status", time_col: "survival_time"}, inplace=True)

# 분석 제외할 변수 제거
exclude_columns = ["patient_ID", "me", "failure_reason", "failure_date", 
                   "last_fu_date", "surgery_Date", "fu_for_fail_yr", "fu_for_survival_yr"]
selected_features = [col for col in df.columns if col not in ["vit_status", "survival_time"] + exclude_columns] # 분석에 사용할 변수만 포함

# 지정한 컬럼들에 결측치가 있는 행 제거
df = df.dropna(subset = selected_features)

############### One-hot encoding ##############
# 평가 함수가 기대하는 구조화 배열 생성 (필드명: 'vit_status', 'survival_time')
y = np.zeros(df.shape[0], dtype=[('vit_status', '?'), ('survival_time', '<f8')])
y['vit_status'] = df["vit_status"].values.astype(bool)
y['survival_time'] = df["survival_time"].values.astype(float)

X = pd.get_dummies(df[selected_features], drop_first=True) # get_dummies : 범주형 변수에 대해 One-hot encoding
X = X.astype(np.float32)
############### One-hot encoding ##############

# ############### Label encoding ##############
# # 평가 함수가 기대하는 구조화 배열 생성 (필드명: 'vit_status', 'survival_time')
# y = np.zeros(df.shape[0], dtype=[('vit_status', '?'), ('survival_time', '<f8')])
# y['vit_status'] = df["vit_status"].values.astype(bool)
# y['survival_time'] = df["survival_time"].values.astype(float)

# # label encoding: 각 범주형 변수를 category형으로 변환 후, 코드화
# for col in selected_features:
#     if df[col].dtype == 'object' or str(df[col].dtype).startswith('category'):
#         df[col] = df[col].astype('category').cat.codes

# # 최종 X 데이터셋 생성 (필요한 경우, float32 타입으로 변환)
# X = df[selected_features].values.astype(np.float32)
# ############### Label encoding ##############

In [3]:
# 설정
random_state = 1234
np.random.seed(random_state)
model_name = "deepsurv"

# ------------------------------
# 데이터 분할 및 K-Fold 설정
# ------------------------------

# Split between Test and Training for Hyperparameter Tuning
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=random_state)
kfold = KFold(n_splits=5, shuffle=True, random_state=random_state)

# DeepSurv 입력값에 맞게 tensor 형태로 변환
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_time = pd.Series(y_train['survival_time'])
y_train_event = pd.Series(y_train['vit_status'])
y_train_time_tensor = torch.tensor(np.ascontiguousarray(y_train_time.values), dtype=torch.float32)
y_train_event_tensor = torch.tensor(np.ascontiguousarray(y_train_event.values), dtype=torch.float32)
train_dataset = TensorDataset(X_train_tensor, y_train_time_tensor, y_train_event_tensor)

# ------------------------------
# config.yaml 불러오기 및 RSF 파라미터 갱신
# ------------------------------
config = yaml.safe_load(Path("./config.yaml").read_text())
base_path = config["base_path"]
deepsurv_config = config["deep_surv"]
device = config["device"]

stable_params = {
    "device": device,
    "input_dim": X_train.shape[1],
    "loss_fn": PartialLogLikelihood,
    "epochs": 300, # epochs 300개 사용
    "model": "minimalistic_network"
    }

In [4]:
# ------------------------------
# Optuna Objective 함수 정의 (DeepSurv)
# ------------------------------

def objective_deep_surv(trial: optuna.Trial, stable_params):
    # Hyperparameter 범위 설정
    flexible_params = {
        "batch_size": trial.suggest_categorical("batch_size", deepsurv_config["batch_size"]),
        "inner_dim": trial.suggest_categorical("inner_dim", deepsurv_config["inner_dim"]),
        "lr": trial.suggest_categorical("lr", deepsurv_config["lr"]),
        "weight_decay": trial.suggest_categorical("weight_decay", deepsurv_config["weight_decay"])
    }
    params = {**stable_params, **flexible_params}
    scores = []

    
    # dataset_test는 전체 테스트 세트를 사용 (전역 X_test)
    dataset_test = torch.Tensor(X_test.values)
    
    for train_idx, val_idx in kfold.split(X_train, y_train):
        X_train_fold = X_train.iloc[train_idx]
        X_val_fold = X_train.iloc[val_idx]

        # Class 변환
        dataset_train = SimpleDataset(
            X_train_fold,
            pd.Series(y_train['survival_time'][train_idx]),
            pd.Series(y_train['vit_status'][train_idx])
        )
        
        # 모델 생성 및 학습 (train_model 함수 호출)
        model, losses, test_eval = train_model(dataset_train, params, trial=trial) # K-fold training data로 model fitting
        model.eval()
        y_pred = model(dataset_test.to(params["device"])).detach().cpu().numpy() # test data로 예측값 계산
        y_pred = y_pred + np.random.random(y_pred.shape) * 1e-7
        
        # 평가: 여기서는 각 fold에 대해 evaluate_survival_model의 c_index (Concordance Index)를 사용
        try:
            fold_score = concordance_index_censored(y_test["vit_status"], y_test["survival_time"], np.squeeze(y_pred))[0]
        except ValueError as e:
            print(f"Fold evaluation skipped due to error: {e}") # 예측값에 성공/실패 중 하나가 아예 없는 경우 오류 발생. 해당 과정 스킵
            continue
        scores.append(fold_score)
    
    if len(scores) == 0:
        # 모든 fold에서 오류가 발생한 경우, 기본값 반환 (예: 0)
        score = 0.0
    else:
        score = np.mean(scores)
    trial_nr = trial.number
    print(f"Trial {trial_nr}: {score}")
    
    return score

In [5]:
# Optuna 스터디 생성 및 최적화
study = optuna.create_study(study_name=model_name+str(datetime.datetime.now()),
                            direction="maximize",
                            sampler=optuna.samplers.TPESampler(seed=random_state))
study.optimize(lambda trial: objective_deep_surv(trial, stable_params), n_trials=50)
best_params = study.best_trial.params
print("Best DeepSurv params:", best_params) # 50번의 Trial로 찾은 최선의 Hyperparameter

[I 2025-03-06 16:47:34,427] A new study created in memory with name: deepsurv2025-03-06 16:47:34.425738
[I 2025-03-06 16:47:57,746] Trial 0 finished with value: 0.588157894736842 and parameters: {'batch_size': 3540, 'inner_dim': 64, 'lr': 0.001, 'weight_decay': 5}. Best is trial 0 with value: 0.588157894736842.


Trial 0: 0.588157894736842


[I 2025-03-06 16:48:21,253] Trial 1 finished with value: 0.475 and parameters: {'batch_size': 512, 'inner_dim': 16, 'lr': 5e-05, 'weight_decay': 5}. Best is trial 0 with value: 0.588157894736842.


Trial 1: 0.475


[I 2025-03-06 16:48:46,466] Trial 2 finished with value: 0.5526315789473685 and parameters: {'batch_size': 1024, 'inner_dim': 16, 'lr': 0.0005, 'weight_decay': 1}. Best is trial 0 with value: 0.588157894736842.


Trial 2: 0.5526315789473685


[I 2025-03-06 16:49:09,179] Trial 3 finished with value: 0.5144736842105264 and parameters: {'batch_size': 512, 'inner_dim': 8, 'lr': 0.0001, 'weight_decay': 5}. Best is trial 0 with value: 0.588157894736842.


Trial 3: 0.5144736842105264


[I 2025-03-06 16:49:32,612] Trial 4 finished with value: 0.6026315789473684 and parameters: {'batch_size': 1024, 'inner_dim': 32, 'lr': 5e-05, 'weight_decay': 1}. Best is trial 4 with value: 0.6026315789473684.


Trial 4: 0.6026315789473684


[I 2025-03-06 16:49:55,586] Trial 5 finished with value: 0.5776315789473684 and parameters: {'batch_size': 1770, 'inner_dim': 64, 'lr': 0.0005, 'weight_decay': 0.001}. Best is trial 4 with value: 0.6026315789473684.


Trial 5: 0.5776315789473684


[I 2025-03-06 16:50:18,927] Trial 6 finished with value: 0.4723684210526316 and parameters: {'batch_size': 512, 'inner_dim': 8, 'lr': 0.001, 'weight_decay': 5}. Best is trial 4 with value: 0.6026315789473684.


Trial 6: 0.4723684210526316


[I 2025-03-06 16:50:42,110] Trial 7 finished with value: 0.5394736842105263 and parameters: {'batch_size': 3540, 'inner_dim': 32, 'lr': 0.0005, 'weight_decay': 1}. Best is trial 4 with value: 0.6026315789473684.


Trial 7: 0.5394736842105263


[I 2025-03-06 16:51:05,253] Trial 8 finished with value: 0.5394736842105263 and parameters: {'batch_size': 3540, 'inner_dim': 16, 'lr': 1e-05, 'weight_decay': 1}. Best is trial 4 with value: 0.6026315789473684.


Trial 8: 0.5394736842105263


[I 2025-03-06 16:51:27,419] Trial 9 finished with value: 0.6013157894736842 and parameters: {'batch_size': 1770, 'inner_dim': 32, 'lr': 5e-05, 'weight_decay': 0.1}. Best is trial 4 with value: 0.6026315789473684.


Trial 9: 0.6013157894736842


[I 2025-03-06 16:51:51,245] Trial 10 finished with value: 0.6013157894736842 and parameters: {'batch_size': 1024, 'inner_dim': 32, 'lr': 5e-05, 'weight_decay': 0.01}. Best is trial 4 with value: 0.6026315789473684.


Trial 10: 0.6013157894736842


[I 2025-03-06 16:52:14,661] Trial 11 finished with value: 0.6013157894736842 and parameters: {'batch_size': 1770, 'inner_dim': 32, 'lr': 5e-05, 'weight_decay': 0.1}. Best is trial 4 with value: 0.6026315789473684.


Trial 11: 0.6013157894736842


[I 2025-03-06 16:52:41,291] Trial 12 finished with value: 0.6013157894736842 and parameters: {'batch_size': 1024, 'inner_dim': 32, 'lr': 5e-05, 'weight_decay': 0.1}. Best is trial 4 with value: 0.6026315789473684.


Trial 12: 0.6013157894736842


[I 2025-03-06 16:53:04,858] Trial 13 finished with value: 0.6013157894736842 and parameters: {'batch_size': 1770, 'inner_dim': 32, 'lr': 5e-05, 'weight_decay': 0.0}. Best is trial 4 with value: 0.6026315789473684.


Trial 13: 0.6013157894736842


[I 2025-03-06 16:53:28,151] Trial 14 finished with value: 0.6092105263157894 and parameters: {'batch_size': 1024, 'inner_dim': 32, 'lr': 1e-05, 'weight_decay': 0.1}. Best is trial 14 with value: 0.6092105263157894.


Trial 14: 0.6092105263157894


[I 2025-03-06 16:53:51,035] Trial 15 finished with value: 0.6092105263157894 and parameters: {'batch_size': 1024, 'inner_dim': 32, 'lr': 1e-05, 'weight_decay': 0.001}. Best is trial 14 with value: 0.6092105263157894.


Trial 15: 0.6092105263157894


[I 2025-03-06 16:54:14,490] Trial 16 finished with value: 0.6092105263157894 and parameters: {'batch_size': 1024, 'inner_dim': 32, 'lr': 1e-05, 'weight_decay': 0.001}. Best is trial 14 with value: 0.6092105263157894.


Trial 16: 0.6092105263157894


[I 2025-03-06 16:54:37,028] Trial 17 finished with value: 0.6078947368421053 and parameters: {'batch_size': 1024, 'inner_dim': 8, 'lr': 1e-05, 'weight_decay': 0.001}. Best is trial 14 with value: 0.6092105263157894.


Trial 17: 0.6078947368421053


[I 2025-03-06 16:55:00,418] Trial 18 finished with value: 0.6657894736842105 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 18: 0.6657894736842105


[I 2025-03-06 16:55:23,879] Trial 19 finished with value: 0.6657894736842105 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 19: 0.6657894736842105


[I 2025-03-06 16:55:47,372] Trial 20 finished with value: 0.6526315789473685 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 0.0001, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 20: 0.6526315789473685


[I 2025-03-06 16:56:10,661] Trial 21 finished with value: 0.6526315789473685 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 0.0001, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 21: 0.6526315789473685


[I 2025-03-06 16:56:35,060] Trial 22 finished with value: 0.6526315789473685 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 0.0001, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 22: 0.6526315789473685


[I 2025-03-06 16:56:58,601] Trial 23 finished with value: 0.6526315789473685 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 0.0001, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 23: 0.6526315789473685


[I 2025-03-06 16:57:21,929] Trial 24 finished with value: 0.6657894736842105 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 24: 0.6657894736842105


[I 2025-03-06 16:57:45,610] Trial 25 finished with value: 0.6657894736842105 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 25: 0.6657894736842105


[I 2025-03-06 16:58:08,006] Trial 26 finished with value: 0.6657894736842105 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 26: 0.6657894736842105


[I 2025-03-06 16:58:31,960] Trial 27 finished with value: 0.6657894736842105 and parameters: {'batch_size': 3540, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.0}. Best is trial 18 with value: 0.6657894736842105.


Trial 27: 0.6657894736842105


[I 2025-03-06 16:58:55,677] Trial 28 finished with value: 0.6657894736842105 and parameters: {'batch_size': 512, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 28: 0.6657894736842105


[I 2025-03-06 16:59:19,017] Trial 29 finished with value: 0.5644736842105263 and parameters: {'batch_size': 3540, 'inner_dim': 64, 'lr': 0.001, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 29: 0.5644736842105263


[I 2025-03-06 16:59:42,281] Trial 30 finished with value: 0.6657894736842105 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 30: 0.6657894736842105


[I 2025-03-06 17:00:05,553] Trial 31 finished with value: 0.6657894736842105 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 31: 0.6657894736842105


[I 2025-03-06 17:00:28,838] Trial 32 finished with value: 0.6657894736842105 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 32: 0.6657894736842105


[I 2025-03-06 17:00:52,160] Trial 33 finished with value: 0.6657894736842105 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 33: 0.6657894736842105


[I 2025-03-06 17:01:15,838] Trial 34 finished with value: 0.5394736842105263 and parameters: {'batch_size': 512, 'inner_dim': 16, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 34: 0.5394736842105263


[I 2025-03-06 17:01:38,448] Trial 35 finished with value: 0.588157894736842 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 0.001, 'weight_decay': 5}. Best is trial 18 with value: 0.6657894736842105.


Trial 35: 0.588157894736842


[I 2025-03-06 17:02:02,234] Trial 36 finished with value: 0.6657894736842105 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.0}. Best is trial 18 with value: 0.6657894736842105.


Trial 36: 0.6657894736842105


[I 2025-03-06 17:02:27,470] Trial 37 finished with value: 0.6078947368421053 and parameters: {'batch_size': 1024, 'inner_dim': 8, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 37: 0.6078947368421053


[I 2025-03-06 17:02:53,775] Trial 38 finished with value: 0.631578947368421 and parameters: {'batch_size': 512, 'inner_dim': 16, 'lr': 0.0005, 'weight_decay': 5}. Best is trial 18 with value: 0.6657894736842105.


Trial 38: 0.631578947368421


[I 2025-03-06 17:03:17,368] Trial 39 finished with value: 0.5763157894736842 and parameters: {'batch_size': 1770, 'inner_dim': 64, 'lr': 0.0005, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 39: 0.5763157894736842


[I 2025-03-06 17:03:41,274] Trial 40 finished with value: 0.6657894736842105 and parameters: {'batch_size': 3540, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 1}. Best is trial 18 with value: 0.6657894736842105.


Trial 40: 0.6657894736842105


[I 2025-03-06 17:04:04,986] Trial 41 finished with value: 0.6657894736842105 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 41: 0.6657894736842105


[I 2025-03-06 17:04:28,627] Trial 42 finished with value: 0.6657894736842105 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 42: 0.6657894736842105


[I 2025-03-06 17:04:51,524] Trial 43 finished with value: 0.6657894736842105 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 43: 0.6657894736842105


[I 2025-03-06 17:05:14,832] Trial 44 finished with value: 0.21710526315789475 and parameters: {'batch_size': 1024, 'inner_dim': 8, 'lr': 0.001, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 44: 0.21710526315789475


[I 2025-03-06 17:05:39,790] Trial 45 finished with value: 0.6657894736842105 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 45: 0.6657894736842105


[I 2025-03-06 17:06:03,972] Trial 46 finished with value: 0.5394736842105263 and parameters: {'batch_size': 1024, 'inner_dim': 16, 'lr': 1e-05, 'weight_decay': 1}. Best is trial 18 with value: 0.6657894736842105.


Trial 46: 0.5394736842105263


[I 2025-03-06 17:06:27,627] Trial 47 finished with value: 0.5828947368421052 and parameters: {'batch_size': 1770, 'inner_dim': 64, 'lr': 0.0005, 'weight_decay': 5}. Best is trial 18 with value: 0.6657894736842105.


Trial 47: 0.5828947368421052


[I 2025-03-06 17:06:50,677] Trial 48 finished with value: 0.6657894736842105 and parameters: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}. Best is trial 18 with value: 0.6657894736842105.


Trial 48: 0.6657894736842105


[I 2025-03-06 17:07:14,205] Trial 49 finished with value: 0.6657894736842105 and parameters: {'batch_size': 512, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.0}. Best is trial 18 with value: 0.6657894736842105.


Trial 49: 0.6657894736842105
Best DeepSurv params: {'batch_size': 1024, 'inner_dim': 64, 'lr': 1e-05, 'weight_decay': 0.01}


In [6]:
# 각 fold의 결과를 저장할 딕셔너리 생성
fold_scores = {}

# 최종 모델 학습 및 결과 저장
for i, (train_fold, val_fold) in enumerate(kfold.split(X_train, y_train)):
    X_train_fold = X_train.iloc[train_fold]
    X_val_fold = X_train.iloc[val_fold]
    
    # 학습 데이터셋 생성
    dataset_train = SimpleDataset(X_train_fold, 
                                  y_train["vit_status"][train_fold],
                                  y_train["survival_time"][train_fold])
    # 모델 학습 (train_model 함수 호출)
    best_model, losses, test_eval = train_model(dataset_train, {**stable_params, **best_params})
    
    best_model.eval()
    # 예측: device에 맞게 Tensor 변환 후 예측 수행
    y_pred = best_model(torch.Tensor(X_val_fold.values).to(stable_params["device"])).detach().cpu().numpy()
    
    # 평가: evaluate_survival_model 함수 사용
    scores = evaluate_survival_model(best_model, X_val_fold.values, y_train[train_fold], y_train[val_fold])
    print(f"Final DeepSurv Scores in Fold {i}: {scores}")
    fold_scores[f"fold_{i}"] = scores  # scores가 dict 형식일 경우 그대로 저장
    
    # Permutation Importance 계산 및 저장
    result = permutation_importance(
        best_model, X_val_fold, y_train[val_fold], n_repeats=15, random_state=random_state)
    result_dict = {k: result[k] for k in ("importances_mean", "importances_std")}
    permutation_importances = pd.DataFrame(result_dict, index=X_val_fold.columns).sort_values(by="importances_mean", ascending=False)
    
    print("Permutation Importances:")
    print(permutation_importances)
    
    # 각 fold의 permutation importance CSV 파일로 저장
    perm_imp_path = f"DeepSurv_permutation_importances_fold_{i}.csv"
    permutation_importances.to_csv(perm_imp_path, encoding = 'utf-8')
    print(f"Permutation importances saved to {perm_imp_path}")

    # ---- SHAP values 저장 ----
    explainer = shap.Explainer(best_model.predict, X_val_fold.values, feature_names=X_val_fold.columns.tolist())  
    shap_values = explainer(X_val_fold.values)
    
    # ---- SHAP Beeswarm Plot 저장 ----
    plt.figure(figsize=(10, 6))  # 적절한 크기 설정
    shap.plots.beeswarm(shap_values, show=False)
    
    beeswarm_path = f"DeepSurv_beeswarm_fold_{i}.png"
    plt.tight_layout()  # 여백 자동 조정
    plt.savefig(beeswarm_path, dpi=300, bbox_inches="tight")  # 잘림 방지
    plt.close()  # 메모리 정리
    print(f"Beeswarm plot saved to {beeswarm_path}")

# Final Scores
fold_scores = pd.DataFrame(fold_scores).T
final_scores = fold_scores.mean(skipna=True)
print(final_scores)

KeyboardInterrupt: 

In [7]:
# Final Scores
fold_scores = pd.DataFrame(fold_scores).T
final_scores = fold_scores.mean(skipna=True)
print(final_scores)

Series([], dtype: float64)
