In [7]:
import numpy as np
import pandas as pd
import os 
import joblib

import warnings
warnings.filterwarnings('ignore')

from pathlib import Path

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import KFold

In [None]:
from survivalquilts import SurvivalQuilts, get_tte_structured

### Import Data

In [None]:
# Read the raw table from disk; the first CSV column becomes the row index.
df = pd.read_csv(
    './dataset/support_data.csv',
    index_col=0,
)

In [None]:
# Split the raw table into time-to-event (T), event indicator (Y) and
# covariates (X). Every frame gets a fresh 0..n-1 row index so the three stay
# aligned row by row, and the two target columns get short generic names.
T = df[['d.time']].reset_index(drop=True).rename(columns={'d.time': 'time'})
Y = df[['death']].reset_index(drop=True).rename(columns={'death': 'event'})

# Covariates are rescaled column-wise with a min-max scaler.
# NOTE(review): the scaler is fitted on all rows, i.e. before the
# cross-validation split performed in a later cell, so held-out rows
# contribute to the min/max -- confirm this is intended.
scaler = MinMaxScaler()
X = df.drop(columns=['d.time', 'death']).reset_index(drop=True)
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns.tolist())

In [None]:
# Cross-validation configuration: number of folds and the shuffling seed.
OUT_ITERATION = 5
SEED = 1234

# Despite the name `skf`, this is a plain (unstratified) shuffled KFold.
skf = KFold(
    n_splits=OUT_ITERATION,
    shuffle=True,
    random_state=SEED,
)

### Set the time points of interest

In [None]:
# Times of the rows whose event indicator equals 1 (boolean row mask on T).
tmp = np.asarray(T.loc[Y['event'] == 1])

# Collect the 10th, 20th, ..., 80th percentiles of those times, in order.
time_optimization = []
for p in range(10, 90, 10):
    time_optimization.append(np.percentile(tmp, p))

In [None]:
# Index of the cross-validation fold used for this run.
cv_ITR = 0

print('CV ITR... ' + str(cv_ITR))

# Enumerate the folds once and unpack the requested (train, test) pair;
# the previous code generated the full list of folds twice for this.
tr_indx, te_indx = list(skf.split(X))[cv_ITR]

# KFold.split yields positional row indices, so select rows by position
# (.iloc). This matches the earlier label-based lookup whenever the frames
# carry a 0..n-1 index, and stays correct if they do not.
X_tr, X_te = X.iloc[tr_indx].reset_index(drop=True), X.iloc[te_indx].reset_index(drop=True)
Y_tr, Y_te = Y.iloc[tr_indx].reset_index(drop=True), Y.iloc[te_indx].reset_index(drop=True)
T_tr, T_te = T.iloc[tr_indx].reset_index(drop=True), T.iloc[te_indx].reset_index(drop=True)


# to compute brier-score without error:
# a copy of the test times capped at the largest training time.
T_te2 = T_te.copy(deep=True)
T_te2['time'] = T_te2['time'].clip(upper=T_tr['time'].max())

# Structured (event, time) targets for the metric functions used later.
# te_tte_structured2 is the variant built from the capped test times.
tr_tte_structured  = get_tte_structured(T_tr, Y_tr)
te_tte_structured  = get_tte_structured(T_te, Y_te)
te_tte_structured2 = get_tte_structured(T_te2, Y_te)

In [None]:
# The three AFT baselines share the same hyper-parameters; each one receives
# its own copy so no dict object is shared between entries.
aft_kwargs = {'alpha': 0.05, 'penalizer': 0.01, 'l1_ratio': 0.}

# Baseline learners handed to SurvivalQuilts, keyed by baseline name.
baseline_models = {
    'CoxPH': {'alpha': 0.05},
    'WeibullAFT': dict(aft_kwargs),
    'LogNormalAFT': dict(aft_kwargs),
    'LogLogisticAFT': dict(aft_kwargs),
    'RSF': {'n_estimators': 100},
    'ExtSurv': {'n_estimators': 100},
    'CoxBoost': {'n_estimators': 100},
}

model_sq = SurvivalQuilts(
    baselines=baseline_models,
    num_validation=5,
    num_outer=3,
    num_bo=30,
    random_seed=1234,
    path='./',
)

In [None]:
# Fit the model on the training fold (covariates, times, event indicators);
# the percentile time points computed earlier are passed as `time_optimization`.
model_sq.fit(X_tr, T_tr, Y_tr, time_optimization=time_optimization)

In [None]:
# Persist the fitted model object to a file in the current working directory.
joblib.dump(model_sq, './trained_SQ.joblib')

In [None]:
# Time horizons at which predictions are requested and the metrics evaluated.
# NOTE(review): presumably in the same units as the 'time' column -- confirm.
eval_times = [30, 60, 150, 300]

In [None]:
from sksurv.metrics import concordance_index_ipcw, brier_score, integrated_brier_score

# One prediction column per entry of eval_times.
# NOTE(review): pred[:, t] is used as a risk score for the c-index and
# 1 - pred[:, t] as the survival probability for the Brier score, so pred is
# presumably the predicted event probability by each horizon -- confirm.
pred = model_sq.predict(X_te, time_horizons=eval_times)

metric_CINDEX = np.zeros([len(eval_times)])
metric_BRIER = np.zeros([len(eval_times)])

print('evaluating c-index & brier score')
for t, eval_time in enumerate(eval_times):
    # c-index uses the original test times, truncated at tau=eval_time.
    metric_CINDEX[t] = concordance_index_ipcw(tr_tte_structured, te_tte_structured, pred[:,t], tau=eval_time)[0]
    # Brier score uses the test times capped at the largest training time;
    # brier_score returns (times, scores), hence [1][0] for the single time.
    metric_BRIER[t]  = brier_score(tr_tte_structured, te_tte_structured2, 1.- pred[:,t], times=eval_time)[1][0]

print(metric_CINDEX)
print(metric_BRIER)

# Root directory for the result files. The default is the original absolute
# location, which only exists on the author's machine; set the environment
# variable SQ_RESULTS_ROOT to write somewhere else without editing this cell.
results_root = os.environ.get(
    'SQ_RESULTS_ROOT',
    '/mnt/storage/personal/chlee/projects/medical/bormae-cag/death/survivalquilts',
)
path = results_root + '/itr{}/'.format(cv_ITR)

# exist_ok=True creates the directory if needed, without a separate
# (race-prone) existence check.
os.makedirs(path + 'results/', exist_ok=True)

# Each metric is stored as a one-row table: row label = fold, columns = eval_times.
for metric_values, file_name in ((metric_CINDEX, 'cindex.csv'), (metric_BRIER, 'brier.csv')):
    pd.DataFrame(
        metric_values.reshape([1,-1]),
        columns=eval_times,
        index=['itr{}'.format(cv_ITR)],
    ).to_csv(path + 'results/' + file_name)