In [1]:
import os
# Move the working directory two levels up from the current one so that the
# relative paths used below ("data/...", "model/...") resolve.
# NOTE(review): this is relative to the *current* directory, so re-running the
# cell without restarting the kernel climbs two more levels each time.
_two_levels_up = os.path.dirname(os.path.dirname(os.getcwd()))
main = os.chdir(_two_levels_up)  # os.chdir returns None, so `main` is always None

In [2]:
import numpy as np
import pandas as pd

from model import model_architecture, output_results, utils
from pycox.models import CoxTime
from pycox.models import DeepHitSingle
from pycox.models.cox_time import MLPVanillaCoxTime
from sksurv.util import Surv as skSurv

Using TensorFlow backend.


# I. Simulation data

We choose the censoring rate of the simulated data (either 0.2 or 0.6). The data is simulated with the random function generator introduced by Friedman et al. (2001).
The data is normalized (using the mean and standard deviation of the training set for both the training and test sets) and split into a training set and a test set (df_train and df_test are subsets of df_sim). The same training and test sets are used for all the models.

In [3]:
# Model identifier: selects the label transform and the best-parameter file
# below, and is passed to utils.prepare_data and model_architecture.build_model.
name = "CoxTime"

In [4]:
# Censoring rate of the simulated data set (0.2 or 0.6). It selects the data
# folder and the row label looked up in the best-parameter table.
rate = 0.6

In [5]:
# Folder holding the simulated data for the chosen censoring rate.
dir_sim = "data/simulations/" + str(rate) + "/"

# Full simulated data set plus its train / test split, read in that order.
df_sim, df_train, df_test = (
    pd.read_csv(dir_sim + csv_name)
    for csv_name in ('simdata.csv', 'sim_train.csv', 'sim_test.csv')
)

In [6]:
# Build the model inputs from the train / test frames for the selected model.
# Note: y_train and labtrans are re-assigned in the training cell below.
(
    x_train,
    y_train,
    x_test,
    duration_test,
    event_test,
    labtrans,
) = utils.prepare_data(df_train, df_test, name)

# II. Model training

The model is trained using the best hyperparameters determined by 5-fold cross-validation.

In [7]:
# Choose the label transform and the training target for the selected model.
#   - "DeepHit": label transform built from DeepHitSingle with `num_durations`
#     (requires DeepHitSingle to be imported from pycox.models; without that
#     import this branch raises NameError).
#   - "CoxTime": label transform built from CoxTime.
#   - anything else: no transform (labtrans is the empty string) and the raw
#     (duration, event) arrays are used as the target.
durations_train = df_train['yy'].values
events_train = df_train['status'].values

if name == "DeepHit":
    num_durations = 10
    labtrans = DeepHitSingle.label_transform(num_durations)
    y_train = labtrans.fit_transform(durations_train, events_train)
elif name == "CoxTime":
    labtrans = CoxTime.label_transform()
    y_train = labtrans.fit_transform(durations_train, events_train)
else:
    labtrans = ""
    y_train = (durations_train, events_train)

# Table of best hyper-parameters for this model; the first CSV column becomes
# the index, which is looked up with `rate` below.
param = pd.read_csv('model/best_param_simu/' + name + '_best_param.csv', sep=";", index_col=0)

# Values stored for the current rate, listed in the order in which they are
# passed positionally to build_model.
hyper_params = [
    param[column][rate]
    for column in ('neurons', 'dropout', 'activation', 'lr', 'optimizer', 'n_layers')
]

model, callbacks = model_architecture.build_model(x_train, *hyper_params, name, labtrans)
# Train the model: the third positional argument is the batch size read from
# the best-parameter table (cast to a plain int), for 100 epochs, silently.
log = model.fit(
    x_train,
    y_train,
    int(param['batch_size'][rate]),
    epochs=100,
    callbacks=callbacks,
    verbose=False,
)

# Compute baseline hazards only when the model provides the method.
# NOTE(review): in pycox this method exists on the Cox-type models (e.g.
# CoxTime) but not on DeepHitSingle, so an unconditional call would fail with
# AttributeError when name == "DeepHit" — confirm what build_model returns.
if hasattr(model, "compute_baseline_hazards"):
    _ = model.compute_baseline_hazards()

# III. Results

We output the C-index at median time and the Integrated Brier Score. 

In [8]:
# Survival predictions for the test inputs, as returned by predict_surv_df.
surv = model.predict_surv_df(x_test)

# Evaluation summary (the displayed table has columns c_median and ibs).
res = output_results.output_sim_data(
    model,
    surv,
    x_train,
    df_train,
    x_test,
    df_test,
)
res

Unnamed: 0,c_median,ibs
0,0.566194,0.195012
