In [1]:
import os
# Move the working directory two levels above the folder the notebook is
# started from, so that the relative paths used further down ("data/...",
# "model/...") resolve; presumably this is also what lets the local `model`
# package be imported -- confirm against the repository layout.
#
# `os.chdir` returns None, so the target path itself is stored in `main`
# (binding the call's result would leave `main` equal to None).
#
# NOTE: this cell is not idempotent -- every re-run climbs two more levels.
# Restart the kernel before executing it again.
main = os.path.dirname(os.path.dirname(os.getcwd()))
os.chdir(main)

In [2]:
import numpy as np
import pandas as pd

from model import model_architecture, output_results, utils
from pycox.models import CoxTime
from sksurv.util import Surv as skSurv

Using TensorFlow backend.


# I. Simulations

We first choose the censoring rate of the simulated data (one of 0.2, 0.4 or 0.6). The data are simulated with the random function generator introduced by Friedman et al. (2001).
The data are normalized (using the mean and standard deviation of the training set for both the training and the test set) and split into a training set and a test set (`df_train` and `df_test` are subsets of `df_sim`). The same training and test sets are used for all the models.

In [3]:
# Identifier of the model handled in this notebook. It selects the row of the
# hyper-parameter table (`param.loc[name]`) and is forwarded to
# `model_architecture.objective` and `output_results.output_simulations`.
name = "CoxTime"

In [4]:
# Censoring rate of the simulated data. Kept as a string because it is
# concatenated into the data directory and the parameter file name.
rate = "0.2"

In [5]:
# Directory of the simulated datasets for the chosen censoring rate. The
# trailing slash matters: file names are appended by plain concatenation.
dir_sim = "data/simulations/"+rate+"/"

In [6]:
# Load the full simulated dataset and its train / test files from `dir_sim`.
# The three files are read in the same order as the names on the left.
df_sim, df_train, df_test = (
    pd.read_csv(dir_sim + csv_name)
    for csv_name in ('simdata.csv', 'sim_train.csv', 'sim_test.csv')
)

In [7]:
# Model inputs for the train and test sets, produced by the project helper
# `utils.normed_data`. According to the notebook text, both sets are
# normalized with the training set's mean and std -- not verifiable from here.
x_train, x_test = utils.normed_data(df_train, df_test)

def get_target(df):
    """Extract the survival target of a dataframe as a pair of arrays.

    Parameters
    ----------
    df : DataFrame-like object with a 'yy' and a 'status' column.

    Returns
    -------
    tuple
        ``(df['yy'].values, df['status'].values)``; the callers in this
        notebook use them as durations and event indicators respectively.
    """
    durations = df['yy'].values
    events = df['status'].values
    return durations, events
# Test targets are kept as raw arrays; they do not go through `labtrans`.
duration_test, event_test = get_target(df_test)

# Label transform for CoxTime with both `with_mean` and `with_std` disabled
# (presumably leaving the durations unscaled -- confirm in the pycox docs).
# It is fitted on the training targets only and is later handed to
# `model_architecture.objective`.
labtrans = CoxTime.label_transform(with_mean=False, with_std=False)
y_train = labtrans.fit_transform(*get_target(df_train))

# II. Model's construction and training

The hyper-parameters of the architecture are the ones listed in the parameters dataframe; they were selected by 5-fold cross-validation among 100 candidate sets of parameters.

In [8]:
# Hyper-parameter table for the chosen censoring rate: a ';'-separated file
# whose first column is the index. It is transposed so that each model is a
# row and can be selected by label.
param = pd.read_csv(
    "model/param_simu_" + rate + ".csv",
    sep=';',
    index_col=0
).T

# Hyper-parameters of the model identified by `name`.
param_final = param.loc[name]

In [9]:
# Display the hyper-parameters selected for the model `name`.
param_final

neurons           128
drop              0.6
activation       relu
lr_opt         0.0025
optimizer     rmsprop
n_layers            2
Name: CoxTime, dtype: object

In [10]:
# `param_final` has dtype object, so each hyper-parameter is cast explicitly
# to the type it is used with. Entries are grouped by target type.

# Integer-valued settings.
neurons, n_layers = (
    int(param_final[key]) for key in ('neurons', 'n_layers')
)

# Real-valued settings.
drop, lr_opt = (
    float(param_final[key]) for key in ('drop', 'lr_opt')
)

# Settings used as stored, without conversion.
activation, optimizer = (
    param_final[key] for key in ('activation', 'optimizer')
)

The `objective` function of `model_architecture` is used to define the architecture of the neural network from these hyper-parameters.

In [11]:
# Build the network from the selected hyper-parameters; the project helper
# returns the model together with the callbacks to use during training.
model, callbacks = model_architecture.objective(
    x_train, neurons, drop, activation, lr_opt, optimizer, n_layers, name,
    labtrans=labtrans,
)

# Train for 100 epochs with the callbacks returned above.
# NOTE(review): `neurons` is passed positionally as the third argument of
# `fit`; in the pycox / torchtuples API that slot is presumably `batch_size`.
# Confirm that reusing the layer width as the batch size is intended.
log = model.fit(
    x_train, y_train, neurons,
    epochs=100, callbacks=callbacks, verbose=True,
)

0:	[0s / 0s],		train_loss: 0.8181
1:	[0s / 0s],		train_loss: 0.7274
2:	[0s / 0s],		train_loss: 0.6949
3:	[0s / 0s],		train_loss: 0.6729
4:	[0s / 0s],		train_loss: 0.6283
5:	[0s / 0s],		train_loss: 0.6615
6:	[0s / 0s],		train_loss: 0.6290
7:	[0s / 0s],		train_loss: 0.6366
8:	[0s / 1s],		train_loss: 0.6186
9:	[0s / 1s],		train_loss: 0.6007
10:	[0s / 1s],		train_loss: 0.6372
11:	[0s / 1s],		train_loss: 0.6263
12:	[0s / 1s],		train_loss: 0.6280
13:	[0s / 1s],		train_loss: 0.6148
14:	[0s / 1s],		train_loss: 0.6204
15:	[0s / 1s],		train_loss: 0.6228
16:	[0s / 1s],		train_loss: 0.6113
17:	[0s / 2s],		train_loss: 0.6098
18:	[0s / 2s],		train_loss: 0.6199
19:	[0s / 2s],		train_loss: 0.5980
20:	[0s / 2s],		train_loss: 0.6132
21:	[0s / 2s],		train_loss: 0.6055
22:	[0s / 2s],		train_loss: 0.6025
23:	[0s / 2s],		train_loss: 0.5982
24:	[0s / 2s],		train_loss: 0.5988
25:	[0s / 2s],		train_loss: 0.5790
26:	[0s / 2s],		train_loss: 0.6007
27:	[0s / 3s],		train_loss: 0.6080
28:	[0s / 3s],		train_loss: 0.

# III. Results

We present here the results for one simulation dataset. We then simulate 100 datasets for one censoring rate and output the results for the 100 datasets. 

In [12]:
# Compute the baseline hazards on the fitted model before predicting; the
# return value is not needed here and is discarded into `_`.
_ = model.compute_baseline_hazards()
# Predicted survival for the test inputs (a dataframe, going by the method
# name -- layout not visible from this notebook).
surv = model.predict_surv_df(x_test)
# Project helper that turns the predictions into the summary table shown in
# the next cell.
results_all = output_results.output_simulations(surv,df_train, x_test, df_test,name)

We output the median survival time (`t_med`), the AUC value at this time (`auc_med`), Uno's C-index at the median time (`unoc`) and the final censoring rate of the simulated dataset (`cens_rate`, expressed in percent).

In [13]:
# Display the results table returned by `output_results.output_simulations`.
results_all

Unnamed: 0,t_med,auc_med,unoc,cens_rate
0,1.203649,0.866017,0.810271,19.333333
