In [1]:
import os
# Move the working directory two levels up from where the kernel was started;
# presumably this is the project root that the relative "data/..." and
# "model/..." paths used later in the notebook are resolved against.
# os.chdir() always returns None, so `main` is bound to the target directory
# itself instead of to the (useless) return value of the call.
# NOTE(review): this cell is not idempotent -- every re-run climbs two more
# levels, so execute it once per kernel session.
main = os.path.dirname(os.path.dirname(os.getcwd()))
os.chdir(main)

In [2]:
import numpy as np
import pandas as pd

from model import model_architecture, output_results, utils
from sksurv.util import Surv as skSurv

Using TensorFlow backend.


# I. Simulations

We choose the censoring rate of the simulated data (it can be either 0.2, 0.4 or 0.6). Data is simulated by the random function generator introduced by Friedman et al. (2001). 
Data is normalized (using the mean and standard deviation of the training set for both the training and test sets) and split into a training set and a test set (df_train and df_test are subsets of df_sim). The same training and test sets are used for all the models.

In [3]:
# Identifier of the model evaluated in this notebook. It is used as the row
# label when selecting the hyper-parameters (param.loc[name]) and is passed to
# model_architecture.objective and output_results.output_simulations.
name = "Cox-CC"

In [4]:
# Censoring rate of the simulated data (the notebook text lists 0.2, 0.4 and
# 0.6 as the available values). Kept as a string because it is concatenated
# directly into the data directory and parameter-file paths.
rate = "0.2" 

In [5]:
# Directory (relative to the current working directory) holding the simulated
# data sets for the chosen censoring rate; the trailing slash is kept so that
# file names can be appended directly.
dir_sim = "data/simulations/{}/".format(rate)

In [6]:
# Load the full simulated data set and its train/test subsets, in that order,
# from the directory selected for the chosen censoring rate.
df_sim, df_train, df_test = (
    pd.read_csv(dir_sim + csv_name)
    for csv_name in ('simdata.csv', 'sim_train.csv', 'sim_test.csv')
)

In [7]:
# Covariates for training and testing, produced by the project helper
# utils.normed_data from the two frames.
# NOTE(review): per the notebook text both sets are normalized with the
# training-set mean/std -- confirm against utils.normed_data.
x_train, x_test = utils.normed_data(df_train, df_test)
def get_target(df):
    """Extract the survival targets from a simulation frame.

    Returns a 2-tuple of NumPy arrays: the values of the 'yy' column followed
    by the values of the 'status' column (callers unpack them as duration and
    event, respectively).
    """
    durations = df['yy'].values
    events = df['status'].values
    return durations, events
# Training target is kept as the (yy, status) tuple returned by get_target;
# it is handed to model.fit in that form.
y_train = get_target(df_train)
# Test targets are unpacked into separate arrays.
duration_test, event_test = get_target(df_test)

# II. Model's construction and training

The parameters of the architecture are the ones listed in the parameters dataframe; they were selected by 5-fold cross-validation among 100 sets of parameters. 

In [8]:
# Read the ';'-separated hyper-parameter table for this censoring rate and
# transpose it so that each model becomes a row indexed by its name.
param = pd.read_csv(
    "model/param_simu_{}.csv".format(rate),
    sep=';',
    index_col=0,
).transpose()
# Hyper-parameters of the model under study.
param_final = param.loc[name]

In [9]:
# Display the hyper-parameters selected for this model.
param_final

neurons         16
drop           0.2
activation    relu
lr_opt        0.01
optimizer     adam
n_layers         1
Name: Cox-CC, dtype: object

In [10]:
# The selected row has dtype object (see its display), so the numeric
# hyper-parameters are cast explicitly, grouped here by target type.
neurons, n_layers = (int(param_final[key]) for key in ('neurons', 'n_layers'))
drop, lr_opt = (float(param_final[key]) for key in ('drop', 'lr_opt'))
# These two are passed through unchanged.
activation, optimizer = (param_final[key] for key in ('activation', 'optimizer'))

The objective function is used to define the architecture of the neural network. 

In [11]:
# Build the network and its training callbacks from the selected
# hyper-parameters via the project helper.
model, callbacks = model_architecture.objective(
    x_train, neurons, drop, activation, lr_opt, optimizer, n_layers, name
)
# `neurons` used to be passed as the third positional argument of fit(). In the
# torchtuples/pycox `Model.fit(input, target, batch_size, epochs, ...)`
# signature that slot is the batch size, so the keyword is now spelled out; the
# value passed is unchanged.
# NOTE(review): confirm that reusing the layer width as the batch size is
# intended rather than an accidental positional argument.
# NOTE(review): no random seed is set in this notebook, so the training log
# will differ between runs.
log = model.fit(
    x_train,
    y_train,
    batch_size=neurons,
    epochs=100,
    callbacks=callbacks,
    verbose=True,
)

0:	[0s / 0s],		train_loss: 0.6979
1:	[0s / 0s],		train_loss: 0.6411
2:	[0s / 0s],		train_loss: 0.6160
3:	[0s / 0s],		train_loss: 0.6165
4:	[0s / 1s],		train_loss: 0.6279
5:	[0s / 1s],		train_loss: 0.6041
6:	[0s / 1s],		train_loss: 0.5800
7:	[0s / 1s],		train_loss: 0.5912
8:	[0s / 1s],		train_loss: 0.5903
9:	[0s / 2s],		train_loss: 0.5693
10:	[0s / 2s],		train_loss: 0.5406
11:	[0s / 2s],		train_loss: 0.6039
12:	[0s / 2s],		train_loss: 0.5836
13:	[0s / 2s],		train_loss: 0.5623
14:	[0s / 3s],		train_loss: 0.5645
15:	[0s / 3s],		train_loss: 0.5296
16:	[0s / 3s],		train_loss: 0.5266
17:	[0s / 3s],		train_loss: 0.5392
18:	[0s / 4s],		train_loss: 0.5114
19:	[0s / 4s],		train_loss: 0.5575
20:	[0s / 4s],		train_loss: 0.5292
21:	[0s / 4s],		train_loss: 0.5110
22:	[0s / 4s],		train_loss: 0.5549
23:	[0s / 5s],		train_loss: 0.5621
24:	[0s / 5s],		train_loss: 0.5649
25:	[0s / 5s],		train_loss: 0.5581
26:	[0s / 5s],		train_loss: 0.5986
27:	[0s / 5s],		train_loss: 0.5611
28:	[0s / 6s],		train_loss: 0.

# III. Results

We present here the results for one simulation dataset. We then simulate 100 datasets for one censoring rate and output the results for the 100 datasets. 

In [12]:
# Call compute_baseline_hazards on the trained model; its return value is
# discarded into `_`.
# NOTE(review): presumably this has to run before predict_surv_df for a
# Cox-type model -- confirm, and keep the order of these statements.
_ = model.compute_baseline_hazards()
# Predicted survival for the test covariates.
surv = model.predict_surv_df(x_test)
# Summary metrics for this single simulated data set, computed by the project
# helper output_results.output_simulations.
results_all = output_results.output_simulations(surv,df_train, x_test, df_test, name)

We output the median survival time, the AUC at that time, Uno's C-index at the median time, and the final censoring rate (in %) of the simulated dataset.

In [13]:
# Display the metrics table for this simulated data set.
results_all

Unnamed: 0,t_med,auc_med,unoc,cens_rate
0,1.203649,0.884677,0.813185,19.333333
