In [1]:
import os

# Run the rest of the notebook from the directory two levels above the one the
# kernel was started in (presumably the repository root), so that relative
# paths such as "data/..." and "model/..." resolve.
# NOTE: os.chdir() returns None, so `main` is always None after this cell.
# NOTE: this cell is not idempotent -- running it again climbs two more levels;
# restart the kernel before re-running it.
_two_levels_up = os.path.dirname(os.path.dirname(os.getcwd()))
main = os.chdir(_two_levels_up)

In [2]:
import numpy as np
import pandas as pd

from model import model_architecture, output_results
from pycox.models import CoxTime
from sksurv.util import Surv as skSurv

Using TensorFlow backend.


# I. METABRIC data

We use the METABRIC data set, combining clinical and pathological information with gene expression data. Missing values of the explanatory variables are imputed. The data are normalized (using the mean and standard deviation of the training set for both the training and test sets) and split into a training set and a test set. The same training and test sets are used for all the models.
For Cox-Time, the time variable is included in the input data through the `labtrans` label transform.

In [3]:
# Model identifier: used below as the row label when selecting hyperparameters
# from the parameter table, and passed to the project's model/result helpers.
name: str = "CoxTime"

In [4]:
# ---- Training split ---------------------------------------------------------
df_train = pd.read_csv("data/real_data/metabric_train.csv")

# Survival outcome for scikit-survival, built from the two outcome columns.
data_train = skSurv.from_arrays(
    time=df_train['surv_train'],
    event=df_train['cen_train'],
)

# Covariate matrix: every column except the two outcome columns, as float32.
x_train = np.array(
    df_train.drop(columns=['surv_train', 'cen_train']),
    dtype='float32',
)

# Cox-Time label transform, created with with_mean and with_std both disabled,
# then fitted on (and applied to) the training outcome.
labtrans = CoxTime.label_transform(with_mean=False, with_std=False)
get_target = lambda df: (df['surv_train'].values, df['cen_train'].values)
y_train = labtrans.fit_transform(*get_target(df_train))

# ---- Test split -------------------------------------------------------------
df_test = pd.read_csv("data/real_data/metabric_test.csv")
x_test = np.array(
    df_test.drop(columns=['surv_test', 'cen_test']),
    dtype='float32',
)

# II. Model's construction and training

The architecture parameters are those listed in the parameters dataframe; they were selected by 5-fold cross-validation among 100 candidate parameter sets.

In [5]:
# The CSV is ';'-separated; after transposing, rows are indexed by the labels
# that were the file's column headers, so `name` selects one model's settings.
param = pd.read_csv(
    "model/param_metabric.csv",
    sep=';',
    index_col=0,
).transpose()
param_final = param.loc[name]

In [6]:
# Show the hyperparameter set selected for this model.
print(param_final)

neurons           32
drop             0.3
activation       elu
lr_opt        0.0025
optimizer       adam
n_layers           3
Name: CoxTime, dtype: object


In [7]:
# Unpack the selected hyperparameters, converting the numeric entries to the
# Python types the model-building code is given (integers, floats), and
# leaving the two string-valued entries untouched.
neurons, n_layers = (int(param_final[key]) for key in ('neurons', 'n_layers'))
drop, lr_opt = (float(param_final[key]) for key in ('drop', 'lr_opt'))
activation, optimizer = param_final['activation'], param_final['optimizer']

The objective function is used to define the architecture of the neural network. 

In [8]:
# Build the network (and its training callbacks) from the selected
# hyperparameters via the project's `objective` helper.
model, callbacks = model_architecture.objective(
    x_train, neurons, drop, activation, lr_opt, optimizer, n_layers, name,
    labtrans=labtrans,
)

# Train for up to 100 epochs on the training split.
# NOTE(review): `neurons` is passed as the third positional argument of fit();
# in pycox/torchtuples that position appears to be `batch_size` -- confirm that
# reusing the layer width as the batch size is intended.
log = model.fit(
    x_train, y_train, neurons,
    epochs=100,
    callbacks=callbacks,
    verbose=True,
)

0:	[0s / 0s],		train_loss: 0.7361
1:	[0s / 0s],		train_loss: 0.7119
2:	[0s / 0s],		train_loss: 0.6767
3:	[0s / 0s],		train_loss: 0.6616
4:	[0s / 1s],		train_loss: 0.6176
5:	[0s / 1s],		train_loss: 0.5913
6:	[0s / 1s],		train_loss: 0.6100
7:	[0s / 1s],		train_loss: 0.6022
8:	[0s / 1s],		train_loss: 0.5813
9:	[0s / 2s],		train_loss: 0.5860
10:	[0s / 2s],		train_loss: 0.6002
11:	[0s / 2s],		train_loss: 0.5640
12:	[0s / 2s],		train_loss: 0.5518
13:	[0s / 3s],		train_loss: 0.5758
14:	[0s / 3s],		train_loss: 0.5404
15:	[0s / 3s],		train_loss: 0.5487
16:	[0s / 3s],		train_loss: 0.5465
17:	[0s / 4s],		train_loss: 0.5307
18:	[0s / 4s],		train_loss: 0.5195
19:	[0s / 4s],		train_loss: 0.5097
20:	[0s / 4s],		train_loss: 0.5270
21:	[0s / 4s],		train_loss: 0.5466
22:	[0s / 5s],		train_loss: 0.5475
23:	[0s / 5s],		train_loss: 0.4987
24:	[0s / 5s],		train_loss: 0.5089
25:	[0s / 5s],		train_loss: 0.5422
26:	[0s / 6s],		train_loss: 0.5050
27:	[0s / 6s],		train_loss: 0.5020
28:	[0s / 6s],		train_loss: 0.

# III. Results

We iteratively resample 100 test sets from the test dataset to obtain bootstrapped results.

In [9]:
# Number of bootstrap iterations handed to the project's evaluation helper.
n_iterations = 100

# Evaluate the fitted model with the project's bootstrap routine.
results_all = output_results.output_bootstrap(
    model,
    n_iterations,
    df_train,
    data_train,
    y_train,
    df_test,
    name,
)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


We output the AUC value at 5 and 10 years and Uno's C-index at 5 and 10 years, with 95% Confidence Intervals.

In [10]:
# Display the bootstrap summary returned by output_bootstrap.
results_all

Unnamed: 0,mean,ci95_lo,ci95_hi,std,count
auc5,0.726731,0.674353,0.778858,0.032067,100.0
auc10,0.741634,0.689331,0.806766,0.031462,100.0
unoc5,0.699927,0.655048,0.745196,0.026933,100.0
unoc10,0.687354,0.653115,0.730121,0.023435,100.0
