In [1]:
import os

# The data ("data/...") and parameter ("model/...") paths used by the later cells
# are relative, so the working directory is moved two levels up from the
# notebook's start directory (presumably the project root - confirm).
# The move is skipped when both directories are already visible, so re-running
# this cell does not climb two more levels each time.
if not (os.path.isdir("data") and os.path.isdir("model")):
    os.chdir(os.path.dirname(os.path.dirname(os.getcwd())))

# os.chdir() returns None, so the resulting working directory is recorded
# explicitly instead of binding `main` to the call's (always None) result.
main = os.getcwd()

In [2]:
import numpy as np
import pandas as pd

from model import model_architecture, output_results
from sksurv.util import Surv as skSurv

Using TensorFlow backend.


# I. METABRIC data

We use the METABRIC data set, with clinical and pathological information as well as gene expression data. Missing values of the explanatory variables are filled in. The data are normalized (both the training and the test set are scaled with the mean and standard deviation of the training set) and split into a training set and a test set. The same training and test sets are used for all the models.

In [4]:
# Model identifier: selects this model's row in the hyper-parameter table and is
# passed to the bootstrap evaluation further down.
name = "Cox-CC"

In [5]:
# Training split: 'surv_train' holds the time and 'cen_train' the event
# indicator; every remaining column is used as a covariate.
df_train = pd.read_csv("data/real_data/metabric_train.csv")
train_time = df_train['surv_train']
train_event = df_train['cen_train']
x_train = np.array(df_train.drop(['surv_train', 'cen_train'], axis=1), dtype='float32')
# (event, time) survival array built with scikit-survival's Surv helper.
data_train = skSurv.from_arrays(event=train_event, time=train_time)
# Plain (time, event) tuple of NumPy arrays, used as the training target.
y_train = (train_time.values, train_event.values)

# Test split: only the covariate matrix is extracted in this cell.
df_test = pd.read_csv("data/real_data/metabric_test.csv")
x_test = np.array(df_test.drop(['surv_test', 'cen_test'], axis=1), dtype='float32')

# II. Model construction and training

The hyperparameters of the architecture are the ones listed in the parameters dataframe; they were selected by 5-fold cross-validation among 100 candidate sets of parameters.

In [6]:
# Read the ';'-separated hyper-parameter table, using its first column as index.
param_raw = pd.read_csv("model/param_metabric.csv", sep=';', index_col=0)
# Transpose so that the rows are labelled by model name.
param = param_raw.transpose()
# Keep only the hyper-parameters of the model selected by `name`.
param_final = param.loc[name]

In [7]:
# Show the hyper-parameters selected for this model.
print(param_final)

neurons            64
drop              0.4
activation        elu
lr_opt          0.001
optimizer     rmsprop
n_layers            2
Name: Cox-CC, dtype: object


In [8]:
# The selected row has dtype object, so the numeric entries are cast explicitly
# before being handed to the model builder.
neurons, n_layers = (int(param_final[key]) for key in ('neurons', 'n_layers'))
drop, lr_opt = (float(param_final[key]) for key in ('drop', 'lr_opt'))
# Activation and optimizer are used as read from the table, without conversion.
activation, optimizer = param_final['activation'], param_final['optimizer']

The `objective` function of `model_architecture` defines the architecture of the neural network from the selected hyperparameters.

In [9]:
# Build the network and its training callbacks from the selected hyper-parameters.
# The model name is taken from `name` instead of a repeated string literal, so the
# architecture always matches the hyper-parameter row that was loaded.
model, callbacks = model_architecture.objective(
    x_train, neurons, drop, activation, lr_opt, optimizer, n_layers, name=name
)
# NOTE(review): `neurons` is passed as the third positional argument of fit().
# If this is a pycox/torchtuples model, that slot is the batch size - confirm
# that using the layer width as the batch size is intended.
log = model.fit(x_train, y_train, neurons, epochs=100, callbacks=callbacks, verbose=True)

0:	[0s / 0s],		train_loss: 0.7194
1:	[0s / 0s],		train_loss: 0.6779
2:	[0s / 0s],		train_loss: 0.6865
3:	[0s / 0s],		train_loss: 0.6913
4:	[0s / 0s],		train_loss: 0.6542
5:	[0s / 0s],		train_loss: 0.6493
6:	[0s / 0s],		train_loss: 0.6398
7:	[0s / 0s],		train_loss: 0.6175
8:	[0s / 1s],		train_loss: 0.6256
9:	[0s / 1s],		train_loss: 0.5996
10:	[0s / 1s],		train_loss: 0.5850
11:	[0s / 1s],		train_loss: 0.6066
12:	[0s / 1s],		train_loss: 0.6074
13:	[0s / 1s],		train_loss: 0.6335
14:	[0s / 1s],		train_loss: 0.5952
15:	[0s / 1s],		train_loss: 0.5699
16:	[0s / 2s],		train_loss: 0.5808
17:	[0s / 2s],		train_loss: 0.5939
18:	[0s / 2s],		train_loss: 0.5714
19:	[0s / 2s],		train_loss: 0.5951
20:	[0s / 2s],		train_loss: 0.5513
21:	[0s / 2s],		train_loss: 0.5723
22:	[0s / 2s],		train_loss: 0.5533
23:	[0s / 2s],		train_loss: 0.5746
24:	[0s / 2s],		train_loss: 0.5513
25:	[0s / 3s],		train_loss: 0.5848
26:	[0s / 3s],		train_loss: 0.5756
27:	[0s / 3s],		train_loss: 0.5687
28:	[0s / 3s],		train_loss: 0.

# III. Results

We iteratively draw 100 samples from the test dataset to obtain bootstrapped results.

In [10]:
# Number of iterations requested from output_bootstrap.
n_iterations = 100
# Evaluate the fitted model; the returned summary is displayed in the next cell.
# NOTE(review): output_bootstrap is project code not shown here - presumably it
# resamples df_test n_iterations times; confirm against its definition.
results_all = output_results.output_bootstrap(model, n_iterations, df_train, data_train, y_train, df_test,name)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


We report the AUC and Uno's C-index at 5 and 10 years, each with a 95% confidence interval.

In [11]:
# Display the summary table returned by output_bootstrap.
results_all

Unnamed: 0,mean,ci95_lo,ci95_hi,std,count
auc5,0.728634,0.66825,0.781472,0.037695,100.0
auc10,0.699823,0.632737,0.759451,0.038257,100.0
unoc5,0.695785,0.63953,0.744368,0.032744,100.0
unoc10,0.667195,0.625149,0.708422,0.026873,100.0
