In [1]:
import os
# Move the working directory two levels up so that the relative paths used later
# in this notebook ("data/...", "model/...") and the local `model` package resolve.
# os.chdir() returns None, so the target directory is computed first and kept in
# `main`; binding the result of os.chdir() would always leave `main` as None.
# NOTE: this cell is not idempotent — re-running it without restarting the kernel
# climbs two more directories.
main = os.path.dirname(os.path.dirname(os.getcwd()))
os.chdir(main)

In [2]:
import numpy as np
import pandas as pd

from model import model_architecture, output_results
from pycox.models import DeepHitSingle
from sksurv.util import Surv as skSurv

Using TensorFlow backend.


# I. METABRIC data

We use the METABRIC data, combining clinical and pathological information with gene expression data. Missing values in the explanatory variables are imputed. The data is normalized (using the mean and standard deviation of the training set for both the training and test sets) and split into a training set and a test set. The same training and test sets are used for all the models.
For DeepHit, the time variable is discretized. Here we split the time axis into 10 equally spaced discrete intervals.

In [3]:
# Model identifier: selects this model's row in the hyper-parameter table and is
# forwarded to the project's model-building and bootstrap helpers.
name = "DeepHit"

In [4]:
# --- Training set ------------------------------------------------------------
df_train = pd.read_csv("data/real_data/metabric_train.csv")

# Structured survival outcome built from the event-indicator and time columns.
data_train = skSurv.from_arrays(time=df_train['surv_train'], event=df_train['cen_train'])

# Covariate matrix: every column except the two outcome columns, as float32.
covariates_train = df_train.drop(columns=['surv_train', 'cen_train'])
x_train = np.array(covariates_train, dtype='float32')

# --- Discretised target for DeepHit -------------------------------------------
# The label transform is created with `num_durations` discrete time points.
num_durations = 10
labtrans = DeepHitSingle.label_transform(num_durations)


def get_target(df):
    """Return the (time, event) value arrays of `df` as a 2-tuple."""
    return df['surv_train'].values, df['cen_train'].values


y_train = labtrans.fit_transform(*get_target(df_train))
train = (x_train, y_train)

# --- Test set ------------------------------------------------------------------
df_test = pd.read_csv("data/real_data/metabric_test.csv")

# II. Model's construction and training

The parameters of the architecture are the ones listed in the parameters dataframe; they were selected by 5-fold cross-validation among 100 candidate sets of parameters.

In [5]:
# Hyper-parameter table: ';'-separated CSV whose first column is the index.
# It is transposed so that a row can be looked up by model name.
param = (
    pd.read_csv("model/param_metabric.csv", index_col=0, sep=';')
    .transpose()
)

# Hyper-parameters of the model selected by `name`.
param_final = param.loc[name]

In [6]:
# Show the hyper-parameters selected for this model.
print(param_final)

neurons            16
drop              0.4
activation        elu
lr_opt          0.005
optimizer     rmsprop
n_layers            3
Name: DeepHit, dtype: object


In [7]:
# Unpack the selected hyper-parameters, casting the numeric ones explicitly
# (the row is printed with dtype `object`, so values are not typed numerics).

# Integer-valued settings
neurons, n_layers = (int(param_final[key]) for key in ('neurons', 'n_layers'))

# Real-valued settings
drop, lr_opt = (float(param_final[key]) for key in ('drop', 'lr_opt'))

# Settings used as read from the table
activation, optimizer = param_final['activation'], param_final['optimizer']

The objective function is used to define the architecture of the neural network. 

In [8]:
# Build the network and its training callbacks from the selected hyper-parameters.
model, callbacks = model_architecture.objective(
    x_train,
    neurons,
    drop,
    activation,
    lr_opt,
    optimizer,
    n_layers,
    name,
    labtrans=labtrans,
)

# NOTE(review): `neurons` is passed as the third positional argument of fit();
# for a pycox/torchtuples-style model that slot is `batch_size` — confirm that
# reusing the layer width as the batch size is intended.
# NOTE(review): no random seed is set in this notebook before training, so the
# fitted model may differ between runs — verify whether the helper seeds it.
log = model.fit(
    x_train,
    y_train,
    neurons,
    epochs=100,
    callbacks=callbacks,
    verbose=True,
)

0:	[0s / 0s],		train_loss: 0.4732
1:	[0s / 1s],		train_loss: 0.4436
2:	[0s / 2s],		train_loss: 0.4352
3:	[0s / 3s],		train_loss: 0.4292
4:	[0s / 4s],		train_loss: 0.4204
5:	[0s / 5s],		train_loss: 0.4175
6:	[0s / 6s],		train_loss: 0.4086
7:	[0s / 7s],		train_loss: 0.4021
8:	[0s / 8s],		train_loss: 0.4009
9:	[0s / 9s],		train_loss: 0.3964
10:	[0s / 10s],		train_loss: 0.3899
11:	[0s / 11s],		train_loss: 0.3987
12:	[0s / 12s],		train_loss: 0.3939
13:	[0s / 13s],		train_loss: 0.3832
14:	[0s / 14s],		train_loss: 0.3789
15:	[0s / 15s],		train_loss: 0.3842
16:	[0s / 15s],		train_loss: 0.3825
17:	[1s / 17s],		train_loss: 0.3777
18:	[1s / 18s],		train_loss: 0.3823
19:	[1s / 19s],		train_loss: 0.3777
20:	[0s / 20s],		train_loss: 0.3759
21:	[1s / 21s],		train_loss: 0.3740
22:	[0s / 22s],		train_loss: 0.3731
23:	[0s / 23s],		train_loss: 0.3696
24:	[0s / 24s],		train_loss: 0.3647
25:	[0s / 25s],		train_loss: 0.3728
26:	[0s / 25s],		train_loss: 0.3682
27:	[0s / 26s],		train_loss: 0.3682
28:	[0s / 27

# III. Results

We iteratively draw 100 samples from the test dataset to obtain bootstrapped results.

In [9]:
# Number of iterations requested from the project's bootstrap helper.
n_iterations = 100

# Evaluate the fitted model; the helper receives the training frame, the
# structured training outcome, the discretised training target and the test frame.
results_all = output_results.output_bootstrap(
    model,
    n_iterations,
    df_train,
    data_train,
    y_train,
    df_test,
    name,
)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


We report the AUC and Uno's C-index at 5 and 10 years, each with a 95% confidence interval.

In [10]:
# Display the summary returned by the bootstrap evaluation.
results_all

Unnamed: 0,mean,ci95_lo,ci95_hi,std,count
auc5,0.708618,0.65048,0.76053,0.033173,100.0
auc10,0.709882,0.661293,0.769405,0.033538,100.0
unoc5,0.68682,0.637974,0.728844,0.028016,100.0
unoc10,0.656173,0.618298,0.698182,0.021996,100.0
