In [1]:
import os

# `os.chdir` returns None, so the target directory is computed first and kept
# in `main`; only then do we change into it. The target is two levels above
# the current working directory (presumably the project root that holds the
# "data/" and "model/" folders used below -- confirm).
# NOTE: this cell is not idempotent -- re-running it climbs two more levels.
main = os.path.dirname(os.path.dirname(os.getcwd()))
os.chdir(main)

In [2]:
import numpy as np
import pandas as pd

from model import model_architecture, output_results, utils
from sksurv.util import Surv as skSurv

Using TensorFlow backend.


# I. Simulation data

We first choose the type of pseudo-observation from the following:
- "pseudo_optim"
- "pseudo_km"
- "pseudo_discrete"

In [3]:
# Pseudo-observation variant. The same string is reused below to build the
# names of the pseudo-observation CSV and of the best-parameter CSV.
name = "pseudo_optim"

We choose the censoring rate of the simulated data (either 0.2 or 0.6). The data is simulated with the random function generator introduced by Friedman et al. (2001).
The data is normalized (using the mean and standard deviation of the training set for both the training and test sets) and split into a training set and a test set (df_train and df_test are subsets of df_sim). The same training and test sets are used for all models.

In [4]:
# Censoring rate of the simulated data; it selects the data sub-folder and
# the row of the best-parameter table used below.
rate = 0.2

In [5]:
# Every simulation file for the chosen censoring rate lives in one folder.
dir_sim = f"data/simulations/{rate}/"

df_sim = pd.read_csv(f"{dir_sim}simdata.csv")
df_train = pd.read_csv(f"{dir_sim}sim_train.csv")
df_test = pd.read_csv(f"{dir_sim}sim_test.csv")
# This file is named after the selected pseudo-observation type and the rate.
y_train = pd.read_csv(f"{dir_sim}{name}_{rate}.csv")

In [6]:
# Build the train/test inputs and targets with the project helper.
# `n_picktime` is used later as the row count when reshaping the predictions.
(
    x_train_all,
    y_train_all,
    x_test_all,
    y_test_all,
    n_picktime,
) = utils.prepare_pseudobs_simu(df_train, y_train, df_test, name)

# II. Model training

The model is trained using the best parameters determined by 5-fold cross-validation.

In [7]:
# Best hyper-parameters for this pseudo-observation type. The first CSV column
# becomes the index and is looked up by `rate`; the other columns are the
# individual hyper-parameters.
param = pd.read_csv('model/best_param_simu/' + name + '_best_param.csv', sep=";", index_col=0)

# `.loc[row, column]` replaces the chained `param[column][rate]` lookups.
model, callbacks = model_architecture.build_model_pseudobs(
    x_train_all,
    param.loc[rate, 'neurons'],
    param.loc[rate, 'dropout'],
    param.loc[rate, 'activation'],
    param.loc[rate, 'lr'],
    param.loc[rate, 'optimizer'],
    param.loc[rate, 'n_layers'],
    100,  # NOTE(review): the meaning of this last argument is not visible here -- confirm
)

history = model.fit(
    x_train_all,
    y_train_all,
    batch_size=int(param.loc[rate, 'batch_size']),
    epochs=100,
    callbacks=callbacks,
    verbose=0,
)

# Predict on the test inputs and arrange the output as a table with
# `n_picktime` rows and one column per row of `df_test`.
# NOTE(review): assumes the flat prediction is ordered so that this reshape
# lines up rows and columns correctly -- confirm against the helper.
y_pred = model.predict(x_test_all)
y_pred = pd.DataFrame(y_pred.reshape((n_picktime, len(df_test))))

if name in ("pseudo_discrete", "pseudo_optim2"):
    # For these variants each row is multiplied with all rows above it.
    # A single vectorized cumulative product down the rows replaces the former
    # cell-by-cell loop; it also keeps `surv` defined when `y_pred` is empty.
    y_pred_all = y_pred.cumprod(axis=0)
    surv = y_pred_all
else:
    surv = y_pred

# Label the rows with the sorted unique values of column `s` of `y_train`
# (presumably the evaluation time points -- confirm).
surv = surv.set_index(np.unique(y_train[['s']]))




# III. Results

We output the C-index at median time and the Integrated Brier Score. 

In [8]:
# Compute the evaluation table with the project helper and display it as the
# cell output.
res = output_results.output_sim_data(
    model, surv, x_train_all, df_train, x_test_all, df_test
)
res

Unnamed: 0,c_median,ibs
0,0.868813,0.104529
