In [79]:
### migrate code from tensorflow v1 to v2: 
# !tf_upgrade_v2 \
#   --infile class_DeepLongitudinal-Original.py \
#   --outfile class_DeepLongitudinal-Original_v2.py

# !tf_upgrade_v2 \
#   --infile utils_network-Original.py \
#   --outfile utils_network-Original_v2.py

In [80]:
_EPSILON = 1e-08

import numpy as np
import pandas as pd
import tensorflow as tf
import random
import os

from sklearn.model_selection import train_test_split

import import_data as impt
from tf_slim.layers import layers as _layers
from class_DeepLongitudinal import Model_Longitudinal_Attention

from utils_eval             import c_index, brier_score
from utils_log              import save_logging, load_logging
from utils_helper           import f_get_minibatch, f_get_boosted_trainset

In [81]:
def _f_get_pred(sess, model, data, data_mi, pred_horizon):
    '''
        Run the model on histories truncated at the prediction time.

        Builds zero-filled copies of `data` and `data_mi` that keep, for every
        subject, only the measurements taken at or before `pred_horizon`, so that
        no future measurement reaches the model. Returns `model.predict(...)` on
        those copies.

        `sess` is accepted for call-site compatibility but is not used here.
    '''
    # Column 0 of `data` is treated as the gap to the next measurement, so the
    # time of measurement j is the sum of the gaps before it (0 for the first).
    gaps        = data[:, :, 0]
    first_visit = np.zeros([np.shape(data)[0], 1])
    meas_time   = np.concatenate([first_visit, np.cumsum(gaps, axis=1)[:, :-1]], axis=1)

    # number of leading measurements each subject is allowed to keep
    n_keep = np.sum(meas_time <= pred_horizon, axis=1)

    new_data    = np.zeros(np.shape(data))
    new_data_mi = np.zeros(np.shape(data_mi))
    for subj, n_meas in enumerate(n_keep):
        new_data[subj, :n_meas, :]    = data[subj, :n_meas, :]
        new_data_mi[subj, :n_meas, :] = data_mi[subj, :n_meas, :]

    return model.predict(new_data, new_data_mi)


def f_get_risk_predictions(sess, model, data_, data_mi_, pred_time, eval_time):
    '''
        Cumulative risk per event for every (prediction time, evaluation window).

        For each prediction time the model is evaluated on histories truncated at
        that time (see _f_get_pred). The risk for an evaluation window is
            F(t | x, Y, t >= t_M) = \\sum_{t_M <= \\tau < t} P(\\tau | x, Y, \\tau > t_M)
        i.e. the predicted mass between the prediction time and the evaluation
        horizon, divided by the total mass at or after the prediction time.

        Returns a dict {event index k: array [n_subjects, len(pred_time), len(eval_time)]}.
    '''
    # a one-subject forward pass is only used to read off the number of events
    probe = _f_get_pred(sess, model, data_[[0]], data_mi_[[0]], 0)
    _, num_Event, _ = np.shape(probe)

    n_subj   = np.shape(data_)[0]
    risk_all = {k: np.zeros([n_subj, len(pred_time), len(eval_time)]) for k in range(num_Event)}

    for p, p_time in enumerate(pred_time):
        ### PREDICTION
        pred_horizon = int(p_time)
        pred = _f_get_pred(sess, model, data_, data_mi_, pred_horizon)

        # mass at or after the prediction time, over all events (conditioning on t > t_pred)
        still_at_risk = np.sum(np.sum(pred[:, :, pred_horizon:], axis=2), axis=1, keepdims=True) + _EPSILON

        for t, t_time in enumerate(eval_time):
            # a horizon beyond the last time bin simply sums to the end of the axis
            eval_horizon = int(t_time) + pred_horizon

            cum_mass = np.sum(pred[:, :, pred_horizon:(eval_horizon+1)], axis=2)
            risk     = cum_mass / still_at_risk

            for k in range(num_Event):
                risk_all[k][:, p, t] = risk[:, k]

    return risk_all

In [82]:
### Prediction times (t) and evaluation windows (delta t) used for the C-index and Brier score
# NOTE(review): the original comment gave the unit as months, but the `time` and
# `exam_year` columns displayed below look like years -- confirm the unit.
pred_time = list(range(16, 33))   # prediction times 16, 17, ..., 32
eval_time = [0]                   # a single evaluation window, delta t = 0


### 1. Import Dataset
#####      - Users must prepare the dataset in csv format and modify 'import_data.py' following our exemplar 'PBC2'

In [83]:
# Longitudinal measurements, one row per (ID, exam_year).
df = pd.read_csv('./data/data_longi_expanded_var_for_dynamic_deephit_and_ts_extraction_2.csv')
# df = pd.read_csv('./data/data_longi_expanded_var_for_dynamic_deephit.csv')

# Subject IDs of the predefined training / validation / testing splits;
# the fold loop further down selects one column of each table per fold.
trainingid_all, validationid_all, testingid_all = (
    pd.read_csv(id_file)
    for id_file in (
        './data/all_training_set_ID_2.csv',
        './data/all_validation_set_ID_2.csv',
        './data/all_testing_set_ID_2.csv',
    )
)



In [84]:
df

Unnamed: 0,ID,event,exam_year,time,AGE_Y0,MALE,RACEBLACK,ARMCI,ASMA,BEER,...,PULSE,SMKNW,WGT,WINE,WST,HBM,DBP,SBP,CHNOW,PATCK
0,100033323702,0,0,32.134155,22,1,0,0.0,0,8,...,34,1,161.0,0,75.0,0,74.0,117.0,0,0
1,100033323702,0,2,32.134155,22,1,0,30.0,1,2,...,37,1,159.5,0,80.5,0,69.0,116.0,0,1
2,100033323702,0,5,32.134155,22,1,0,30.0,0,2,...,30,0,169.0,0,78.5,0,73.0,105.0,0,0
3,100033323702,0,7,32.134155,22,1,0,32.5,0,1,...,38,0,174.5,0,81.0,0,72.0,115.0,0,0
4,100033323702,0,15,32.134155,22,1,0,35.0,0,0,...,38,0,178.0,0,82.5,0,71.0,107.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19983,416817227898,0,2,31.770021,22,1,1,38.0,0,8,...,27,1,199.5,0,95.5,0,63.0,102.0,0,0
19984,416817227898,0,5,31.770021,22,1,1,36.0,0,24,...,35,1,204.0,0,104.0,0,101.0,133.0,0,0
19985,416817227898,0,7,31.770021,22,1,1,36.0,0,28,...,33,1,196.0,0,94.0,0,77.0,133.0,0,0
19986,416817227898,0,10,31.770021,22,1,1,35.5,0,12,...,30,0,192.0,0,92.0,0,66.0,112.0,0,0


In [85]:
df.columns.values

array(['ID', 'event', 'exam_year', 'time', 'AGE_Y0', 'MALE', 'RACEBLACK',
       'ARMCI', 'ASMA', 'BEER', 'BMI', 'CANCR', 'CGTDY', 'CHOL', 'DFPAY',
       'DIAB', 'ED', 'GALL', 'GLU', 'HDL', 'KIDNY', 'LDL', 'LIFE', 'LIQR',
       'LIVER', 'MENTL', 'NPREG', 'NTRIG', 'PSTYR', 'PULSE', 'SMKNW',
       'WGT', 'WINE', 'WST', 'HBM', 'DBP', 'SBP', 'CHNOW', 'PATCK'],
      dtype=object)

In [86]:
# Binary covariates.
bin_list = [
    'MALE', 'RACEBLACK', 'ASMA', 'CANCR', 'DIAB',
    'GALL', 'KIDNY', 'LIVER', 'MENTL', 'SMKNW',
    'HBM', 'CHNOW', 'PATCK',
]

# Continuous covariates.
cont_list = [
    'AGE_Y0', 'ARMCI', 'BEER', 'BMI', 'CGTDY', 'CHOL',
    'ED', 'HDL', 'LDL', 'LIFE', 'LIQR',
    'NPREG', 'NTRIG', 'PSTYR', 'PULSE', 'WGT',
    'WINE', 'WST', 'DBP', 'SBP', 'GLU', 'DFPAY',
]

# total number of covariates handed to the importer
len(bin_list) + len(cont_list)

35

In [87]:
# data_mode = 'PBC2'
data_mode = 'CARDIA_ASCVD'
seed      = 1234

##### IMPORT DATASET
# Quantities produced by this cell:
#     num_Category            = max event/censoring time * 1.2
#     num_Event               = number of events i.e. len(np.unique(label))-1
#     max_length              = maximum number of measurements
#     x_dim                   = data dimension including delta (1 + num_features)
#     x_dim_cont              = dim of continuous features
#     x_dim_bin               = dim of binary features
#     mask1, mask2, mask3     = used for cause-specific network (FCNet structure)

# (x_dim, x_dim_cont, x_dim_bin), (data, time, label), (mask1, mask2, mask3), (data_mi) = impt.import_dataset(norm_mode = 'standard')

(x_dim, x_dim_cont, x_dim_bin), (data, time, label), (mask1, mask2, mask3), data_mi = impt.import_dataset(
    df_=df,
    bin_list=bin_list,
    cont_list=cont_list,
    norm_mode='standard',
)

# the last two axes of mask1 are read as [Num_Event, Num_Category]
num_Event, num_Category = np.shape(mask1)[1:]
max_length              = data.shape[1]

# folder that receives the hyper-parameter log and the model checkpoints
file_path = '{}'.format(data_mode)
if not os.path.exists(file_path):
    os.makedirs(file_path)

In [88]:
data.shape

(3539, 6, 36)

In [89]:
data[1,:,:10]

array([[ 2.        ,  0.82808163,  1.13869829, -0.11009384,  0.37073586,
        -0.48525198,  1.16086178,  0.19003271, -0.31613944,  1.24086131],
       [ 3.        ,  0.82808163,  0.74225786, -0.25547939,  0.27192247,
        -0.48525198,  0.2891797 ,  0.19003271, -0.81612209,  0.80177978],
       [ 2.        ,  0.82808163,  1.13869829, -0.11009384,  0.69845156,
        -0.48525198,  0.55068432,  0.58801822, -0.60184381,  0.83314274],
       [ 3.        ,  0.82808163,  1.03958819, -0.40086494,  0.84337614,
        -0.48525198,  1.36425426,  0.58801822, -0.6732699 ,  1.36631318],
       [ 5.        ,  0.82808163,  1.03958819, -0.40086494,  0.64729049,
        -0.48525198,  1.18991785, -0.20795281, -1.03040037,  1.55449097],
       [ 0.        ,  0.82808163,  0.94047808, -0.25547939,  0.63102808,
        -0.48525198, -0.58250238, -0.20795281, -0.31613944, -0.48410186]])

### 2. Set Hyper-Parameters
##### - Play with your own hyper-parameters!

In [103]:
# Training-stage switches read by the fold loop.
burn_in_mode = 'ON'  # {'ON', 'OFF'}
boost_mode   = 'ON'  # {'ON', 'OFF'}

##### HYPER-PARAMETERS
new_parser = {
    'mb_size': 2,                 # previously tried: 4, 64

    'iteration_burn_in': 3000,
    'iteration': 25000,

    'keep_prob': 0.6,
    'lr_train': 1e-4,

    'h_dim_RNN': 100,
    'h_dim_FC': 100,
    'num_layers_RNN': 14,         # previously tried: 8, 6, 4, 2
    'num_layers_ATT': 2,
    'num_layers_CS': 2,

    'RNN_type': 'LSTM',           # {'LSTM', 'GRU'}

    'FC_active_fn': tf.nn.relu,
    'RNN_active_fn': tf.nn.tanh,

    'reg_W': 1e-5,
    'reg_W_out': 0.,

    'alpha': 1.0,
    'beta': 0.1,
    'gamma': 1.0,
}

# INPUT DIMENSIONS
input_dims = dict(
    x_dim=x_dim,
    x_dim_cont=x_dim_cont,
    x_dim_bin=x_dim_bin,
    num_Event=num_Event,
    num_Category=num_Category,
    max_length=max_length,
)

# NETWORK HYPER-PARAMETERS
# The architecture entries are copied straight from new_parser, in this order.
network_settings = {
    name: new_parser[name]
    for name in (
        'h_dim_RNN', 'h_dim_FC',
        'num_layers_RNN', 'num_layers_ATT', 'num_layers_CS',
        'RNN_type', 'FC_active_fn', 'RNN_active_fn',
    )
}
# TF1 original: 'initial_W' : tf.contrib.layers.xavier_initializer()
# NOTE(review): xavier_initializer() draws from a uniform distribution by default,
# whereas glorot_normal() is a (truncated) normal -- glorot_uniform() would be the
# like-for-like replacement. Confirm the switch is intended.
network_settings['initial_W'] = tf.keras.initializers.glorot_normal()
network_settings['reg_W']     = new_parser['reg_W']
network_settings['reg_W_out'] = new_parser['reg_W_out']

# Scalars used directly by the training loop.
mb_size, iteration, iteration_burn_in = (
    new_parser[name] for name in ('mb_size', 'iteration', 'iteration_burn_in')
)
keep_prob, lr_train = new_parser['keep_prob'], new_parser['lr_train']
alpha, beta, gamma  = new_parser['alpha'], new_parser['beta'], new_parser['gamma']

# SAVE HYPERPARAMETERS
log_name = file_path + '/hyperparameters_log.txt'
save_logging(new_parser, log_name)

### 4. Train the Network

In [91]:
# ## Tuning: training with number of iteration 25000 -> 50000

# fold = 1
# print('FOLD '+str(fold) + '...')

# ##### get training, testing, and validation data:
# df_train = df.loc[df['ID'].isin(trainingid_all.iloc[:,fold])]
# df_val = df.loc[df['ID'].isin(validationid_all.iloc[:,fold])]
# df_test = df.loc[df['ID'].isin(testingid_all.iloc[:,fold])]

# # ### TRAINING-TESTING SPLIT in the format suitable for this network

# (x_dim, x_dim_cont, x_dim_bin), (te_data, te_time, te_label), (te_mask1, te_mask2, te_mask3), (te_data_mi) = impt.import_dataset(df_ = df_test)
# (x_dim, x_dim_cont, x_dim_bin), (va_data, va_time, va_label), (va_mask1, va_mask2, va_mask3), (va_data_mi) = impt.import_dataset(df_ = df_val)
# (x_dim, x_dim_cont, x_dim_bin), (tr_data, tr_time, tr_label), (tr_mask1, tr_mask2, tr_mask3), (tr_data_mi) = impt.import_dataset(df_ = df_train)

# if boost_mode == 'ON':
#     tr_data, tr_data_mi, tr_time, tr_label, tr_mask1, tr_mask2, tr_mask3 = f_get_boosted_trainset(tr_data, tr_data_mi, tr_time, tr_label, tr_mask1, tr_mask2, tr_mask3)  







# ##### CREATE AND TRAIN NETWORK:
# # tf.reset_default_graph()
# tf.compat.v1.reset_default_graph()

# # config = tf.ConfigProto()
# config = tf.compat.v1.ConfigProto()

# config.gpu_options.allow_growth = True
# sess = tf.compat.v1.Session(config=config)

# model = Model_Longitudinal_Attention(sess, "Dyanmic-DeepHit", input_dims, network_settings)
# # saver = tf.train.Saver()
# saver = tf.compat.v1.train.Saver()

# # sess.run(tf.global_variables_initializer())
# sess.run(tf.compat.v1.global_variables_initializer())

# ### TRAINING - BURN-IN
# if burn_in_mode == 'ON':
#     print( "BURN-IN TRAINING ...")
#     for itr in range(iteration_burn_in):
#         x_mb, x_mi_mb, k_mb, t_mb, m1_mb, m2_mb, m3_mb = f_get_minibatch(mb_size, tr_data, tr_data_mi, tr_label, tr_time, tr_mask1, tr_mask2, tr_mask3)
#         DATA = (x_mb, k_mb, t_mb)
#         MISSING = (x_mi_mb)

#         _, loss_curr = model.train_burn_in(DATA, MISSING, keep_prob, lr_train)

#         if (itr+1)%1000 == 0:
#             print('itr: {:04d} | loss: {:.4f}'.format(itr+1, loss_curr))


# ### TRAINING - MAIN
# print( "MAIN TRAINING ...")
# min_valid = 0.5

# for itr in range(iteration):
#     x_mb, x_mi_mb, k_mb, t_mb, m1_mb, m2_mb, m3_mb = f_get_minibatch(mb_size, tr_data, tr_data_mi, tr_label, tr_time, tr_mask1, tr_mask2, tr_mask3)
#     DATA = (x_mb, k_mb, t_mb)
#     MASK = (m1_mb, m2_mb, m3_mb)
#     MISSING = (x_mi_mb)
#     PARAMETERS = (alpha, beta, gamma)

#     _, loss_curr = model.train(DATA, MASK, MISSING, PARAMETERS, keep_prob, lr_train)

#     if (itr+1)%1000 == 0:
#         print('itr: {:04d} | loss: {:.4f}'.format(itr+1, loss_curr))

#     ### VALIDATION  (based on average C-index of our interest)
#     if (itr+1)%1000 == 0:        
#         risk_all = f_get_risk_predictions(sess, model, va_data, va_data_mi, pred_time, eval_time)

#         for p, p_time in enumerate(pred_time):
#             pred_horizon = int(p_time)
#             val_result1 = np.zeros([num_Event, len(eval_time)])

#             for t, t_time in enumerate(eval_time):                
#                 eval_horizon = int(t_time) + pred_horizon
#                 for k in range(num_Event):
#                     val_result1[k, t] = c_index(risk_all[k][:, p, t], va_time, (va_label[:,0] == k+1).astype(int), eval_horizon) #-1 for no event (not comparable)

#             if p == 0:
#                 val_final1 = val_result1
#             else:
#                 val_final1 = np.append(val_final1, val_result1, axis=0)

#         tmp_valid = np.mean(val_final1)

#         if tmp_valid >  min_valid:
#             min_valid = tmp_valid
#             saver.save(sess, file_path + '/model')
#             print( 'updated.... average c-index = ' + str('%.4f' %(tmp_valid)))








# ### PREDICTION ON TEST SET               
# #saver.restore(sess, file_path + '/model')

# risk_all = f_get_risk_predictions(sess, model, te_data, te_data_mi, pred_time, eval_time)

# for p, p_time in enumerate(pred_time):
#     pred_horizon = int(p_time)
#     result1, result2 = np.zeros([num_Event, len(eval_time)]), np.zeros([num_Event, len(eval_time)])

#     for t, t_time in enumerate(eval_time):                
#         eval_horizon = int(t_time) + pred_horizon
#         for k in range(num_Event):
#             result1[k, t] = c_index(risk_all[k][:, p, t], te_time, (te_label[:,0] == k+1).astype(int), eval_horizon) #-1 for no event (not comparable)
#             result2[k, t] = brier_score(risk_all[k][:, p, t], te_time, (te_label[:,0] == k+1).astype(int), eval_horizon) #-1 for no event (not comparable)

#     if p == 0:
#         final1, final2 = result1, result2
#     else:
#         final1, final2 = np.append(final1, result1, axis=0), np.append(final2, result2, axis=0)








# ### PRINT PERFORMANCE RESULTS
# row_header = []
# for p_time in pred_time:
#     for t in range(num_Event):
#         row_header.append('pred_time {}: event_{}'.format(p_time,k+1))

# col_header = []
# for t_time in eval_time:
#     col_header.append('eval_time {}'.format(t_time))


# # c-index result
# df1 = pd.DataFrame(final1, index = row_header, columns=col_header)

# # brier-score result
# df2 = pd.DataFrame(final2, index = row_header, columns=col_header)

# print('========================================================')
# print('--------------------------------------------------------')
# print('- C-INDEX: ')
# print(df1)
# print('--------------------------------------------------------')
# print('- BRIER-SCORE: ')
# print(df2)
# print('========================================================')








# ### SAVE C-INDEX, BRIER SCORE, and PREDICTED PROB RISK ON TEST SET
# actual_fold = fold+1
# work_dir = 'U:/Hieu/CARDIA_longi_project'
# savedir = os.path.join(work_dir,'rdata_files/dynamic_deephit_expanded_var_y15_2_fold_'+str(actual_fold)+'/')
# try: 
#     os.makedirs(savedir)
# except OSError:
#     if not os.path.isdir(savedir):
#         raise



# c_over_time = df1.iloc[:,0]
# # c_over_time.to_csv(savedir+'/c_index.csv', index = None, header = True)

# brier_over_time = df2.iloc[:,0]
# # brier_over_time.to_csv(savedir+'/brier_score.csv', index = None, header = True)



# prob_risk_test_df = pd.DataFrame(risk_all[0][:,:,0])
# prob_risk_test_df.columns = pred_time
# prob_risk_test_df.insert(loc=0, column='ID', value=np.unique(df_test['ID']))
# # prob_risk_test_df.to_csv(savedir+'/prob_risk_test.csv', index = None, header = True)



In [104]:
### TRAINING AND TESTING IN LOOP:

In [105]:
nfolds = 10

# Root folder for the per-fold result files (machine-specific absolute path).
work_dir = 'U:/Hieu/CARDIA_longi_project'


def _f_metric_table(metric_fn, risk_all, time_, label_, pred_time, eval_time, num_Event):
    '''
        Evaluate `metric_fn` for every (prediction time, event, evaluation window).

        metric_fn : called as metric_fn(risk, time_, event_indicator, eval_horizon),
                    the way c_index and brier_score are called in this notebook
        risk_all  : output of f_get_risk_predictions
        time_     : event/censoring times of the evaluated subjects
        label_    : event labels; column 0 is compared against k+1 for event k

        Returns an array with one row per (prediction time, event) pair -- the
        prediction time varying slowest -- and one column per evaluation window.
        Per the original comments the metrics return -1 when there is no event
        (not comparable); such entries are stored as they are.
    '''
    table = np.zeros([len(pred_time) * num_Event, len(eval_time)])
    for p, p_time in enumerate(pred_time):
        pred_horizon = int(p_time)
        for t, t_time in enumerate(eval_time):
            eval_horizon = int(t_time) + pred_horizon
            for k in range(num_Event):
                is_event_k = (label_[:, 0] == k+1).astype(int)
                table[p * num_Event + k, t] = metric_fn(risk_all[k][:, p, t], time_, is_event_k, eval_horizon)
    return table


for fold in range(nfolds):

    print('FOLD '+str(fold) + '...')

    # `seed` was declared but never applied. Seed every RNG the fold may draw
    # from (a different but fixed seed per fold) so reruns are as repeatable as
    # the underlying ops allow.
    fold_seed = seed + fold
    random.seed(fold_seed)
    np.random.seed(fold_seed)

    ##### get training, testing, and validation data:
    df_train = df.loc[df['ID'].isin(trainingid_all.iloc[:,fold])]
    df_val = df.loc[df['ID'].isin(validationid_all.iloc[:,fold])]
    df_test = df.loc[df['ID'].isin(testingid_all.iloc[:,fold])]

    ### TRAINING-TESTING SPLIT in the format suitable for this network
    # NOTE(review): these calls rely on import_dataset's defaults for bin_list,
    # cont_list and norm_mode, unlike the full-dataset call that built
    # input_dims -- confirm the defaults match that call.
    (x_dim, x_dim_cont, x_dim_bin), (te_data, te_time, te_label), (te_mask1, te_mask2, te_mask3), (te_data_mi) = impt.import_dataset(df_ = df_test)
    (x_dim, x_dim_cont, x_dim_bin), (va_data, va_time, va_label), (va_mask1, va_mask2, va_mask3), (va_data_mi) = impt.import_dataset(df_ = df_val)
    (x_dim, x_dim_cont, x_dim_bin), (tr_data, tr_time, tr_label), (tr_mask1, tr_mask2, tr_mask3), (tr_data_mi) = impt.import_dataset(df_ = df_train)

    if boost_mode == 'ON':
        tr_data, tr_data_mi, tr_time, tr_label, tr_mask1, tr_mask2, tr_mask3 = f_get_boosted_trainset(tr_data, tr_data_mi, tr_time, tr_label, tr_mask1, tr_mask2, tr_mask3)

    ##### CREATE AND TRAIN NETWORK:
    # (TF1 names: tf.reset_default_graph, tf.ConfigProto, tf.train.Saver,
    #  tf.global_variables_initializer)
    tf.compat.v1.reset_default_graph()
    # the graph-level seed must be set after the reset and before the model is built
    tf.compat.v1.set_random_seed(fold_seed)

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.compat.v1.Session(config=config)

    # The session is closed in `finally`, so a fold no longer leaves its session
    # (and the memory it holds) open while the next folds run.
    try:
        model = Model_Longitudinal_Attention(sess, "Dyanmic-DeepHit", input_dims, network_settings)
        saver = tf.compat.v1.train.Saver()

        sess.run(tf.compat.v1.global_variables_initializer())

        ### TRAINING - BURN-IN
        if burn_in_mode == 'ON':
            print( "BURN-IN TRAINING ...")
            for itr in range(iteration_burn_in):
                x_mb, x_mi_mb, k_mb, t_mb, m1_mb, m2_mb, m3_mb = f_get_minibatch(mb_size, tr_data, tr_data_mi, tr_label, tr_time, tr_mask1, tr_mask2, tr_mask3)
                DATA = (x_mb, k_mb, t_mb)
                MISSING = (x_mi_mb)

                _, loss_curr = model.train_burn_in(DATA, MISSING, keep_prob, lr_train)

                if (itr+1)%1000 == 0:
                    print('itr: {:04d} | loss: {:.4f}'.format(itr+1, loss_curr))

        ### TRAINING - MAIN
        print( "MAIN TRAINING ...")
        min_valid = 0.5        # best validation c-index so far (a checkpoint must beat 0.5 to be kept)
        best_saved = False     # True once THIS fold has written a checkpoint

        for itr in range(iteration):
            x_mb, x_mi_mb, k_mb, t_mb, m1_mb, m2_mb, m3_mb = f_get_minibatch(mb_size, tr_data, tr_data_mi, tr_label, tr_time, tr_mask1, tr_mask2, tr_mask3)
            DATA = (x_mb, k_mb, t_mb)
            MASK = (m1_mb, m2_mb, m3_mb)
            MISSING = (x_mi_mb)
            PARAMETERS = (alpha, beta, gamma)

            _, loss_curr = model.train(DATA, MASK, MISSING, PARAMETERS, keep_prob, lr_train)

            if (itr+1)%1000 == 0:
                print('itr: {:04d} | loss: {:.4f}'.format(itr+1, loss_curr))

                ### VALIDATION  (based on average C-index of our interest)
                risk_all = f_get_risk_predictions(sess, model, va_data, va_data_mi, pred_time, eval_time)
                val_final1 = _f_metric_table(c_index, risk_all, va_time, va_label, pred_time, eval_time, num_Event)
                tmp_valid = np.mean(val_final1)

                if tmp_valid >  min_valid:
                    min_valid = tmp_valid
                    saver.save(sess, file_path + '/model')
                    best_saved = True
                    print( 'updated.... average c-index = ' + str('%.4f' %(tmp_valid)))

        ### PREDICTION ON TEST SET
        # Reload the weights with the best validation c-index. Without this the
        # test metrics come from whatever the last iteration left behind and the
        # checkpoint selection above has no effect. The checkpoint path is shared
        # by all folds, so only restore what this fold itself saved.
        if best_saved:
            saver.restore(sess, file_path + '/model')
        else:
            print('no checkpoint exceeded c-index 0.5 in this fold; testing with the final weights')

        risk_all = f_get_risk_predictions(sess, model, te_data, te_data_mi, pred_time, eval_time)
    finally:
        sess.close()

    # final1: c-index, final2: brier score; rows = (pred_time, event), columns = eval_time
    final1 = _f_metric_table(c_index, risk_all, te_time, te_label, pred_time, eval_time, num_Event)
    final2 = _f_metric_table(brier_score, risk_all, te_time, te_label, pred_time, eval_time, num_Event)

    ### PRINT PERFORMANCE RESULTS
    # The original inner loop iterated `t` but formatted a stale `k` left over
    # from the metric loops, so every row carried the last event's label.
    row_header = ['pred_time {}: event_{}'.format(p_time, k+1)
                  for p_time in pred_time
                  for k in range(num_Event)]
    col_header = ['eval_time {}'.format(t_time) for t_time in eval_time]

    # c-index result
    df1 = pd.DataFrame(final1, index = row_header, columns=col_header)

    # brier-score result
    df2 = pd.DataFrame(final2, index = row_header, columns=col_header)

    print('========================================================')
    print('--------------------------------------------------------')
    print('- C-INDEX: ')
    print(df1)
    print('--------------------------------------------------------')
    print('- BRIER-SCORE: ')
    print(df2)
    print('========================================================')

    ### SAVE C-INDEX, BRIER SCORE, and PREDICTED PROB RISK ON TEST SET
    actual_fold = fold+1
    savedir = os.path.join(work_dir,'rdata_files/dynamic_deephit_expanded_var_y15_2_3_fold_'+str(actual_fold)+'/')
    # still raises if the path exists but is not a directory
    os.makedirs(savedir, exist_ok=True)

    # only the first evaluation window is written out
    c_over_time = df1.iloc[:,0]
    c_over_time.to_csv(os.path.join(savedir, 'c_index.csv'), index = False, header = True)

    brier_over_time = df2.iloc[:,0]
    brier_over_time.to_csv(os.path.join(savedir, 'brier_score.csv'), index = False, header = True)

    # risk of the first event at every prediction time, first evaluation window
    prob_risk_test_df = pd.DataFrame(risk_all[0][:,:,0])
    prob_risk_test_df.columns = pred_time
    # NOTE(review): np.unique returns the IDs sorted; this assumes import_dataset
    # orders subjects the same way -- confirm, otherwise risks are attached to
    # the wrong IDs.
    prob_risk_test_df.insert(loc=0, column='ID', value=np.unique(df_test['ID']))
    prob_risk_test_df.to_csv(os.path.join(savedir, 'prob_risk_test.csv'), index = False, header = True)



FOLD 0...




BURN-IN TRAINING ...
itr: 1000 | loss: 0.9841
itr: 2000 | loss: 2.1010
itr: 3000 | loss: 4.3718
MAIN TRAINING ...
itr: 1000 | loss: 1.5212
updated.... average c-index = 0.6673
itr: 2000 | loss: 1.6126
updated.... average c-index = 0.7608
itr: 3000 | loss: 4.6916
updated.... average c-index = 0.7757
itr: 4000 | loss: 2.2657
itr: 5000 | loss: 4.0656
itr: 6000 | loss: 5.7203
updated.... average c-index = 0.7824
itr: 7000 | loss: 1.9063
itr: 8000 | loss: 3.3388
updated.... average c-index = 0.7986
itr: 9000 | loss: 2.5992
itr: 10000 | loss: 3.5783
itr: 11000 | loss: 1.1218
updated.... average c-index = 0.8026
itr: 12000 | loss: 0.8764
updated.... average c-index = 0.8131
itr: 13000 | loss: 2.5434
itr: 14000 | loss: 1.4077
itr: 15000 | loss: 2.0619
itr: 16000 | loss: 9.3406
itr: 17000 | loss: 2.6736
itr: 18000 | loss: 1.0669
itr: 19000 | loss: 1.5238
itr: 20000 | loss: 3.9065
itr: 21000 | loss: 1.3587
itr: 22000 | loss: 1.5364
itr: 23000 | loss: 0.7821
itr: 24000 | loss: 0.4045
itr: 25000 |



BURN-IN TRAINING ...
itr: 1000 | loss: 0.3765
itr: 2000 | loss: 0.6930
itr: 3000 | loss: 2.3467
MAIN TRAINING ...
itr: 1000 | loss: 2.7992
itr: 2000 | loss: 4.2098
itr: 3000 | loss: 1.1203
updated.... average c-index = 0.5114
itr: 4000 | loss: 6.1130
updated.... average c-index = 0.5325
itr: 5000 | loss: 1.6648
updated.... average c-index = 0.5458
itr: 6000 | loss: 2.5664
updated.... average c-index = 0.5793
itr: 7000 | loss: 1.4815
updated.... average c-index = 0.5938
itr: 8000 | loss: 2.4031
updated.... average c-index = 0.6025
itr: 9000 | loss: 1.8370
updated.... average c-index = 0.6149
itr: 10000 | loss: 0.7023
updated.... average c-index = 0.6175
itr: 11000 | loss: 1.4111
updated.... average c-index = 0.6268
itr: 12000 | loss: 0.5209
itr: 13000 | loss: 0.2376
itr: 14000 | loss: 1.8225
itr: 15000 | loss: 3.1495
itr: 16000 | loss: 2.3388
itr: 17000 | loss: 1.1114
itr: 18000 | loss: 2.3743
itr: 19000 | loss: 4.1710
itr: 20000 | loss: 1.3459
itr: 21000 | loss: 4.2831
itr: 22000 | los



BURN-IN TRAINING ...
itr: 1000 | loss: 2.2851
itr: 2000 | loss: 2.2054
itr: 3000 | loss: 0.9306
MAIN TRAINING ...
itr: 1000 | loss: 2.1943
itr: 2000 | loss: 3.0154
itr: 3000 | loss: 2.2231
itr: 4000 | loss: 3.2739
updated.... average c-index = 0.5520
itr: 5000 | loss: 1.6331
updated.... average c-index = 0.5683
itr: 6000 | loss: 2.3267
updated.... average c-index = 0.6023
itr: 7000 | loss: 4.2480
updated.... average c-index = 0.6349
itr: 8000 | loss: 1.5653
updated.... average c-index = 0.6415
itr: 9000 | loss: 1.9824
updated.... average c-index = 0.6451
itr: 10000 | loss: 1.6202
updated.... average c-index = 0.6490
itr: 11000 | loss: 1.2913
itr: 12000 | loss: 2.7753
itr: 13000 | loss: 1.6775
itr: 14000 | loss: 2.2251
itr: 15000 | loss: 1.6808
itr: 16000 | loss: 3.6664
itr: 17000 | loss: 1.5899
itr: 18000 | loss: 3.0707
itr: 19000 | loss: 2.7667
itr: 20000 | loss: 1.4642
itr: 21000 | loss: 5.2506
itr: 22000 | loss: 1.2720
itr: 23000 | loss: 7.8869
itr: 24000 | loss: 1.8920
itr: 25000 |



BURN-IN TRAINING ...
itr: 1000 | loss: 5.7960
itr: 2000 | loss: 2.1917
itr: 3000 | loss: 0.7365
MAIN TRAINING ...
itr: 1000 | loss: 3.2116
itr: 2000 | loss: 2.8738
itr: 3000 | loss: 1.2876
itr: 4000 | loss: 2.5159
updated.... average c-index = 0.5042
itr: 5000 | loss: 4.9533
updated.... average c-index = 0.5724
itr: 6000 | loss: 1.6223
updated.... average c-index = 0.6341
itr: 7000 | loss: 2.7828
updated.... average c-index = 0.6563
itr: 8000 | loss: 5.8360
updated.... average c-index = 0.6590
itr: 9000 | loss: 2.8260
updated.... average c-index = 0.6981
itr: 10000 | loss: 1.3082
itr: 11000 | loss: 13.3568
itr: 12000 | loss: 2.0715
updated.... average c-index = 0.7005
itr: 13000 | loss: 1.7643
updated.... average c-index = 0.7116
itr: 14000 | loss: 0.8754
updated.... average c-index = 0.7228
itr: 15000 | loss: 0.4425
itr: 16000 | loss: 1.5847
itr: 17000 | loss: 2.9843
itr: 18000 | loss: 1.8468
itr: 19000 | loss: 0.7156
updated.... average c-index = 0.7251
itr: 20000 | loss: 1.7114
upda



BURN-IN TRAINING ...
itr: 1000 | loss: 2.1789
itr: 2000 | loss: 5.4930
itr: 3000 | loss: 1.8820
MAIN TRAINING ...
itr: 1000 | loss: 6.3418
updated.... average c-index = 0.6235
itr: 2000 | loss: 2.1372
updated.... average c-index = 0.6661
itr: 3000 | loss: 2.8402
itr: 4000 | loss: 3.0922
itr: 5000 | loss: 1.5858
itr: 6000 | loss: 3.1020
updated.... average c-index = 0.6700
itr: 7000 | loss: 3.0811
itr: 8000 | loss: 3.5403
itr: 9000 | loss: 1.0302
itr: 10000 | loss: 2.9338
itr: 11000 | loss: 2.5267
itr: 12000 | loss: 1.6548
itr: 13000 | loss: 2.3056
itr: 14000 | loss: 4.8777
itr: 15000 | loss: 1.1459
itr: 16000 | loss: 2.0909
itr: 17000 | loss: 4.8941
itr: 18000 | loss: 6.5714
updated.... average c-index = 0.6812
itr: 19000 | loss: 4.6599
itr: 20000 | loss: 2.7742
itr: 21000 | loss: 3.7349
updated.... average c-index = 0.6913
itr: 22000 | loss: 0.7383
itr: 23000 | loss: 1.3291
itr: 24000 | loss: 5.9712
itr: 25000 | loss: 1.9216
--------------------------------------------------------
- C



BURN-IN TRAINING ...
itr: 1000 | loss: 0.7545
itr: 2000 | loss: 3.0625
itr: 3000 | loss: 2.8232
MAIN TRAINING ...
itr: 1000 | loss: 2.4247
itr: 2000 | loss: 2.5831
updated.... average c-index = 0.5072
itr: 3000 | loss: 6.2494
updated.... average c-index = 0.6198
itr: 4000 | loss: 1.7752
updated.... average c-index = 0.6902
itr: 5000 | loss: 4.0008
updated.... average c-index = 0.7232
itr: 6000 | loss: 3.1218
updated.... average c-index = 0.7250
itr: 7000 | loss: 4.6441
updated.... average c-index = 0.7654
itr: 8000 | loss: 0.7951
updated.... average c-index = 0.7925
itr: 9000 | loss: 3.6192
updated.... average c-index = 0.8021
itr: 10000 | loss: 5.0040
updated.... average c-index = 0.8154
itr: 11000 | loss: 1.5209
updated.... average c-index = 0.8179
itr: 12000 | loss: 7.7166
updated.... average c-index = 0.8210
itr: 13000 | loss: 3.3343
itr: 14000 | loss: 2.1136
itr: 15000 | loss: 1.9751
itr: 16000 | loss: 8.8918
itr: 17000 | loss: 3.5875
itr: 18000 | loss: 1.9094
itr: 19000 | loss: 1



BURN-IN TRAINING ...
itr: 1000 | loss: 1.2868
itr: 2000 | loss: 3.1824
itr: 3000 | loss: 1.5893
MAIN TRAINING ...
itr: 1000 | loss: 2.9678
itr: 2000 | loss: 2.3659
itr: 3000 | loss: 3.0313
updated.... average c-index = 0.5041
itr: 4000 | loss: 2.9935
updated.... average c-index = 0.5445
itr: 5000 | loss: 4.7492
itr: 6000 | loss: 2.7332
updated.... average c-index = 0.6184
itr: 7000 | loss: 0.7044
updated.... average c-index = 0.6413
itr: 8000 | loss: 2.8696
updated.... average c-index = 0.7058
itr: 9000 | loss: 1.1153
itr: 10000 | loss: 2.3559
updated.... average c-index = 0.7191
itr: 11000 | loss: 1.9476
updated.... average c-index = 0.7263
itr: 12000 | loss: 1.7396
itr: 13000 | loss: 5.6368
updated.... average c-index = 0.7298
itr: 14000 | loss: 1.3585
updated.... average c-index = 0.7319
itr: 15000 | loss: 1.1214
updated.... average c-index = 0.7342
itr: 16000 | loss: 1.7907
updated.... average c-index = 0.7382
itr: 17000 | loss: 3.6276
itr: 18000 | loss: 1.6748
itr: 19000 | loss: 2



BURN-IN TRAINING ...
itr: 1000 | loss: 1.9354
itr: 2000 | loss: 2.6665
itr: 3000 | loss: 3.1839
MAIN TRAINING ...
itr: 1000 | loss: 2.6284
updated.... average c-index = 0.6607
itr: 2000 | loss: 3.4967
itr: 3000 | loss: 2.4623
updated.... average c-index = 0.7061
itr: 4000 | loss: 2.8889
updated.... average c-index = 0.7696
itr: 5000 | loss: 1.8819
itr: 6000 | loss: 1.2414
itr: 7000 | loss: 0.7712
itr: 8000 | loss: 2.0418
updated.... average c-index = 0.7976
itr: 9000 | loss: 1.2366
updated.... average c-index = 0.8165
itr: 10000 | loss: 1.6107
updated.... average c-index = 0.8232
itr: 11000 | loss: 0.8986
updated.... average c-index = 0.8288
itr: 12000 | loss: 1.2127
itr: 13000 | loss: 3.7369
itr: 14000 | loss: 2.3247
itr: 15000 | loss: 1.3140
itr: 16000 | loss: 2.4858
updated.... average c-index = 0.8334
itr: 17000 | loss: 2.0043
updated.... average c-index = 0.8429
itr: 18000 | loss: 1.8238
updated.... average c-index = 0.8467
itr: 19000 | loss: 0.8066
updated.... average c-index = 0



BURN-IN TRAINING ...
itr: 1000 | loss: 2.9617
itr: 2000 | loss: 6.2576
itr: 3000 | loss: 2.4045
MAIN TRAINING ...
itr: 1000 | loss: 1.8662
updated.... average c-index = 0.5325
itr: 2000 | loss: 1.2750
updated.... average c-index = 0.6103
itr: 3000 | loss: 1.6719
updated.... average c-index = 0.6343
itr: 4000 | loss: 8.5313
updated.... average c-index = 0.6521
itr: 5000 | loss: 2.7228
updated.... average c-index = 0.6737
itr: 6000 | loss: 2.1903
updated.... average c-index = 0.6971
itr: 7000 | loss: 3.9314
updated.... average c-index = 0.7472
itr: 8000 | loss: 2.4875
updated.... average c-index = 0.7647
itr: 9000 | loss: 1.3820
updated.... average c-index = 0.7765
itr: 10000 | loss: 1.0681
itr: 11000 | loss: 2.7608
itr: 12000 | loss: 1.8272
updated.... average c-index = 0.7871
itr: 13000 | loss: 3.3573
updated.... average c-index = 0.8059
itr: 14000 | loss: 3.7159
itr: 15000 | loss: 3.7854
itr: 16000 | loss: 1.6502
itr: 17000 | loss: 1.3803
updated.... average c-index = 0.8069
itr: 1800



BURN-IN TRAINING ...
itr: 1000 | loss: 6.0021
itr: 2000 | loss: 4.1142
itr: 3000 | loss: 0.9148
MAIN TRAINING ...
itr: 1000 | loss: 3.6035
updated.... average c-index = 0.6678
itr: 2000 | loss: 6.0322
itr: 3000 | loss: 1.6799
updated.... average c-index = 0.7430
itr: 4000 | loss: 3.7569
updated.... average c-index = 0.7500
itr: 5000 | loss: 1.4080
updated.... average c-index = 0.7782
itr: 6000 | loss: 4.9016
updated.... average c-index = 0.8038
itr: 7000 | loss: 4.8787
updated.... average c-index = 0.8470
itr: 8000 | loss: 0.7508
updated.... average c-index = 0.8508
itr: 9000 | loss: 7.8572
itr: 10000 | loss: 1.9582
itr: 11000 | loss: 1.3306
itr: 12000 | loss: 0.6632
itr: 13000 | loss: 1.9951
itr: 14000 | loss: 3.1863
itr: 15000 | loss: 2.1716
itr: 16000 | loss: 2.1707
updated.... average c-index = 0.8543
itr: 17000 | loss: 4.2353
itr: 18000 | loss: 7.1410
itr: 19000 | loss: 3.4831
itr: 20000 | loss: 3.2025
updated.... average c-index = 0.8653
itr: 21000 | loss: 2.8855
itr: 22000 | los

In [None]:
# ##### CREATE DYNAMIC-DEEPHIT NETWORK
# # tf.reset_default_graph()
# tf.compat.v1.reset_default_graph()

# # config = tf.ConfigProto()
# config = tf.compat.v1.ConfigProto()

# config.gpu_options.allow_growth = True
# sess = tf.compat.v1.Session(config=config)

# model = Model_Longitudinal_Attention(sess, "Dyanmic-DeepHit", input_dims, network_settings)
# # saver = tf.train.Saver()
# saver = tf.compat.v1.train.Saver()

# # sess.run(tf.global_variables_initializer())
# sess.run(tf.compat.v1.global_variables_initializer())
 
# ### TRAINING - BURN-IN
# if burn_in_mode == 'ON':
#     print( "BURN-IN TRAINING ...")
#     for itr in range(iteration_burn_in):
#         x_mb, x_mi_mb, k_mb, t_mb, m1_mb, m2_mb, m3_mb = f_get_minibatch(mb_size, tr_data, tr_data_mi, tr_label, tr_time, tr_mask1, tr_mask2, tr_mask3)
#         DATA = (x_mb, k_mb, t_mb)
#         MISSING = (x_mi_mb)

#         _, loss_curr = model.train_burn_in(DATA, MISSING, keep_prob, lr_train)

#         if (itr+1)%1000 == 0:
#             print('itr: {:04d} | loss: {:.4f}'.format(itr+1, loss_curr))


# ### TRAINING - MAIN
# print( "MAIN TRAINING ...")
# min_valid = 0.5

# for itr in range(iteration):
#     x_mb, x_mi_mb, k_mb, t_mb, m1_mb, m2_mb, m3_mb = f_get_minibatch(mb_size, tr_data, tr_data_mi, tr_label, tr_time, tr_mask1, tr_mask2, tr_mask3)
#     DATA = (x_mb, k_mb, t_mb)
#     MASK = (m1_mb, m2_mb, m3_mb)
#     MISSING = (x_mi_mb)
#     PARAMETERS = (alpha, beta, gamma)

#     _, loss_curr = model.train(DATA, MASK, MISSING, PARAMETERS, keep_prob, lr_train)

#     if (itr+1)%1000 == 0:
#         print('itr: {:04d} | loss: {:.4f}'.format(itr+1, loss_curr))

#     ### VALIDATION  (based on average C-index of our interest)
#     if (itr+1)%1000 == 0:        
#         risk_all = f_get_risk_predictions(sess, model, va_data, va_data_mi, pred_time, eval_time)
        
#         for p, p_time in enumerate(pred_time):
#             pred_horizon = int(p_time)
#             val_result1 = np.zeros([num_Event, len(eval_time)])
            
#             for t, t_time in enumerate(eval_time):                
#                 eval_horizon = int(t_time) + pred_horizon
#                 for k in range(num_Event):
#                     val_result1[k, t] = c_index(risk_all[k][:, p, t], va_time, (va_label[:,0] == k+1).astype(int), eval_horizon) #-1 for no event (not comparable)
            
#             if p == 0:
#                 val_final1 = val_result1
#             else:
#                 val_final1 = np.append(val_final1, val_result1, axis=0)

#         tmp_valid = np.mean(val_final1)

#         if tmp_valid >  min_valid:
#             min_valid = tmp_valid
#             saver.save(sess, file_path + '/model')
#             print( 'updated.... average c-index = ' + str('%.4f' %(tmp_valid)))

In [None]:
# Bare expression: the notebook displays the current value of `fold` as this cell's output.
# NOTE(review): `fold` is not defined anywhere in this part of the notebook — presumably it is
# the cross-validation fold index left over from the training loop above; confirm.
fold

In [None]:
                 
# risk_all = f_get_risk_predictions(sess, model, te_data, te_data_mi, pred_time, eval_time)

# for p, p_time in enumerate(pred_time):
#     pred_horizon = int(p_time)
#     result1, result2 = np.zeros([num_Event, len(eval_time)]), np.zeros([num_Event, len(eval_time)])

#     for t, t_time in enumerate(eval_time):                
#         eval_horizon = int(t_time) + pred_horizon
#         for k in range(num_Event):
#             result1[k, t] = c_index(risk_all[k][:, p, t], te_time, (te_label[:,0] == k+1).astype(int), eval_horizon) #-1 for no event (not comparable)
#             result2[k, t] = brier_score(risk_all[k][:, p, t], te_time, (te_label[:,0] == k+1).astype(int), eval_horizon) #-1 for no event (not comparable)
    
#     if p == 0:
#         final1, final2 = result1, result2
#     else:
#         final1, final2 = np.append(final1, result1, axis=0), np.append(final2, result2, axis=0)
        
        
        
# row_header = []
# for p_time in pred_time:
#     for k in range(num_Event):
#         row_header.append('pred_time {}: event_{}'.format(p_time,k+1))
            
# col_header = []
# for t_time in eval_time:
#     col_header.append('eval_time {}'.format(t_time))

# # c-index result
# df1 = pd.DataFrame(final1, index = row_header, columns=col_header)

# # brier-score result
# df2 = pd.DataFrame(final2, index = row_header, columns=col_header)

# ### PRINT RESULTS
# print('========================================================')
# print('--------------------------------------------------------')
# print('- C-INDEX: ')
# print(df1)
# print('--------------------------------------------------------')
# print('- BRIER-SCORE: ')
# print(df2)
# print('========================================================')



In [None]:
# !pip install jupyter_contrib_nbextensions
# !jupyter contrib nbextension install --user
# !jupyter contrib nbextension install --sys-prefix
# !jupyter nbextension enable varInspector/main
