### Keras code for Cox proportional hazards survival layer

In [11]:
from Survival.Utils import load_whole_data
from Survival.Utils import load_val_data
from Survival.Utils import calc_scores
from Survival.Utils import load_score_containers
from Survival.Utils import filename_generator

from Survival.NeuralNetworkCox import NeuralNetworkCox

import numpy as np
import pickle


## Full dataset performance


In [3]:
# Load all three datasets. The cells in this section index dataset_names[0],
# dataset_names[1] and dataset_names[2] (pancreatitis, ich, sepsis per their
# headings), so requesting only index 2 leaves those lookups without a match
# on a fresh Restart & Run All.
# NOTE(review): assumes dataset_names contains only the requested datasets, in
# request order -- confirm against load_whole_data.
dataset_idxs = [0, 1, 2]  # 0: "pancreatitis", 1: "ich", 2: "sepsis"
train_dfs, test_dfs, unique_times, dataset_names = load_whole_data(
    dataset_idxs, verbose=True, data_path="../../dataset/"
)


current dataset: pancreatitis
[LOG]Reading train&test csv...
[LOG]Process df row by row and update dict.
[LOG]Reading patient list, LoS, TUD...
[LOG]Identify one-hot encoded event, extract 'true event' and value.
[LOG]Count occurance for each event; Remove 'na' in value.
[LOG]Feature filtering. Remove events that occurred for less than 7 times.
[LOG]Number of events that ever occurred: 1566
[LOG]Number of events that occurred for more than 7 times: 592
[LOG]Identify categorical event.
[LOG]One-hot encoding categorical event. Scalarize numerical.
[LOG]Value filtering. Remove values that occurred for less than 3 times.
[LOG]After one-hot encoding, number of feature: 955
[LOG]Among them, number of categorical feature: 616
[LOG]Simplify output dict and check correctness.
[LOG]Impute missing value.


  return (np.sqrt(ssd) / old_norm) < self.convergence_threshold


current dataset: ich
[LOG]Reading train&test csv...
[LOG]Process df row by row and update dict.
[LOG]Reading patient list, LoS, TUD...
[LOG]Identify one-hot encoded event, extract 'true event' and value.
[LOG]Count occurance for each event; Remove 'na' in value.
[LOG]Feature filtering. Remove events that occurred for less than 10 times.
[LOG]Number of events that ever occurred: 1966
[LOG]Number of events that occurred for more than 10 times: 789
[LOG]Identify categorical event.
[LOG]One-hot encoding categorical event. Scalarize numerical.
[LOG]Value filtering. Remove values that occurred for less than 5 times.
[LOG]After one-hot encoding, number of feature: 1512
[LOG]Among them, number of categorical feature: 1093
[LOG]Simplify output dict and check correctness.
[LOG]Impute missing value.
current dataset: sepsis
[LOG]Reading train&test csv...
[LOG]Process df row by row and update dict.
[LOG]Reading patient list, LoS, TUD...
[LOG]Identify one-hot encoded event, extract 'true event' and 

Try on the complete **Pancreatitis** dataset

In [173]:
# Pull the dataset_names[0] entry out of each of the three per-dataset
# containers (train, test, unique times), in that order.
# NOTE(review): the heading expects this to be pancreatitis -- that holds only
# if pancreatitis is the first loaded dataset.
train_df, test_df, unique_time = (
    per_dataset[dataset_names[0]]
    for per_dataset in (train_dfs, test_dfs, unique_times)
)


In [174]:
%%time
NNCOX = NeuralNetworkCox(first_layer_size=16, epochs=1500, verbose=0)
NNCOX.fit(train_df)


CPU times: user 12.4 s, sys: 1.16 s, total: 13.6 s
Wall time: 8.48 s


In [175]:
%%time
PAN_concordance, PAN_IPEC_list = calc_scores(NNCOX, test_df, unique_time)
print(PAN_concordance, PAN_IPEC_list[int(len(PAN_IPEC_list) * 0.8)])


0.48542761586239846 4.109481129197608
CPU times: user 1.05 s, sys: 11.3 ms, total: 1.06 s
Wall time: 1.06 s


Try on the complete **Ich** dataset

In [176]:
# Pull the dataset_names[1] entry out of each of the three per-dataset
# containers (train, test, unique times), in that order.
# NOTE(review): the heading expects this to be ich -- that holds only if ich
# is the second loaded dataset.
train_df, test_df, unique_time = (
    per_dataset[dataset_names[1]]
    for per_dataset in (train_dfs, test_dfs, unique_times)
)


In [183]:
%%time
NNCOX = NeuralNetworkCox(first_layer_size=16, epochs=1500, verbose=0)
NNCOX.fit(train_df)


CPU times: user 48.7 s, sys: 3.89 s, total: 52.5 s
Wall time: 33 s


In [180]:
%%time
ICH_concordance, ICH_IPEC_list = calc_scores(NNCOX, test_df, unique_time)
print(ICH_concordance, ICH_IPEC_list[int(len(ICH_IPEC_list) * 0.8)])


0.5738948966040599 1.6895809967736664
CPU times: user 8.55 s, sys: 57.4 ms, total: 8.61 s
Wall time: 8.7 s


Try on the complete **Sepsis** dataset

In [117]:
# Pull the dataset_names[2] entry out of each of the three per-dataset
# containers (train, test, unique times), in that order.
# NOTE(review): the heading expects this to be sepsis -- that holds only if
# sepsis is the third loaded dataset.
train_df, test_df, unique_time = (
    per_dataset[dataset_names[2]]
    for per_dataset in (train_dfs, test_dfs, unique_times)
)


In [None]:
%%time
NNCOX = NeuralNetworkCox(first_layer_size=16, epochs=1500, verbose=1)
NNCOX.fit(train_df)


In [None]:
%%time
SEP_concordance, SEP_IPEC_list = calc_scores(NNCOX, test_df, unique_time)
print(SEP_concordance, SEP_IPEC_list[int(len(SEP_IPEC_list) * 0.8)])


## Tuning Scripts Test


In [3]:
# Load the validation data used by the tuning cells below. This rebinds
# train_dfs / test_dfs / unique_times / dataset_names from the full-dataset
# section above; the tuning cells iterate over train_dfs[name] and index
# test_dfs[name] by position.
dataset_idxs = [1]  # 0: "pancreatitis", 1: "ich", 2: "sepsis"
(train_dfs, test_dfs,
 unique_times, dataset_names) = load_val_data(dataset_idxs, True, "../../dataset/")


current dataset: ich
---------------------------------------------
fold 0
1204 16:15:17
[LOG]Reading train&test csv...
[LOG]Process df row by row and update dict.
[LOG]Reading patient list, LoS, TUD...
[LOG]Identify one-hot encoded event, extract 'true event' and value.
[LOG]Count occurance for each event; Remove 'na' in value.
[LOG]Feature filtering. Remove events that occurred for less than 7 times.
[LOG]Number of events that ever occurred: 1896
[LOG]Number of events that occurred for more than 7 times: 817
[LOG]Identify categorical event.
[LOG]One-hot encoding categorical event. Scalarize numerical.
[LOG]Value filtering. Remove values that occurred for less than 3 times.
[LOG]After one-hot encoding, number of feature: 1709
[LOG]Among them, number of categorical feature: 1277
[LOG]Simplify output dict and check correctness.
[LOG]Impute missing value.


  return (np.sqrt(ssd) / old_norm) < self.convergence_threshold


---------------------------------------------
fold 1
1204 16:17:54
[LOG]Reading train&test csv...
[LOG]Process df row by row and update dict.
[LOG]Reading patient list, LoS, TUD...
[LOG]Identify one-hot encoded event, extract 'true event' and value.
[LOG]Count occurance for each event; Remove 'na' in value.
[LOG]Feature filtering. Remove events that occurred for less than 7 times.
[LOG]Number of events that ever occurred: 1896
[LOG]Number of events that occurred for more than 7 times: 817
[LOG]Identify categorical event.
[LOG]One-hot encoding categorical event. Scalarize numerical.
[LOG]Value filtering. Remove values that occurred for less than 3 times.
[LOG]After one-hot encoding, number of feature: 1709
[LOG]Among them, number of categorical feature: 1277
[LOG]Simplify output dict and check correctness.
[LOG]Impute missing value.
---------------------------------------------
fold 2
1204 16:20:30
[LOG]Reading train&test csv...
[LOG]Process df row by row and update dict.
[LOG]Reading p

In [16]:
# Grid search over (first_layer, lmbda) for a single dataset.
# For each configuration a new NeuralNetworkCox is fitted on every training
# split in train_dfs[dataset_name] and scored on the test split at the same
# position; the per-split scores are averaged and stored under the
# (first_layer, lmbda) key. The result containers are re-pickled after every
# configuration, so an interrupted run keeps what has finished so far.
idx = 0
first_layers = [8, 16, 32, 64, 128, 256]
lmbdas = [0., 0.01, 0.02, 0.06, 0.1, 0.12, 0.15]
dataset_name, dataset_idx = dataset_names[idx], dataset_idxs[idx]
filename = filename_generator("NNC", False, [dataset_idx])
# concordances, ipecs = load_score_containers([dataset_name], [first_layers, lmbdas])
concordances, ipecs = {}, {}

print("\nFor the " + dataset_name + " dataset:")

for row, first_layer in enumerate(first_layers):
    for col, lmbda in enumerate(lmbdas):
        config = (first_layer, lmbda)
        print("[LOG] first_layer = {}, lmbda = {}".format(*config))

        tmp_concordances, tmp_ipecs = [], []

        for index, cur_train in enumerate(train_dfs[dataset_name]):
            # Split number is printed first; the scores follow on the same line.
            print(index, end=" ")
            cur_test = test_dfs[dataset_name][index]
            model = NeuralNetworkCox(
                first_layer_size=first_layer,
                lmbda=lmbda,
                verbose=0,
            )
            model.fit(cur_train, duration_col='LOS', event_col='OUT')
            concordance, ipec_score = calc_scores(
                model, cur_test, unique_times[dataset_name])
            print(concordance, ipec_score[int(0.8 * len(ipec_score))])

            tmp_concordances.append(concordance)
            tmp_ipecs.append(ipec_score)

        # axis=0 averages the IPEC lists element-wise across splits.
        avg_concordance = np.average(tmp_concordances)
        avg_ipec = np.average(tmp_ipecs, axis=0)
        print("[LOG] avg. concordance:", avg_concordance)
        print("[LOG] avg. ipec:", avg_ipec[int(0.8 * len(avg_ipec))])

        concordances[config] = avg_concordance
        ipecs[config] = avg_ipec

        print("-------------------------------------------------------")

        # Checkpoint: overwrite the pickle with everything collected so far.
        with open(filename, 'wb') as f:
            pickle.dump([first_layers, lmbdas, concordances, ipecs], f, pickle.HIGHEST_PROTOCOL)
            



For the ich dataset:
[LOG] first_layer = 8, lmbda = 0.0
0 0.536467410247473 2.5726878434244096
1 0.5771174625304984 1.946086385858014
2 0.5572063436737539 2.253182068934815
3 0.606887417218543 1.9157242022043144
4 0.5618543046357616 1.9405950389646305
[LOG] avg. concordance: 0.5679065876612059
[LOG] avg. ipec: 2.125655107877237
-------------------------------------------------------
[LOG] first_layer = 8, lmbda = 0.01
0 0.5263593586615546 2.49153391056251
1 0.5650052283025444 1.9970687760911325
2 0.5837835482746602 1.9975147641776614
3 0.4781456953642384 3.523432923985743
4 0.5756291390728476 1.8836456279395584
[LOG] avg. concordance: 0.5457845939351691
[LOG] avg. ipec: 2.378639200551321
-------------------------------------------------------
[LOG] first_layer = 8, lmbda = 0.02
0 0.5271436040432206 2.620830701179726
1 0.5731526664342976 1.9493256046082166
2 0.5728912513070756 2.0966761007161074
3 0.5835761589403974 1.909387731832924
4 0.5408830022075055 2.055451793653473
[LOG] avg. co

1 0.5935866155454862 1.7302743035516008
2 0.563785291042175 2.092565665143125
3 0.5764238410596026 2.0208290512572513
4 0.56158940397351 2.2704933528750884
[LOG] avg. concordance: 0.5648107354478913
[LOG] avg. ipec: 2.1036031289288273
-------------------------------------------------------
[LOG] first_layer = 64, lmbda = 0.02
0 0.5388201463924712 2.3005170938971027
1 0.576551063088184 1.9490390038484997
2 0.5664865806901359 2.052731979196542
3 0.5911699779249449 1.931053314004929
4 0.5658719646799117 1.9808772435366124
[LOG] avg. concordance: 0.5677799465551295
[LOG] avg. ipec: 2.042843726896737
-------------------------------------------------------
[LOG] first_layer = 64, lmbda = 0.06
0 0.5411293133495991 2.248101626774127
1 0.6123213663297317 1.5923719127782932
2 0.5891861275705821 1.858667105861098
3 0.600485651214128 1.8174994677770135
4 0.5831346578366446 1.831722066984918
[LOG] avg. concordance: 0.5852514232601371
[LOG] avg. ipec: 1.8696724360350898
-----------------------------

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


0.5 nan
3 0.5805298013245033 2.001797954101557
4 0.5726269315673289 1.9261505690675769
[LOG] avg. concordance: 0.5487125595445568
[LOG] avg. ipec: nan
-------------------------------------------------------
[LOG] first_layer = 256, lmbda = 0.06
0 0.5417828511676542 2.2100797542131825
1 0.6008191007319623 1.6610329823738685
2 0.5 nan
3 0.5865783664459161 1.7234505565994627
4 0.5632671081677704 2.0011451353358347
[LOG] avg. concordance: 0.5584894853026606
[LOG] avg. ipec: nan
-------------------------------------------------------
[LOG] first_layer = 256, lmbda = 0.1
0 0.5569013593586616 2.033712465473297
1 0.6231265249215755 1.4725248262795303
2 0.6031282676890902 1.6698057756434945
3 0.5993377483443708 1.6196078048957272
4 0.5954083885209713 1.6463350902046565
[LOG] avg. concordance: 0.5955804577669339
[LOG] avg. ipec: 1.688397192499341
-------------------------------------------------------
[LOG] first_layer = 256, lmbda = 0.12
0 0.5670529801324503 1.961077106598572
1 0.61092715231788

In [None]:
# Grid search over (first_layer, lmbda) with epochs=2000 for a single dataset.
# For each configuration a new NeuralNetworkCox is fitted on every training
# split in train_dfs[dataset_name] and scored on the test split at the same
# position; per-split scores are averaged and stored under the
# (first_layer, lmbda) key, matching the layout written by the previous
# tuning cell.
#
# Fixes relative to the earlier draft of this cell:
#   * the per-split IPEC list is appended under the name this cell actually
#     binds (ipec_score_list), not a stale name from another cell;
#   * IPEC lists are averaged element-wise (axis=0) rather than collapsed
#     into one scalar;
#   * results are stored as dict[(first_layer, lmbda)]; indexing the empty
#     dict as [dataset_name][row] raised KeyError and could not distinguish
#     different lmbda values.
idx = 0
first_layers = [8, 16, 32, 64, 96, 128, 192, 256]
lmbdas = [0., 0.01, 0.02]
dataset_name = dataset_names[idx]
unique_time = unique_times[dataset_name]
dataset_idx = dataset_idxs[idx]
# NOTE(review): filename is generated from the same arguments as in the
# previous tuning cell, so this run presumably overwrites that cell's
# pickle -- confirm filename_generator's behavior.
filename = filename_generator("NNC", False, [dataset_idx])
# concordances, ipecs = load_score_containers([dataset_name], [first_layers, lmbdas])
concordances = {}
ipecs = {}

print("\nFor the " + dataset_name + " dataset:")

for first_layer in first_layers:
    for lmbda in lmbdas:
        print("[LOG] first_layer = {}, lmbda = {}".format(first_layer, lmbda))

        tmp_concordances = []
        tmp_ipecs = []

        for index, cur_train in enumerate(train_dfs[dataset_name]):
            # Split number is printed first; the concordance follows on the same line.
            print(index, end=" ")
            cur_test = test_dfs[dataset_name][index]
            model = NeuralNetworkCox(first_layer_size=first_layer, lmbda=lmbda, epochs=2000, verbose=0)
            model.fit(cur_train, duration_col='LOS', event_col='OUT')
            concordance, ipec_score_list = calc_scores(model, cur_test, unique_time)
            print(concordance)

            tmp_concordances.append(concordance)
            tmp_ipecs.append(ipec_score_list)

        avg_concordance = np.average(tmp_concordances)
        # Element-wise mean across splits keeps one averaged IPEC list.
        avg_ipec = np.average(tmp_ipecs, axis=0)
        print("[LOG] avg. concordance:", avg_concordance)
        # Report the entry 80% of the way through the list, as the sibling cells do.
        print("[LOG] avg. ipec:", avg_ipec[int(len(avg_ipec) * 0.8)])

        concordances[(first_layer, lmbda)] = avg_concordance
        ipecs[(first_layer, lmbda)] = avg_ipec

        print("-------------------------------------------------------")

        # Checkpoint: overwrite the pickle with everything collected so far.
        with open(filename, 'wb') as f:
            pickle.dump([first_layers, lmbdas, concordances, ipecs], f, pickle.HIGHEST_PROTOCOL)


In [200]:
# Sweep the first-layer size for every loaded dataset.
# For each dataset and each size, a new NeuralNetworkCox (epochs=2000) is
# fitted on every training split and scored on the test split at the same
# position. Per-size averages are collected in lists ordered like
# first_layer_sizes and stored per dataset in concordance_result / ipec_result.
first_layer_sizes = [8, 16, 32, 64, 128]
concordance_result = {}
ipec_result = {}

# (The former `dataset_name = dataset_names[0]` was dead code: the loop
# below rebinds dataset_name before it is ever read.)
for dataset_name in dataset_names:
    unique_time = unique_times[dataset_name]
    concordances = []
    ipecs = []
    print("\nFor the " + dataset_name + " dataset:")

    for first_layer_size in first_layer_sizes:
        print("[LOG] first_layer_size = {}".format(first_layer_size))

        tmp_concordances = []
        tmp_ipecs = []

        for index, cur_train in enumerate(train_dfs[dataset_name]):
            cur_test = test_dfs[dataset_name][index]

            model = NeuralNetworkCox(first_layer_size=first_layer_size, epochs=2000, verbose=0)
            model.fit(cur_train, duration_col='LOS', event_col='OUT')
            concordance, ipec_score_list = calc_scores(model, cur_test, unique_time)

            tmp_concordances.append(concordance)
            tmp_ipecs.append(ipec_score_list)

        avg_concordance = np.average(tmp_concordances)
        # Element-wise mean across splits keeps one averaged IPEC list.
        avg_ipec = np.average(tmp_ipecs, axis=0)
        print("[LOG] avg. concordance:", avg_concordance)
        print("[LOG] avg. ipec:", avg_ipec[int(len(avg_ipec) * 0.8)])

        concordances.append(avg_concordance)
        ipecs.append(avg_ipec)

    concordance_result[dataset_name] = concordances
    ipec_result[dataset_name] = ipecs




For the pancreatitis dataset:
[LOG] first_layer_size = 8
[LOG] avg. concordance: 0.5753215488215488
[LOG] avg. ipec: 3.4120316130733066
[LOG] first_layer_size = 16
[LOG] avg. concordance: 0.5736048581048582
[LOG] avg. ipec: 3.432582309174939
[LOG] first_layer_size = 32
[LOG] avg. concordance: 0.5785473785473786
[LOG] avg. ipec: 3.38209070774906
[LOG] first_layer_size = 64
[LOG] avg. concordance: 0.5774300144300145
[LOG] avg. ipec: 3.392501325919244
[LOG] first_layer_size = 128
[LOG] avg. concordance: 0.5768600288600289
[LOG] avg. ipec: 3.3949948105549934

For the ich dataset:
[LOG] first_layer_size = 8
[LOG] avg. concordance: 0.5630347391657953
[LOG] avg. ipec: 2.1524096315226284
[LOG] first_layer_size = 16
[LOG] avg. concordance: 0.567214941326827
[LOG] avg. ipec: 2.0818974789897733
[LOG] first_layer_size = 32
[LOG] avg. concordance: 0.5542585105146973
[LOG] avg. ipec: 2.1669880399180133
[LOG] first_layer_size = 64
[LOG] avg. concordance: 0.5681728825374694
[LOG] avg. ipec: 2.0638685

In [201]:
# Repeat the first-layer sweep for one dataset with lmbda=0.1 and the two
# layer sizes 128 and 256. Averages are appended to concordances / ipecs in
# sweep order.
# NOTE(review): dataset_names[1] exists only if at least two datasets are
# loaded; the validation-loading cell above requests a single index -- confirm
# the intended kernel state.
dataset_name = dataset_names[1]

unique_time = unique_times[dataset_name]
concordances, ipecs = [], []
print("\nFor the " + dataset_name + " dataset:")

for row, first_layer_size in enumerate([128, 256]):
    print("[LOG] first_layer_size = {}".format(first_layer_size))

    tmp_concordances, tmp_ipecs = [], []

    for index, cur_train in enumerate(train_dfs[dataset_name]):
        # The test split is taken from the same position as the training split.
        cur_test = test_dfs[dataset_name][index]

        model = NeuralNetworkCox(
            first_layer_size=first_layer_size,
            epochs=2000,
            verbose=0,
            lmbda=0.1,
        )
        model.fit(cur_train, duration_col='LOS', event_col='OUT')
        concordance, ipec_score_list = calc_scores(model, cur_test, unique_time)

        tmp_concordances.append(concordance)
        tmp_ipecs.append(ipec_score_list)

    # axis=0 averages the IPEC lists element-wise across splits.
    avg_concordance = np.average(tmp_concordances)
    avg_ipec = np.average(tmp_ipecs, axis=0)
    print("[LOG] avg. concordance:", avg_concordance)
    print("[LOG] avg. ipec:", avg_ipec[int(0.8 * len(avg_ipec))])

    concordances.append(avg_concordance)
    ipecs.append(avg_ipec)



For the ich dataset:
[LOG] first_layer_size = 128
[LOG] avg. concordance: 0.5812185430463577
[LOG] avg. ipec: 1.8740793022331967
[LOG] first_layer_size = 256
[LOG] avg. concordance: 0.5755504821656791
[LOG] avg. ipec: 1.9143238912932723
