In [1]:
from Survival.FeatureEngineer import FeatureEngineer
from Survival.Utils import model_prepare
from Survival.Utils import calculate_dataset_size
from Survival.Utils import evaluate_predict_result
from Survival.IPEC import IPEC

from Survival.RandomSurvivalForest import RandomSurvivalForest
from Survival.CoxPHModel import CoxPHModel
from Survival.KNNKaplanMeier import KNNKaplanMeier
from Survival.AalenAdditiveModel import AalenAdditiveModel

import numpy as np


Using TensorFlow backend.


In [2]:
# Shared feature-engineering helper; every data-loading cell below goes through `fe`.
fe = FeatureEngineer(
    verbose=False,
    data_path="../../dataset/",
)
# List of diseases reported by the helper (not referenced again in the visible cells).
sources = fe.get_diseases_list()


In [3]:
# Configuration for the single train/test split used by the model cells below.
dataset_idx = 0
file_prefix = ""
low_event_thd = 0.03
# The value threshold is tied to the event threshold (one third of it).
low_value_thd = low_event_thd / 3

# Step 1: load the raw per-patient records and the train/test id lists.
(patient_dict, feature_set,
 train_id_list, test_id_list) = fe.load_data_as_dict(
    dataset_idx,
    file_prefix=file_prefix,
    low_freq_event_thd=low_event_thd,
    low_freq_value_thd=low_value_thd,
)

# Step 2: turn them into the train/test frames consumed by every model's fit/predict.
(train_df, test_df, feature_list) = model_prepare(
    patient_dict, feature_set, train_id_list, test_id_list
)


  return (np.sqrt(ssd) / old_norm) < self.convergence_threshold


## Models


### Random Survival Forest

In [24]:
%%time

n_trees = 10
max_features = 20
max_depth = 5

rd = RandomSurvivalForest(n_trees=n_trees, max_features=max_features, max_depth=max_depth, 
                          pca_flag=True, n_components=int(np.max([10.0, 20.0])))
rd.fit(train_df, duration_col='LOS', event_col='OUT', num_workers=2)


CPU times: user 73.7 ms, sys: 31.1 ms, total: 105 ms
Wall time: 19.8 s


In [16]:
# Median-time predictions from the forest, scored against the held-out frame.
test_time_median_pred = rd.pred_median_time(test_df)
concordance = evaluate_predict_result(
    test_time_median_pred,
    test_df,
    print_result=True,  # the helper prints the "concordance: ..." line itself
)


concordance: 0.5488533205924511


In [17]:
# IPEC evaluator built from the training frame; it also supplies the time
# check points at which probabilities are requested in the next cell.
ipec = IPEC(
    train_df,
    g_type="All_One",
    t_thd=0.8,
    t_step="obs",
    time_col='LOS',
    death_identifier='OUT',
    verbose=False,
)
check_points = ipec.get_check_points()


In [18]:
# Forest probabilities for the test frame, evaluated at the IPEC check points.
proba_matrix = rd.pred_proba(
    test_df,
    time=check_points,
)


In [19]:
# Score the forest's probability matrix against the observed test durations and events.
ipec_score = ipec.calc_ipec(
    np.array(proba_matrix),
    list(test_df["LOS"]),
    list(test_df["OUT"]),
)


In [20]:
# Display the IPEC score held in `ipec_score` (set by the preceding calc_ipec cell).
ipec_score

1.68160967212041

### Cox


In [10]:
%%time

alpha = 1.
lambd = 0.07

cox = CoxPHModel(alpha=alpha, lambda_=lambd)
cox.fit(train_df, duration_col='LOS', event_col='OUT')


CPU times: user 6.2 s, sys: 19.2 ms, total: 6.22 s
Wall time: 6.24 s


In [11]:
# Median-time predictions from the Cox model, scored against the held-out frame.
test_time_median_pred = cox.pred_median_time(test_df)
concordance = evaluate_predict_result(
    test_time_median_pred,
    test_df,
    print_result=True,  # the helper prints the "concordance: ..." line itself
)


concordance: 0.5590062111801242


In [12]:
# Rebuild the IPEC evaluator (same settings as for the other models) so this
# section can be run on its own.
ipec = IPEC(
    train_df,
    g_type="All_One",
    t_thd=0.8,
    t_step="obs",
    time_col='LOS',
    death_identifier='OUT',
    verbose=False,
)
check_points = ipec.get_check_points()


In [13]:
# Cox probabilities for the test frame, evaluated at the IPEC check points.
proba_matrix = cox.pred_proba(
    test_df,
    time=check_points,
)


In [14]:
# Score the Cox probability matrix against the observed test durations and events.
ipec_score = ipec.calc_ipec(
    np.array(proba_matrix),
    list(test_df["LOS"]),
    list(test_df["OUT"]),
)
# Trailing bare expression so the notebook displays the score.
ipec_score

2.147429909939688

### Aalen Additive Model

In [46]:
# Penalizer setting forwarded to AalenAdditiveModel as `coef_penalizer`.
coef_penalizer = 0.1

aaf = AalenAdditiveModel(
    coef_penalizer=coef_penalizer,
)
aaf.fit(
    train_df,
    duration_col='LOS',
    event_col='OUT',
)


In [47]:
# Median-time predictions from the Aalen additive model, scored against the held-out frame.
test_time_median_pred = aaf.pred_median_time(test_df)
concordance = evaluate_predict_result(
    test_time_median_pred,
    test_df,
    print_result=True,  # the helper prints the "concordance: ..." line itself
)


concordance: 0.5126612517916865


In [50]:
# Spot check: probability for one test row (positional index 3) at time 1.0.
# The double brackets keep the selection a one-row DataFrame rather than a Series.
proba_pred = aaf.pred_proba(
    test_df.iloc[[3]],
    time=1.0,
)
print(proba_pred)


[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182

[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182838548]
[0.2636051182

### KNN Kaplan Meier Model

In [4]:
# Neighbourhood size forwarded to KNNKaplanMeier.
n_neighbors = 12

knn_based_kmf = KNNKaplanMeier(
    n_neighbors=n_neighbors,
)
knn_based_kmf.fit(
    train_df,
    duration_col="LOS",
    event_col="OUT",
)


In [5]:
# Median-time predictions from the KNN Kaplan-Meier model, scored against the held-out frame.
test_time_median_pred = knn_based_kmf.pred_median_time(test_df)
concordance = evaluate_predict_result(
    test_time_median_pred,
    test_df,
    print_result=True,  # the helper prints the "concordance: ..." line itself
)


concordance: 0.5857620640229336


In [6]:
# Rebuild the IPEC evaluator (same settings as for the other models) so this
# section can be run on its own.
ipec = IPEC(
    train_df,
    g_type="All_One",
    t_thd=0.8,
    t_step="obs",
    time_col='LOS',
    death_identifier='OUT',
    verbose=False,
)
check_points = ipec.get_check_points()


In [8]:
# KNN Kaplan-Meier probabilities for the test frame, evaluated at the IPEC check points.
proba_matrix = knn_based_kmf.pred_proba(
    test_df,
    time=check_points,
)


In [9]:
# Score the KNN probability matrix against the observed test durations and events.
ipec_score = ipec.calc_ipec(
    np.array(proba_matrix),
    list(test_df["LOS"]),
    list(test_df["OUT"]),
)
# Trailing bare expression so the notebook displays the score.
ipec_score

1.5152523126843305

## Data Preparation


In [None]:
# Build one (train, test) frame pair per cross-validation fold.
# The fold files are addressed through the "cross_val/10-5fold_<i>_" prefix.
dataset_idx = 0
train_dfs = []
test_dfs = []

for i in range(5):
    print(i)  # progress marker: index of the fold being loaded
    patient_dict, feature_set, train_id_list, test_id_list = \
        fe.load_data_as_dict(dataset_idx, file_prefix="cross_val/10-5fold_"+str(i)+"_",
                             low_freq_event_thd=0.03, low_freq_value_thd=0.01)
    # model_prepare is unpacked into three values, matching the executed
    # single-split cell earlier in this notebook. The previous seven-name
    # unpacking (train_x, train_y, test_x, test_y, ...) did not match that
    # call and would fail with a ValueError.
    # NOTE: train_df / test_df are rebound on every iteration, so after this
    # cell they refer to the last fold, not to the single split loaded above.
    train_df, test_df, feature_list = \
        model_prepare(patient_dict, feature_set, train_id_list, test_id_list)
    train_dfs.append(train_df)
    test_dfs.append(test_df)


## Read Result


In [2]:
import pickle


### KNN

In [None]:
# Load the saved sweep results for the KNN and AAM models.
# Later cells index each result as result[0][disease] (concordance) and
# result[1][disease] (IPEC). The *_value lists are the swept settings
# (presumably n_neighbors for KNN and coef_penalizer for AAM - confirm
# against the script that wrote the pickles).
#
# SECURITY NOTE: pickle.load can execute arbitrary code while deserialising.
# Only load pickle files that were produced by this project.
with open('KNN.pickle', 'rb') as f:
    knn_small_result = pickle.load(f)
knn_small_value = [5, 10, 15, 20, 30]
# Fixed: this used to print `knn_value`, which is never defined (NameError).
print(knn_small_value)

with open('KNN_large.pickle', 'rb') as f:
    knn_large_result = pickle.load(f)
knn_large_value = [50, 80, 100, 150, 200]
print(knn_large_value)

with open('AAM.pickle', 'rb') as f:
    aam_small_result = pickle.load(f)
aam_small_value = [0.05, 0.08, 0.1, 0.12, 0.15, 0.2, 0.3]
print(aam_small_value)

with open('AAM_large.pickle', 'rb') as f:
    aam_large_result = pickle.load(f)
aam_large_value = [0.3, 0.35, 0.4, 0.45, 0.5, 0.6]
print(aam_large_value)


In [22]:
# Full KNN sweep grid: the small-range values followed by the large-range values.
knn_small_value + knn_large_value


[5, 10, 15, 20, 30, 50, 80, 100, 150, 200]

In [27]:
# Concordance (result index 0) for "ich", small sweep followed by large sweep.
print("KNN, ICH, concordance")
print(
    list(knn_small_result[0]["ich"])
    + list(knn_large_result[0]["ich"])
)


KNN, ICH, concordance
[0.5729085627977228, 0.5761260601835716, 0.5753041710235854, 0.5694813523875915, 0.5817065179505054, 0.580925641919368, 0.5851056117113977, 0.5811187405600092, 0.582252236551644, 0.582298361798536]


In [32]:
# IPEC (result index 1) for "ich", small sweep followed by large sweep.
print("KNN on ICH, IPEC")
print(
    list(knn_small_result[1]["ich"])
    + list(knn_large_result[1]["ich"])
)


KNN on ICH, IPEC
[0.20196751757442835, 0.19296079998560986, 0.18821019543353534, 0.1865315687278688, 0.1823964997497527, 0.18069443220029754, 0.18058193441643783, 0.18090214450003048, 0.18094019267265044, 0.18195513032279723]


In [30]:
# Concordance (result index 0) for "pancreatitis", small sweep followed by large sweep.
print("KNN, pancreatitis, concordance")
print(
    list(knn_small_result[0]["pancreatitis"])
    + list(knn_large_result[0]["pancreatitis"])
)


KNN, pancreatitis, concordance
[0.5569867724867725, 0.5537400192400193, 0.5455050505050505, 0.5540447330447331, 0.535077441077441, 0.5358395863395864, 0.5170675805675806, 0.5121450216450216, 0.5064367484367485, 0.49512578162578164]


In [33]:
# IPEC (result index 1) for "pancreatitis", small sweep followed by large sweep.
print("KNN on pancreatitis, IPEC")
print(
    list(knn_small_result[1]["pancreatitis"])
    + list(knn_large_result[1]["pancreatitis"])
)


KNN on pancreatitis, IPEC
[0.20042574754000636, 0.1906304152136668, 0.18946485692283332, 0.18613777947475532, 0.18686605112389412, 0.18639644724663376, 0.18622910181153443, 0.18628138147036263, 0.18528973831190673, 0.18542862670993487]


### AAM

In [35]:
# Full AAM sweep grid first, then each metric/cohort pair with the small-sweep
# values followed by the large-sweep values.
print(aam_small_value + aam_large_value)

# (heading, result index, cohort key): index 0 holds concordance, index 1 holds IPEC.
_aam_sections = (
    ("AAM, ICH, concordance", 0, "ich"),
    ("AAM on ICH, IPEC", 1, "ich"),
    ("AAM, pancreatitis, concordance", 0, "pancreatitis"),
    ("AAM on pancreatitis, IPEC", 1, "pancreatitis"),
)
for _heading, _metric, _cohort in _aam_sections:
    print(_heading)
    print(
        list(aam_small_result[_metric][_cohort])
        + list(aam_large_result[_metric][_cohort])
    )


AAM, ICH, concordance
[0.5523046357615894, 0.5604739165795283, 0.5627684442895318, 0.5596718949692111, 0.5611541768328104, 0.5629286627163936, 0.5646888579063554, 0.5649883815499013, 0.5681879865225979, 0.5707011734634599, 0.5704489369118161, 0.571710119670036, 0.5722579295921923]
AAM on ICH, IPEC
[0.322790796513818, 0.3160184153638755, 0.3126440685512023, 0.30981338323329644, 0.30605018405875056, 0.30089251065984113, 0.29305254802558156, 0.2930766261057086, 0.29028580098181733, 0.28786780218632974, 0.285733940611933, 0.28390186029190767, 0.28083039626243467]
AAM, pancreatitis, concordance
[0.5100310245310246, 0.5102688792688793, 0.5102388167388168, 0.510380230880231, 0.5098114478114477, 0.51010101010101, 0.5115495430495431, 0.5108840788840789, 0.5105363155363155, 0.5097409812409812, 0.5102984607984609, 0.5101803751803751, 0.5105103415103415]
AAM on pancreatitis, IPEC
[0.3787472324322354, 0.3782282409657072, 0.37788626417727444, 0.3775722269219542, 0.3771360045539843, 0.376425018847365

In [33]:
# `result` was never assigned in this notebook (leftover kernel state), so the
# cell failed on a fresh kernel. The recorded seven-value output corresponds to
# the small AAM sweep, so read it from `aam_small_result` explicitly.
# Index 0 holds concordance; "ich" selects the cohort.
print("AAM, ICH, concordance")
print(aam_small_result[0]["ich"])


AAM, ICH, concordance
[0.55230464 0.56047392 0.56276844 0.55967189 0.56115418 0.56292866
 0.56468886]


In [34]:
# `result` was never assigned in this notebook (leftover kernel state), so the
# cell failed on a fresh kernel. The recorded seven-value output corresponds to
# the small AAM sweep, so read it from `aam_small_result` explicitly.
# Index 1 holds IPEC; "ich" selects the cohort.
print("AAM, ICH, IPEC")
print(aam_small_result[1]["ich"])


AAM, ICH, IPEC
[0.3227908  0.31601842 0.31264407 0.30981338 0.30605018 0.30089251
 0.29305255]


In [35]:
# `result` was never assigned in this notebook (leftover kernel state), so the
# cell failed on a fresh kernel. The recorded seven-value output corresponds to
# the small AAM sweep, so read it from `aam_small_result` explicitly.
# Index 0 holds concordance; "pancreatitis" selects the cohort.
print("AAM, pancreatitis, concordance")
print(aam_small_result[0]["pancreatitis"])


AAM, pancreatitis, concordance
[0.51003102 0.51026888 0.51023882 0.51038023 0.50981145 0.51010101
 0.51154954]


In [36]:
# `result` was never assigned in this notebook (leftover kernel state), so the
# cell failed on a fresh kernel. The recorded seven-value output corresponds to
# the small AAM sweep, so read it from `aam_small_result` explicitly.
# Index 1 holds IPEC; "pancreatitis" selects the cohort.
print("AAM, pancreatitis, IPEC")
print(aam_small_result[1]["pancreatitis"])


AAM, pancreatitis, IPEC
[0.37874723 0.37822824 0.37788626 0.37757223 0.377136   0.37642502
 0.37527073]


In [11]:
# `result` was never assigned in this notebook (leftover kernel state), so the
# cell failed on a fresh kernel. The recorded six-value output corresponds to
# the large AAM sweep, so read it from `aam_large_result` explicitly.
# Index 0 holds concordance; "ich" selects the cohort.
print("AAM, ICH, concordance")
print(aam_large_result[0]["ich"])


AAM, ICH, concordance
[0.56498838 0.56818799 0.57070117 0.57044894 0.57171012 0.57225793]


In [12]:
# `result` was never assigned in this notebook (leftover kernel state), so the
# cell failed on a fresh kernel. The recorded six-value output corresponds to
# the large AAM sweep, so read it from `aam_large_result` explicitly.
# Index 1 holds IPEC; "ich" selects the cohort.
print("AAM, ICH, IPEC")
print(aam_large_result[1]["ich"])


AAM, ICH, IPEC
[0.29307663 0.2902858  0.2878678  0.28573394 0.28390186 0.2808304 ]


In [13]:
# `result` was never assigned in this notebook (leftover kernel state), so the
# cell failed on a fresh kernel. The recorded six-value output corresponds to
# the large AAM sweep, so read it from `aam_large_result` explicitly.
# Index 0 holds concordance; "pancreatitis" selects the cohort.
print("AAM, pancreatitis, concordance")
print(aam_large_result[0]["pancreatitis"])


AAM, pancreatitis, concordance
[0.51088408 0.51053632 0.50974098 0.51029846 0.51018038 0.51051034]


In [14]:
# `result` was never assigned in this notebook (leftover kernel state), so the
# cell failed on a fresh kernel. The recorded six-value output corresponds to
# the large AAM sweep, so read it from `aam_large_result` explicitly.
# Index 1 holds IPEC; "pancreatitis" selects the cohort.
print("AAM, pancreatitis, IPEC")
print(aam_large_result[1]["pancreatitis"])


AAM, pancreatitis, IPEC
[0.37526597 0.37451314 0.37386782 0.37324159 0.37266741 0.37170037]
