In [1]:
import os
import pandas as pd
import numpy as np
import numpy.ma as ma
from scipy import signal, stats, io
import pedAKI_predictor as ppaki
from sklearn.metrics import auc, roc_curve
from sklearn import metrics
import itertools
import pickle

In [2]:
# NOTE: the argument is the literal string "__file__" (not the variable), so
# this evaluates to '' and every path built from fileDir is relative to the
# current working directory.
fileDir = os.path.dirname("__file__")

# Candidate time lags 0, -1, ..., -24 and the window lengths to evaluate.
timelag_all = [-lag for lag in np.arange(25)]
timewin_all = [12, 6]

# Full (lag, window) grid, lag-major.
combination = []
for lag in timelag_all:
    for win in timewin_all:
        combination.append((lag, win))

# Keep only the pairs whose absolute lag is at least the window length.
mask = [abs(lag) >= abs(win) for lag, win in combination]
combination = [pair for pair, keep in zip(combination, mask) if keep]

In [13]:
# Settings forwarded to ppaki.AKI_predictor_log by the cells below.
cut_off = 0
fill_mode = 'mean'
ref_type = 'onset'
cv = 10

# Statistic suffixes that are appended to a raw feature name.
suffices = ['min', 'max', 'mean', 'median', 'last']

# Raw feature names.
fts = [
    'nsbp', 'ndbp', 'hr', 'spo2', 'ratio_pao2_flo2', 'hemoglobin',
    'temperature', 'wbc', 'platelet', 'bilirubin', 'albumin', 'ph',
    'urine', 'potassium', 'calcium', 'glucose', 'creatinine',
    'lactic_acid', 'bun', 'osi', 'si', 'oi',
]

# Features whose statistics are masked out when `only_common` /
# `ex_diff_fts` is switched on further down.
# ex_fts = ['ph', 'glucose', 'ratio_pao2_flo2']
ex_fts = ['ph', 'ratio_pao2_flo2', 'calcium', 'bilirubin', 'bun']
ex_fts_full = ['{}_{}'.format(ft, suff) for ft in ex_fts for suff in suffices]

# top 10
# top_fts = ['lactic_acid', 'oi', 'hemoglobin', 'platelet', 'nsbp',
#            'potassium', 'temperature', 'urine', 'glucose', 'ndbp']

# top 15
top_fts = [
    'lactic_acid', 'oi', 'hemoglobin', 'platelet', 'nsbp',
    'potassium', 'temperature', 'urine', 'glucose', 'ndbp',
    'creatinine', 'spo2', 'osi', 'albumin', 'hr',
]
top_fts_full = ['{}_{}'.format(ft, suff) for ft in top_fts for suff in suffices]

# p-value>=0.001
sm_fts = [
    'urine_mean', 'spo2_median', 'temperature_last',
    'temperature_median', 'temperature_max', 'temperature_mean',
    'urine_median', 'urine_last', 'spo2_mean', 'spo2_last',
    'potassium_median', 'potassium_mean', 'potassium_last',
    'temperature_min', 'wbc_last', 'wbc_max', 'wbc_mean',
    'wbc_median', 'wbc_min', 'spo2_max', 'creatinine_max',
    'creatinine_last', 'creatinine_median', 'creatinine_mean',
    'creatinine_min', 'osi_max', 'spo2_min', 'oi_max',
]

# p-value>=0.01
# sm_fts = ['urine_mean', 'spo2_median', 'temperature_last', 'temperature_median',
#           'temperature_max', 'temperature_mean', 'urine_median', 'urine_last',
#           'spo2_mean', 'spo2_last', 'potassium_median', 'potassium_mean',
#           'potassium_last', 'temperature_min', 'wbc_last', 'wbc_max',
#           'wbc_mean', 'wbc_median', 'wbc_min']

In [14]:
def getValidator(fname, mask_idx, timelag=6, timewindow=6, cv=5):
    """Fit a reference model on every sample stored in one train/test pickle.

    The train and test partitions found in the pickle are stacked into a
    single array, restricted to the columns listed in ``mask_idx``, and
    handed to ``ppaki.AKI_predictor_log`` as BOTH the training and the test
    set.

    The notebook-level settings ``cut_off``, ``fill_mode`` and ``ref_type``
    are read from the enclosing scope.

    Parameters
    ----------
    fname : str
        Path of a pickle file containing a dict with the keys 'X_train',
        'y_train', 'X_test', 'y_test' and 'predictors'.
    mask_idx : sequence of int
        Column indices to keep in the feature matrix; the same indices
        select the matching entries of 'predictors'.
    timelag, timewindow, cv : int, optional
        Forwarded unchanged to ``ppaki.AKI_predictor_log``. The defaults
        (6, 6, 5) are the values that used to be hard-coded here, so existing
        two-argument calls behave exactly as before.

    Returns
    -------
    (data, model) : tuple
        ``data`` is the dict given to the predictor; ``model`` is the
        ``predictor`` attribute of the validator after ``cv_full()`` ran.
    """
    # NOTE(security): pickle.load can execute arbitrary code -- only point
    # this at files produced by this project.
    with open(fname, 'rb') as f_handle:
        raw = pickle.load(f_handle)

    # Stack both partitions; column selection is applied after stacking.
    X_train = np.concatenate((raw['X_train'], raw['X_test']), axis=0)
    X_train = X_train[:, mask_idx]
    y_train = np.concatenate((raw['y_train'], raw['y_test']), axis=0)

    predictors = [raw['predictors'][idx] for idx in mask_idx]

    # The "test" entries deliberately alias the training arrays.
    data = {'X_train': X_train, 'y_train': y_train,
            'X_test': X_train, 'y_test': y_train,
            'predictors': predictors}

    validator = ppaki.AKI_predictor_log(data, ready=True, cutoff=cut_off, fill_mode=fill_mode,
                                        ref_type=ref_type, cv=cv,
                                        timelag=timelag, timewindow=timewindow)
    validator.cv_full()

    return (data, validator.predictor)

## Train and test on ISM only

In [19]:
# Re-import the predictor module so local edits to it are picked up.
# (`reload` is a builtin on Python 2 only.)
reload(ppaki)

# Feature-selection switches for the ISM-only experiment.
only_last = True
ex_cps_fts = False
ex_creatinine = False
only_common = True
only_sim_fts = False
only_top_fts = False
train_on_time0 = True
ism_tt_dir = 'train_test_ism5'
ism_result_dir = 'results_logreg4'
ism_result_fname = 'logreg_ism_only_last_manual018_train0.pkl'

# The predictor names are taken from one reference file.
# NOTE(security): pickle.load can execute arbitrary code -- trusted files only.
with open(os.path.join(fileDir, ism_tt_dir, 'ism_onset_tt_tlag006_twin006.pkl'), 'rb') as f_ism:
    tmp_ism = pickle.load(f_ism)
ism_invar = tmp_ism['predictors']
idx_all = np.arange(len(ism_invar))

# All-True mask used whenever a filter is switched off. The individual masks
# are only ever combined with `&` / `~` (which allocate new arrays), so
# sharing this single array between them is safe.
mask_keep_all = np.ones(len(ism_invar), dtype=np.bool_)

# "<feature>_last" columns plus 'age' ('sex_M' is currently not included).
in_fts_last = [ft+'_last' for ft in fts]
in_fts_last.append('age')
mask_last = np.in1d(ism_invar, in_fts_last) if only_last else mask_keep_all

# Drop every statistic of the features listed in ex_fts.
mask_common = ~np.in1d(ism_invar, ex_fts_full) if only_common else mask_keep_all

# Optionally drop the oi / osi / si statistics.
cps_fts = ['oi', 'osi', 'si']
cps_ft_stats = [ft+"_"+stat for ft in cps_fts for stat in suffices]
mask_ex_cps = ~np.in1d(ism_invar, cps_ft_stats) if ex_cps_fts else mask_keep_all

# Optionally drop the creatinine statistics.
scr_ft_stats = ['creatinine_'+stat for stat in suffices]
mask_ex_creatinine = ~np.in1d(ism_invar, scr_ft_stats) if ex_creatinine else mask_keep_all

# Optionally restrict to the sm_fts / top_fts_full lists.
mask_only_sim_fts = np.in1d(ism_invar, sm_fts) if only_sim_fts else mask_keep_all
mask_only_top_fts = np.in1d(ism_invar, top_fts_full) if only_top_fts else mask_keep_all

mask_all = (mask_last & mask_ex_cps & mask_ex_creatinine
            & mask_only_sim_fts & mask_only_top_fts & mask_common)

# Column indices of the selected predictors. The loop variable is called
# `keep` rather than `bool`: on Python 2 comprehension variables leak into the
# notebook namespace and would otherwise replace the builtin.
mask_idx = [idx for idx, keep in zip(idx_all, mask_all) if keep]

# Reference models fitted on the tlag006/twin006 and tlag012/twin012 files.
fname_ism_tt006 = os.path.join(fileDir, ism_tt_dir, 'ism_onset_tt_tlag006_twin006.pkl')
data006, model006 = getValidator(fname_ism_tt006, mask_idx)

fname_ism_tt012 = os.path.join(fileDir, ism_tt_dir, 'ism_onset_tt_tlag012_twin012.pkl')
data012, model012 = getValidator(fname_ism_tt012, mask_idx)

# Explicit lookup by window length; replaces eval() on a constructed
# variable name ('data006' / 'model012' ...).
ref_by_twin = {6: (data006, model006), 12: (data012, model012)}

performance_ism = {}

for tlag, twin in np.abs(combination):
    print('timelag: {}h , timewin: {}h'.format(-tlag, twin))
    fname_ism_tt = 'ism_onset_tt_tlag{:03d}_twin{:03d}.pkl'.format(tlag, twin)
    fname_ism_tt = os.path.join(fileDir, ism_tt_dir, fname_ism_tt)

    # NOTE(security): pickle.load can execute arbitrary code -- trusted files only.
    with open(fname_ism_tt, 'rb') as f_ism_tt:
        data_tt = pickle.load(f_ism_tt)

    extra_args = {}
    if train_on_time0:
        # Train on the reference set for this window length and evaluate the
        # already-fitted model on ALL samples (train + test) of this lag.
        ref_data, ref_model = ref_by_twin[int(twin)]
        X_train = ref_data['X_train']
        y_train = ref_data['y_train']
        X_test = np.concatenate((data_tt['X_train'][:, mask_idx],
                                 data_tt['X_test'][:, mask_idx]), axis=0)
        y_test = np.concatenate((data_tt['y_train'], data_tt['y_test']), axis=0)
        extra_args['pre_model'] = ref_model
    else:
        # Use the file's own train/test partition.
        X_train = data_tt['X_train'][:, mask_idx]
        y_train = data_tt['y_train']
        X_test = data_tt['X_test'][:, mask_idx]
        y_test = data_tt['y_test']

    predictors = [data_tt['predictors'][idx] for idx in mask_idx]

    data = {'X_train': X_train,
            'y_train': y_train,
            'X_test': X_test,
            'y_test': y_test,
            'predictors': predictors}

    # `pre_model` is only passed in train_on_time0 mode, as before.
    validator_ism = ppaki.AKI_predictor_log(data, ready=True, cutoff=cut_off, fill_mode=fill_mode,
                                            ref_type=ref_type, cv=5, timelag=tlag, timewindow=twin,
                                            **extra_args)
    validator_ism.cv_full()

    performance_ism[(tlag, twin)] = validator_ism.get_full()

    print("length of performance: {}".format(len(performance_ism)))

with open(os.path.join(fileDir, ism_result_dir, ism_result_fname), 'wb') as f:
    pickle.dump(performance_ism, f)

# One (time lag, test AUC) row per evaluated lag, split by window length ...
auc_ism_twin012 = np.array([(lag, perf['test auc'])
                            for (lag, win), perf in performance_ism.items()
                            if win == 12])
auc_ism_twin006 = np.array([(lag, perf['test auc'])
                            for (lag, win), perf in performance_ism.items()
                            if win == 6])

# ... with the rows ordered by increasing lag (first column).
auc_ism_twin012 = auc_ism_twin012[np.argsort(auc_ism_twin012[:, 0])]
auc_ism_twin006 = auc_ism_twin006[np.argsort(auc_ism_twin006[:, 0])]

Test set normalized ...
training ...
testing ...
0.715288652883
Test set normalized ...
training ...
testing ...
0.734594128392
timelag: -6h , timewin: 6h
Test set normalized ...
Already trained ...
testing ...
0.715288652883
length of performance: 1
timelag: -7h , timewin: 6h
Test set normalized ...
Already trained ...
testing ...
0.707957924732
length of performance: 2
timelag: -8h , timewin: 6h
Test set normalized ...
Already trained ...
testing ...
0.720770318492
length of performance: 3
timelag: -9h , timewin: 6h
Test set normalized ...
Already trained ...
testing ...
0.719668941789
length of performance: 4
timelag: -10h , timewin: 6h
Test set normalized ...
Already trained ...
testing ...
0.71232597581
length of performance: 5
timelag: -11h , timewin: 6h
Test set normalized ...
Already trained ...
testing ...
0.710922624434
length of performance: 6
timelag: -12h , timewin: 12h
Test set normalized ...
Already trained ...
testing ...
0.734594128392
length of performance: 7
timelag:

In [20]:
# ISM-only results for the 6 h window: rows are (time lag, test AUC), sorted by lag.
auc_ism_twin006

array([[  6.        ,   0.71528865],
       [  7.        ,   0.70795792],
       [  8.        ,   0.72077032],
       [  9.        ,   0.71966894],
       [ 10.        ,   0.71232598],
       [ 11.        ,   0.71092262],
       [ 12.        ,   0.70083146],
       [ 13.        ,   0.70780254],
       [ 14.        ,   0.68954651],
       [ 15.        ,   0.68484914],
       [ 16.        ,   0.68009245],
       [ 17.        ,   0.68794036],
       [ 18.        ,   0.68310624],
       [ 19.        ,   0.67175127],
       [ 20.        ,   0.67781755],
       [ 21.        ,   0.67077866],
       [ 22.        ,   0.66431632],
       [ 23.        ,   0.65866679],
       [ 24.        ,   0.64425189]])

## Train on ISM, Test on STM

In [21]:
def orderPredictors(ism_pred, stm_pred, stm_idx):
    """Reorder the STM predictors and their column indices to follow ISM order.

    Parameters
    ----------
    ism_pred : sequence
        Predictor names in the desired (ISM) order.
    stm_pred : sequence
        Predictor names in their current (STM) order. Names must be hashable.
    stm_idx : sequence
        Column indices aligned element-by-element with ``stm_pred``.

    Returns
    -------
    (stm_pred, stm_idx_reorder) : tuple of two lists
        For every name in ``ism_pred``, the matching entry of ``stm_pred`` and
        the matching entry of ``stm_idx``. If a name occurs more than once in
        ``stm_pred`` its first occurrence is used.

    Raises
    ------
    ValueError
        If a name in ``ism_pred`` does not occur in ``stm_pred``.
    """
    # Position of the FIRST occurrence of every STM name: one pass instead of
    # two list.index() scans per ISM name.
    first_pos = {}
    for pos, name in enumerate(stm_pred):
        first_pos.setdefault(name, pos)

    stm_pred_reorder = []
    stm_idx_reorder = []
    for name in ism_pred:
        if name not in first_pos:
            raise ValueError("predictor {!r} is missing from stm_pred".format(name))
        pos = first_pos[name]
        stm_pred_reorder.append(stm_pred[pos])
        stm_idx_reorder.append(stm_idx[pos])

    return (stm_pred_reorder, stm_idx_reorder)

In [22]:
# Feature-selection switches for the "train on ISM, test on STM" experiment.
only_last = True
ex_diff_fts = True
ex_cps_fts = False
ex_creatinine = False
only_sim_fts = False
only_top_fts = False
train_on_time0 = True
ism_tt_dir = 'train_test_ism5'
stm_tt_dir = 'train_test_stm5'
ism_stm_result_dir = 'results_logreg4'
ism_stm_result_fname = 'logreg_train_ism_test_stm_last_manual018_train0.pkl'

# Predictor names of both data sets, read from one reference file each.
# The files are now closed again (they were previously left open).
# NOTE(security): pickle.load can execute arbitrary code -- trusted files only.
fname_ism = os.path.join(fileDir, ism_tt_dir, 'ism_onset_tt_tlag006_twin006.pkl')
with open(fname_ism, 'rb') as f_ism:
    tmp_ism = pickle.load(f_ism)
fname_stm = os.path.join(fileDir, stm_tt_dir, 'stm_onset_tt_tlag006_twin006.pkl')
with open(fname_stm, 'rb') as f_stm:
    tmp_stm = pickle.load(f_stm)
ism_invar = tmp_ism['predictors']
stm_invar = tmp_stm['predictors']

# All-True masks used whenever a filter is switched off. Masks are only
# combined with `&` / `~` (new arrays), so sharing them is safe.
all_ism = np.ones(len(ism_invar), dtype=np.bool_)
all_stm = np.ones(len(stm_invar), dtype=np.bool_)

# ISM predictors must also exist in STM; the STM side starts unrestricted.
mask_ism = np.in1d(ism_invar, stm_invar)
mask_stm = all_stm

# "<feature>_last" columns plus 'age' ('sex_M' is currently not included).
in_fts_last = [ft+'_last' for ft in fts]
in_fts_last.append('age')
mask_ism_last = np.in1d(ism_invar, in_fts_last) if only_last else all_ism
mask_stm_last = np.in1d(stm_invar, in_fts_last) if only_last else all_stm

# Optionally drop the oi / osi / si statistics.
cps_fts = ['oi', 'osi', 'si']
cps_ft_stats = [ft+"_"+stat for ft in cps_fts for stat in suffices]
mask_ism_ex_cps = ~np.in1d(ism_invar, cps_ft_stats) if ex_cps_fts else all_ism
mask_stm_ex_cps = ~np.in1d(stm_invar, cps_ft_stats) if ex_cps_fts else all_stm

# Optionally drop the creatinine statistics.
scr_ft_stats = ['creatinine_'+stat for stat in suffices]
mask_ism_ex_scr = ~np.in1d(ism_invar, scr_ft_stats) if ex_creatinine else all_ism
mask_stm_ex_scr = ~np.in1d(stm_invar, scr_ft_stats) if ex_creatinine else all_stm

# Optionally restrict to the sm_fts / top_fts_full lists.
mask_ism_only_sim_fts = np.in1d(ism_invar, sm_fts) if only_sim_fts else all_ism
mask_stm_only_sim_fts = np.in1d(stm_invar, sm_fts) if only_sim_fts else all_stm

mask_ism_only_top_fts = np.in1d(ism_invar, top_fts_full) if only_top_fts else all_ism
mask_stm_only_top_fts = np.in1d(stm_invar, top_fts_full) if only_top_fts else all_stm

# Drop every statistic of the features listed in ex_fts.
mask_ism_ex_diff = ~np.in1d(ism_invar, ex_fts_full) if ex_diff_fts else all_ism
mask_stm_ex_diff = ~np.in1d(stm_invar, ex_fts_full) if ex_diff_fts else all_stm

mask_ism = (mask_ism & mask_ism_last & mask_ism_ex_cps & mask_ism_ex_scr
            & mask_ism_only_sim_fts & mask_ism_only_top_fts & mask_ism_ex_diff)
mask_stm = (mask_stm & mask_stm_last & mask_stm_ex_cps & mask_stm_ex_scr
            & mask_stm_only_sim_fts & mask_stm_only_top_fts & mask_stm_ex_diff)

# Selected column indices and names per data set. The loop variable is called
# `keep` rather than `bool`: on Python 2 comprehension variables leak into the
# notebook namespace and would otherwise replace the builtin.
idx_ism_all = np.arange(len(ism_invar))
idx_stm_all = np.arange(len(stm_invar))
mask_idx_ism = [idx for idx, keep in zip(idx_ism_all, mask_ism) if keep]
ism_pred = [pred for pred, keep in zip(ism_invar, mask_ism) if keep]
mask_idx_stm = [idx for idx, keep in zip(idx_stm_all, mask_stm) if keep]
stm_pred = [pred for pred, keep in zip(stm_invar, mask_stm) if keep]
print(ism_pred)
print(stm_pred)
print(mask_idx_stm)

# The two files store their columns in different orders; bring the STM
# selection into ISM order so both feature matrices line up column by column.
stm_pred, mask_idx_stm = orderPredictors(ism_pred, stm_pred, mask_idx_stm)
print(stm_pred)
print(mask_idx_stm)

# Number of selected ISM predictors without an STM counterpart.
print(np.sum(~np.in1d(ism_pred, stm_pred)))

predictors = ism_pred


# predictors = [ft for ft in predictors if (ft not in ex_fts_full) and (ft in stm_invar)]



# mask_ism = np.in1d(ism_invar, predictors)
# mask_idx_ism = np.arange(len(mask_ism))
# mask_idx_ism = [idx for idx, bool in zip(mask_idx_ism, mask_ism) if bool==True]

# mask_stm = np.in1d(stm_invar, predictors)
# mask_idx_stm = np.arange(len(mask_stm))
# mask_idx_stm = [idx for idx, bool in zip(mask_idx_stm, mask_stm) if bool==True]

# Reference models fitted on the ISM tlag006/twin006 and tlag012/twin012 files.
fname_ism_tt006 = os.path.join(fileDir, ism_tt_dir, 'ism_onset_tt_tlag006_twin006.pkl')
data006, model006 = getValidator(fname_ism_tt006, mask_idx_ism)
fname_ism_tt012 = os.path.join(fileDir, ism_tt_dir, 'ism_onset_tt_tlag012_twin012.pkl')
data012, model012 = getValidator(fname_ism_tt012, mask_idx_ism)

# Explicit lookup by window length; replaces eval() on a constructed
# variable name ('data006' / 'model012' ...).
ref_by_twin = {6: (data006, model006), 12: (data012, model012)}

performance_ism_stm = {}

for tlag, twin in np.abs(combination):
    print('timelag: {}h , timewin: {}h'.format(-tlag, twin))

    extra_args = {}
    if train_on_time0:
        # Reuse the reference set / fitted model for this window length.
        ref_data, ref_model = ref_by_twin[int(twin)]
        X_train = ref_data['X_train']
        y_train = ref_data['y_train']
        extra_args['pre_model'] = ref_model
    else:
        # Train on all ISM samples (train + test partitions) of this lag.
        fname_ism_tt = 'ism_onset_tt_tlag{:03d}_twin{:03d}.pkl'.format(tlag, twin)
        fname_ism_tt = os.path.join(fileDir, ism_tt_dir, fname_ism_tt)

        # NOTE(security): pickle.load can execute arbitrary code -- trusted files only.
        with open(fname_ism_tt, 'rb') as f_ism_tt:
            data_train = pickle.load(f_ism_tt)

        X_train = np.concatenate((data_train['X_train'], data_train['X_test']), axis=0)
        y_train = np.concatenate((data_train['y_train'], data_train['y_test']), axis=0)
        X_train = X_train[:, mask_idx_ism]

    # Test on all STM samples (train + test partitions) of this lag, with the
    # columns selected through mask_idx_stm.
    fname_stm_tt = 'stm_onset_tt_tlag{:03d}_twin{:03d}.pkl'.format(tlag, twin)
    fname_stm_tt = os.path.join(fileDir, stm_tt_dir, fname_stm_tt)
    with open(fname_stm_tt, 'rb') as f_stm_tt:
        data_test = pickle.load(f_stm_tt)

    X_test = np.concatenate((data_test['X_train'], data_test['X_test']), axis=0)
    y_test = np.concatenate((data_test['y_train'], data_test['y_test']), axis=0)
    X_test = X_test[:, mask_idx_stm]

    data = {'X_train': X_train, 'y_train': y_train,
            'X_test': X_test, 'y_test': y_test,
            'predictors': predictors}

    # `pre_model` is only passed in train_on_time0 mode, as before.
    validator_onset = ppaki.AKI_predictor_log(data, ready=True, cutoff=cut_off, fill_mode=fill_mode,
                                              ref_type=ref_type, cv=5, timelag=tlag, timewindow=twin,
                                              **extra_args)
    validator_onset.cv_full()
    performance_ism_stm[(tlag, twin)] = validator_onset.get_full()

    print("length of performance: {}".format(len(performance_ism_stm)))

['age', 'nsbp_last', 'ndbp_last', 'hr_last', 'spo2_last', 'hemoglobin_last', 'temperature_last', 'wbc_last', 'platelet_last', 'albumin_last', 'urine_last', 'potassium_last', 'glucose_last', 'creatinine_last', 'lactic_acid_last', 'osi_last', 'si_last', 'oi_last']
['age', 'platelet_last', 'wbc_last', 'hemoglobin_last', 'urine_last', 'potassium_last', 'ndbp_last', 'nsbp_last', 'hr_last', 'lactic_acid_last', 'temperature_last', 'spo2_last', 'creatinine_last', 'albumin_last', 'glucose_last', 'osi_last', 'si_last', 'oi_last']
[0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 80, 84, 89, 94]
['age', 'nsbp_last', 'ndbp_last', 'hr_last', 'spo2_last', 'hemoglobin_last', 'temperature_last', 'wbc_last', 'platelet_last', 'albumin_last', 'urine_last', 'potassium_last', 'glucose_last', 'creatinine_last', 'lactic_acid_last', 'osi_last', 'si_last', 'oi_last']
[0, 35, 30, 40, 55, 15, 50, 10, 5, 65, 20, 25, 80, 60, 45, 84, 89, 94]
0
Test set normalized ...
training ...
testing ...
0.715288652883
Tes

In [23]:
# Persist the ISM->STM results; the `with` block closes the file even if
# pickling fails.
with open(os.path.join(fileDir, ism_stm_result_dir, ism_stm_result_fname), 'wb') as f:
    pickle.dump(performance_ism_stm, f)

# One (time lag, test AUC) row per evaluated lag, split by window length ...
auc_twin012 = np.array([(lag, perf['test auc'])
                        for (lag, win), perf in performance_ism_stm.items()
                        if win == 12])
auc_twin006 = np.array([(lag, perf['test auc'])
                        for (lag, win), perf in performance_ism_stm.items()
                        if win == 6])

# ... with the rows ordered by increasing lag (first column).
auc_twin012 = auc_twin012[auc_twin012[:, 0].argsort(), :]
auc_twin006 = auc_twin006[auc_twin006[:, 0].argsort(), :]

In [25]:
# Train-on-ISM / test-on-STM results for the 6 h window: rows are (time lag, test AUC), sorted by lag.
auc_twin006

array([[  6.        ,   0.63038067],
       [  7.        ,   0.65500242],
       [  8.        ,   0.65843032],
       [  9.        ,   0.64826522],
       [ 10.        ,   0.64018726],
       [ 11.        ,   0.63765964],
       [ 12.        ,   0.62327297],
       [ 13.        ,   0.60796535],
       [ 14.        ,   0.61289863],
       [ 15.        ,   0.62178692],
       [ 16.        ,   0.61234152],
       [ 17.        ,   0.63868455],
       [ 18.        ,   0.61255277],
       [ 19.        ,   0.62980691],
       [ 20.        ,   0.61299192],
       [ 21.        ,   0.57165714],
       [ 22.        ,   0.58790171],
       [ 23.        ,   0.58242666],
       [ 24.        ,   0.56391037]])

## Train-Test on inner-mixed ISM-STM data set
> The ISM and STM dataframes are inner-joined; that is, columns that are missing from the STM dataframe are dropped.

In [None]:
performance_inner = {}

for tlag, twin in np.abs(combination):
    print('timelag: {}h , timewin: {}h'.format(-tlag, twin))
    fname_inner_tt = 'across_inner_onset_tt_tlag{:03d}_twin{:03d}.pkl'.format(tlag, twin)
    fname_inner_tt = os.path.join(fileDir, 'train_test_across_inner', fname_inner_tt)

    try:
        # NOTE(security): pickle.load can execute arbitrary code -- trusted files only.
        with open(fname_inner_tt, 'rb') as f_inner_tt:
            data = pickle.load(f_inner_tt)

        validator_inner = ppaki.AKI_predictor_log(data, ready=True, cutoff=cut_off, fill_mode=fill_mode,
                                                  ref_type=ref_type, cv=5, timelag=tlag, timewindow=twin)
        validator_inner.cv_full()
        performance_inner[(tlag, twin)] = validator_inner.get_full()

        print("length of performance: {}".format(len(performance_inner)))
    except Exception as err:
        # Still best-effort (the remaining combinations are processed), but
        # the failure is reported instead of silently swallowed, and
        # KeyboardInterrupt is no longer trapped.
        print('skipped timelag {}h, timewin {}h: {!r}'.format(-tlag, twin, err))

# One (time lag, test AUC) row per evaluated lag. reshape(-1, 2) keeps the
# arrays two-dimensional when no combination succeeded, so the sorting below
# does not fail on an empty result.
auc_inner_twin012 = np.array([(tlag, performance_inner[(tlag, twin)]['test auc'])
                              for (tlag, twin) in performance_inner if twin==12]).reshape(-1, 2)
auc_inner_twin006 = np.array([(tlag, performance_inner[(tlag, twin)]['test auc'])
                              for (tlag, twin) in performance_inner if twin==6]).reshape(-1, 2)
auc_inner_twin012 = auc_inner_twin012[auc_inner_twin012[:,0].argsort(),:]
auc_inner_twin006 = auc_inner_twin006[auc_inner_twin006[:,0].argsort(),:]

In [None]:
# Inner-joined ISM+STM results for the 6 h window: rows are (time lag, test AUC), sorted by lag.
auc_inner_twin006

## Train-Test on outer-mixed ISM-STM data set
> The ISM and STM dataframes are outer-joined; that is, columns that are missing from the STM dataframe
> are filled with the ISM total-population mean.

In [None]:
performance_outer = {}

for tlag, twin in np.abs(combination):
    print('timelag: {}h , timewin: {}h'.format(-tlag, twin))
    fname_outer_tt = 'across_outer_onset_tt_tlag{:03d}_twin{:03d}.pkl'.format(tlag, twin)
    fname_outer_tt = os.path.join(fileDir, 'train_test_across_outer', fname_outer_tt)

    try:
        # NOTE(security): pickle.load can execute arbitrary code -- trusted files only.
        with open(fname_outer_tt, 'rb') as f_outer_tt:
            data = pickle.load(f_outer_tt)

        validator_outer = ppaki.AKI_predictor_log(data, ready=True, cutoff=cut_off, fill_mode=fill_mode,
                                                  ref_type=ref_type, cv=5, timelag=tlag, timewindow=twin)
        validator_outer.cv_full()
        performance_outer[(tlag, twin)] = validator_outer.get_full()

        print("length of performance: {}".format(len(performance_outer)))
    except Exception as err:
        # Still best-effort (the remaining combinations are processed), but
        # the failure is reported instead of silently swallowed, and
        # KeyboardInterrupt is no longer trapped.
        print('skipped timelag {}h, timewin {}h: {!r}'.format(-tlag, twin, err))

# One (time lag, test AUC) row per evaluated lag. reshape(-1, 2) keeps the
# arrays two-dimensional when no combination succeeded.
auc_outer_twin012 = np.array([(tlag, performance_outer[(tlag, twin)]['test auc'])
                              for (tlag, twin) in performance_outer if twin==12]).reshape(-1, 2)
auc_outer_twin006 = np.array([(tlag, performance_outer[(tlag, twin)]['test auc'])
                              for (tlag, twin) in performance_outer if twin==6]).reshape(-1, 2)

# The sorted arrays are now stored (previously the sort results were
# discarded, leaving both arrays in dict order).
auc_outer_twin012 = auc_outer_twin012[auc_outer_twin012[:,0].argsort(),:]
auc_outer_twin006 = auc_outer_twin006[auc_outer_twin006[:,0].argsort(),:]

# Keep showing the sorted 6 h table as this cell's output.
auc_outer_twin006


In [None]:
# performance_ism
# performance
# performance_inner
# performance_outer
# Persist the outer-join results; the `with` block closes the file even if
# pickling fails.
with open(os.path.join(fileDir, 'results_logreg', 'logreg_across_outer.pkl'), 'wb') as f:
    pickle.dump(performance_outer, f)