In [1]:
import numpy as np
import pickle
from sklearn import linear_model
from sklearn.metrics import roc_auc_score, classification_report, roc_curve
import time

In [2]:
# Directory from which the label pickle and the .npy feature arrays are loaded.
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# running elsewhere. The trailing "/" is required because file names are
# appended to it with plain string concatenation (input_path + name).
input_path = "E:/CS_Master_Degree_UIUC/CS598_DeepLearning_for_Health_Data/Project/paper290/Processed_data/"

In [3]:
# Read the pickled label object and expose it as a NumPy array so that it can
# be indexed with a permutation array later on.
picklefile = "mort_labels.pkl"
with open(input_path + picklefile, "rb") as pkl_rb_obj:
    # NOTE(review): pickle.load can execute arbitrary code; only point this
    # at files you produced yourself or otherwise trust.
    mort_labels = np.array(pickle.load(pkl_rb_obj))

In [4]:
# Load the per-modality feature arrays and the combined array, in the same
# order as before: ICD, medication, lab, then the concatenated inputs.
icds, meds, labs, input_seqs = (
    np.load(input_path + npy_name)
    for npy_name in (
        'icd_inputs.npy',
        'med_inputs.npy',
        'lab_inputs.npy',
        'input_seqs.npy',
    )
)

In [5]:
# Fractions of the samples assigned to the train / validation / test partitions.
trainratio, validratio, testratio = 0.7, 0.1, 0.2

# Row offsets (based on the length of `icds`) at which the training partition
# ends and at which the validation partition ends; everything after the
# second offset is the test partition.
trainlindex, validlindex = (
    int(len(icds) * cumulative_ratio)
    for cumulative_ratio in (trainratio, trainratio + validratio)
)

In [6]:
def Train(input_data, labels, run = 10):
    """Evaluate a logistic-regression baseline over repeated random splits.

    On every repetition the rows of ``input_data`` and ``labels`` are shuffled
    with one shared random permutation and cut into train / validation / test
    partitions. A ``LogisticRegression(solver='liblinear')`` model is fit on
    the training partition, and the validation and test AUC-ROC scores are
    printed.

    Parameters
    ----------
    input_data : numpy.ndarray
        Feature array indexed by sample along axis 0.
    labels : numpy.ndarray
        Labels aligned row-for-row with ``input_data``.
    run : int, default 10
        Number of shuffle / fit / score repetitions to perform.

    Returns
    -------
    tuple
        ``(mean, standard deviation)`` of the test AUC-ROC over all
        repetitions.

    Raises
    ------
    ValueError
        If ``run`` is smaller than 1.

    Notes
    -----
    The partition boundaries are derived from the number of rows in
    ``input_data`` and the module-level ``trainratio`` / ``validratio``
    fractions. Shuffling uses NumPy's global random state, so results differ
    between calls unless that state is seeded by the caller.
    """
    if run < 1:
        raise ValueError("run must be at least 1, got {!r}".format(run))

    # Compute the split boundaries from the data actually passed in rather
    # than from a global that was sized on a different array.
    n_samples = input_data.shape[0]
    train_end = int(n_samples * trainratio)
    valid_end = int(n_samples * (trainratio + validratio))

    test_aucrocs = []

    # Honour the requested number of repetitions (previously a hard-coded 10
    # whose loop variable also shadowed the `run` argument).
    for _ in range(run):

        # Randomize the data at each run; the same permutation is applied to
        # features and labels so that rows stay aligned.
        perm = np.random.permutation(n_samples)
        rinput_seqs = input_data[perm]
        rlabels = labels[perm]

        # Get training seqs and labels
        train_input_seqs = rinput_seqs[:train_end]
        train_labels = rlabels[:train_end]

        # Get validation seqs and labels
        valid_input_seqs = rinput_seqs[train_end:valid_end]
        valid_labels = rlabels[train_end:valid_end]

        # Get test seqs and labels
        test_input_seqs = rinput_seqs[valid_end:]
        test_labels = rlabels[valid_end:]

        # Create and fit the model. Author's note: keep solver='liblinear',
        # otherwise an error about string decoding was observed.
        model = linear_model.LogisticRegression(solver='liblinear')
        model.fit(train_input_seqs, train_labels)

        # Print AUC_ROC score of validation; column 1 of predict_proba is the
        # probability assigned to the second class in model.classes_.
        vpredict_probabilities = model.predict_proba(valid_input_seqs)[:, 1]
        print("Validation AUC_ROC: ", roc_auc_score(valid_labels, vpredict_probabilities))

        # Print AUC_ROC score of testing (computed once and reused below)
        predict_probabilities = model.predict_proba(test_input_seqs)[:, 1]
        test_aucroc = roc_auc_score(test_labels, predict_probabilities)
        print("Test AUC_ROC: ", test_aucroc)

        # Pool all testing AUC_ROC scores
        test_aucrocs.append(test_aucroc)

    mean_aucrocs = np.mean(test_aucrocs)
    sd_aucrocs = np.std(test_aucrocs)

    return mean_aucrocs, sd_aucrocs

## Use only ICD features as input

In [7]:
# Baseline on the ICD feature array only: time the repeated evaluation and
# report the mean and standard deviation of the test AUC-ROC.
input_data, labels, run = icds, mort_labels, 10

start = time.time()
mean_aucrocs, sd_aucrocs = Train(input_data, labels, run=run)
end = time.time()

print("execution time is:", end - start, 'seconds')
print("Average AUCROC:", np.round(mean_aucrocs, 2), "+/-", np.round(sd_aucrocs, 4))

Validation AUC_ROC:  0.7994297814951082
Test AUC_ROC:  0.8057867481459168
Validation AUC_ROC:  0.8373653581386739
Test AUC_ROC:  0.8091457507209805
Validation AUC_ROC:  0.8044823232323233
Test AUC_ROC:  0.8118268724293672
Validation AUC_ROC:  0.8407695602512849
Test AUC_ROC:  0.8052989141436634
Validation AUC_ROC:  0.7988751271465326
Test AUC_ROC:  0.8106102424297044
Validation AUC_ROC:  0.8033180454107921
Test AUC_ROC:  0.8161446273904468
Validation AUC_ROC:  0.8261306770457476
Test AUC_ROC:  0.8093678237593354
Validation AUC_ROC:  0.8183671735621443
Test AUC_ROC:  0.8058741699257198
Validation AUC_ROC:  0.8179298846431144
Test AUC_ROC:  0.8348529416077446
Validation AUC_ROC:  0.8300915389672787
Test AUC_ROC:  0.8303880672481354
execution time is: 5.694762229919434 seconds
Average AUCROC: 0.81 +/- 0.0099


## Use only medication features as input

In [8]:
# Baseline on the medication feature array only: time the repeated evaluation
# and report the mean and standard deviation of the test AUC-ROC.
input_data, labels, run = meds, mort_labels, 10

start = time.time()
mean_aucrocs, sd_aucrocs = Train(input_data, labels, run=run)
end = time.time()

print("execution time is:", end - start, 'seconds')
print("Average AUCROC:", np.round(mean_aucrocs, 2), "+/-", np.round(sd_aucrocs, 4))

Validation AUC_ROC:  0.8224567828100597
Test AUC_ROC:  0.8541607809900493
Validation AUC_ROC:  0.848560606060606
Test AUC_ROC:  0.8377430125477947
Validation AUC_ROC:  0.8418997726598793
Test AUC_ROC:  0.8369654575611163
Validation AUC_ROC:  0.8191008043364224
Test AUC_ROC:  0.8460680902152475
Validation AUC_ROC:  0.8543529411764706
Test AUC_ROC:  0.863259769777843
Validation AUC_ROC:  0.8588045234248789
Test AUC_ROC:  0.8487959956709956
Validation AUC_ROC:  0.8392055016768976
Test AUC_ROC:  0.8631845388332011
Validation AUC_ROC:  0.8588583920330337
Test AUC_ROC:  0.8353310813265047
Validation AUC_ROC:  0.870845522509345
Test AUC_ROC:  0.8432899338473855
Validation AUC_ROC:  0.8742123097152683
Test AUC_ROC:  0.8332588362959901
execution time is: 4.629408597946167 seconds
Average AUCROC: 0.85 +/- 0.0105


## Use only lab features as input

In [9]:
# Baseline on the lab feature array only: time the repeated evaluation and
# report the mean and standard deviation of the test AUC-ROC.
input_data, labels, run = labs, mort_labels, 10

start = time.time()
mean_aucrocs, sd_aucrocs = Train(input_data, labels, run=run)
end = time.time()

print("execution time is:", end - start, 'seconds')
print("Average AUCROC:", np.round(mean_aucrocs, 2), "+/-", np.round(sd_aucrocs, 4))

Validation AUC_ROC:  0.7457558944267648
Test AUC_ROC:  0.7818894739822208
Validation AUC_ROC:  0.7619480519480519
Test AUC_ROC:  0.7604059471743417
Validation AUC_ROC:  0.7624323303537706
Test AUC_ROC:  0.7660894113681372
Validation AUC_ROC:  0.7794251699559597
Test AUC_ROC:  0.7430526788180061
Validation AUC_ROC:  0.8016289653929345
Test AUC_ROC:  0.759095804836017
Validation AUC_ROC:  0.7440293799567412
Test AUC_ROC:  0.7660854236980223
Validation AUC_ROC:  0.7177849498895872
Test AUC_ROC:  0.76924825094357
Validation AUC_ROC:  0.8085714285714285
Test AUC_ROC:  0.7534555438460259
Validation AUC_ROC:  0.7575813609467457
Test AUC_ROC:  0.7652422142846896
Validation AUC_ROC:  0.7913122415358234
Test AUC_ROC:  0.7518914717102223
execution time is: 3.134287118911743 seconds
Average AUCROC: 0.76 +/- 0.0102


## Use all concatenated features

In [10]:
# Baseline on the concatenated feature array: time the repeated evaluation
# and report the mean and standard deviation of the test AUC-ROC.
input_data, labels, run = input_seqs, mort_labels, 10

start = time.time()
mean_aucrocs, sd_aucrocs = Train(input_data, labels, run=run)
end = time.time()

print("execution time is:", end - start, 'seconds')
print("Average AUCROC:", np.round(mean_aucrocs, 2), "+/-", np.round(sd_aucrocs, 4))

Validation AUC_ROC:  0.8771328831033245
Test AUC_ROC:  0.8694127624729483
Validation AUC_ROC:  0.8720394919296689
Test AUC_ROC:  0.8826226175904879
Validation AUC_ROC:  0.8873814410117035
Test AUC_ROC:  0.8921826175925669
Validation AUC_ROC:  0.8690332242498198
Test AUC_ROC:  0.866251957619185
Validation AUC_ROC:  0.8972536348949918
Test AUC_ROC:  0.8957080573013986
Validation AUC_ROC:  0.8943002408348942
Test AUC_ROC:  0.8838156856978626
Validation AUC_ROC:  0.8555234697921975
Test AUC_ROC:  0.8811284374991355
Validation AUC_ROC:  0.8818811562183559
Test AUC_ROC:  0.8782874788659577
Validation AUC_ROC:  0.871513813603777
Test AUC_ROC:  0.896750558880796
Validation AUC_ROC:  0.8855834993806881
Test AUC_ROC:  0.8749060321097712
execution time is: 12.864949464797974 seconds
Average AUCROC: 0.88 +/- 0.0099
