In [1]:
from __future__ import print_function
from __future__ import division

import string
import re
import random

import os
import sys
import argparse
import time
import math

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
from torchviz import make_dot, make_dot_from_trace

from sklearn.metrics import roc_auc_score  
from sklearn.metrics import roc_curve 

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np

try:
    # Python 2 ships a C-accelerated pickle under this name.
    import cPickle as pickle
except ImportError:
    # Python 3 has no cPickle module; fall back to the standard one.
    # Only ImportError is caught so unrelated failures are not silently hidden.
    import pickle


from bayes_opt import BayesianOptimization

In [2]:
import model_gp as model # need to check and modify
import Loaddata as Loaddata
import TrainVaTe as TVT

# Check GPU availability; LR_tune moves the model to the GPU when this flag is True.
use_cuda = torch.cuda.is_available()
# Bare expression so the notebook shows the flag as the cell's output.
use_cuda

True

In [3]:
# Load data set and target values.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only point these paths at trusted files.
# Context managers make sure both file handles are closed after loading.
with open('Data/h143.visits', 'rb') as visits_file:
    set_x = pickle.load(visits_file, encoding='bytes')
with open('Data/h143.labels', 'rb') as labels_file:
    set_y = pickle.load(labels_file, encoding='bytes')

# Flatten every patient's list of visits into one flat list of entries.
model_x = [[each for visit in patient for each in visit] for patient in set_x]

# Pair each label with its flattened sequence as [label, sequence].
merged_set = [[set_y[i], model_x[i]] for i in range(len(set_y))]
print("\nLoading and preparing data...")
# Loaddata.load_data returns the three splits unpacked below.
train1, valid1, test1 = Loaddata.load_data(merged_set)
print("\nSample data after split:")
print(train1[0])
print("model is", 'LR')


Loading and preparing data...

Sample data after split:
[0, [1667, 144, 62, 85, 1667, 144, 62, 85]]
model is LR


In [4]:
def print2file(buf, outFile):
    """Append ``buf`` plus a trailing newline to the file at ``outFile``.

    Used to record search results one row at a time.

    Parameters
    ----------
    buf : str
        Text of the row to record (written without modification).
    outFile : str
        Path of the log file. It is opened in append mode, so existing
        content is kept and a missing file is created.
    """
    # The context manager closes the handle even if the write raises.
    with open(outFile, 'a') as outfd:
        outfd.write(buf + '\n')

logFile='testLR.log'
header = 'model|emb_dim|l2|lr|BestValidAUC|TestAUC|atEpoch'
# The log is opened in append mode, so re-running this cell used to add a
# duplicate header row in the middle of the results. Only write the header
# when the log does not exist yet or is still empty.
if not os.path.exists(logFile) or os.path.getsize(logFile) == 0:
    print2file(header, logFile)

In [5]:
def LR_tune(embed_dim, l2, lr, n_epochs=25, patience=5):
    """Train one EHR_LR model for a hyperparameter setting and return its best validation AUC.

    The three search variables arrive in a transformed space and are mapped
    back to their working scales before the model and optimizer are built.

    Parameters
    ----------
    embed_dim : float
        Exponent of the embedding size; the model is built with
        ``2 ** int(embed_dim)`` (the fractional part is truncated).
    l2 : float
        Natural log of the ``weight_decay`` passed to Adam.
    lr : float
        Natural log of the learning rate passed to Adam.
    n_epochs : int, optional
        Maximum number of training epochs. Defaults to 25.
    patience : int, optional
        Training stops once more than this many epochs have passed since
        the epoch with the best validation AUC. Defaults to 5.

    Returns
    -------
    float
        The highest validation AUC observed across the epochs that ran.

    Notes
    -----
    Reads the notebook-level ``train1``, ``valid1``, ``test1``, ``use_cuda``
    and ``logFile``. After every epoch that does not trigger early stopping,
    one row with the running best values is appended to ``logFile``.
    """
    # Map the searched values back to the scales the model/optimizer expect.
    embed_dim = 2 ** int(embed_dim)  # base 2
    l2 = np.exp(l2)  # base e
    lr = np.exp(lr)  # base e

    ehr_model = model.EHR_LR(embed_dim=embed_dim)
    if use_cuda:
        ehr_model = ehr_model.cuda()
    optimizer = optim.Adam(ehr_model.parameters(), lr=lr, weight_decay=l2)

    bestValidAuc = 0.0
    bestTestAuc = 0.0
    bestValidEpoch = 0

    for ep in range(n_epochs):
        # The losses returned by TVT.train are not needed for model selection.
        TVT.train(train1, model=ehr_model, optimizer=optimizer, batch_size=1)

        valid_auc, _, _ = TVT.calculate_auc(model=ehr_model, data=valid1, which_model='LR', batch_size=1)
        if valid_auc > bestValidAuc:
            bestValidAuc = valid_auc
            bestValidEpoch = ep
            # Score the test split only when validation improves, so the
            # reported test AUC belongs to the model picked by validation.
            bestTestAuc, _, _ = TVT.calculate_auc(model=ehr_model, data=test1, which_model='LR', batch_size=1)

        # Early stopping: no validation improvement for more than `patience` epochs.
        if ep - bestValidEpoch > patience:
            break

        # One '|' between every field so the row lines up with the 7-column
        # header (model|emb_dim|l2|lr|BestValidAUC|TestAUC|atEpoch). The
        # previous version emitted '||' after lr, creating an empty column.
        row = 'LR|%s|%s|%s|%f |%f |%d ' % (embed_dim, l2, lr, bestValidAuc, bestTestAuc, bestValidEpoch)
        print2file(row, logFile)

    return bestValidAuc

In [6]:
if __name__ == "__main__":
    # Extra keyword arguments forwarded unchanged to LRBO.maximize().
    # NOTE(review): presumably consumed by the optimiser's Gaussian process — confirm against the installed bayes_opt version.
    gp_params = dict(alpha=1e-4)

    # Search ranges, expressed in the transformed space that LR_tune maps
    # back (2 ** int(embed_dim), exp(l2), exp(lr)).
    search_bounds = {
        'embed_dim': (0, 8),
        'l2': (-16, 1),
        'lr': (-11, -2),
    }
    LRBO = BayesianOptimization(LR_tune, search_bounds)

    # Hand-picked point registered for evaluation before the search proper.
    seed_points = {'embed_dim': [1], 'l2': [-11], 'lr': [-9]}
    LRBO.explore(seed_points)

    LRBO.maximize(n_iter=30, **gp_params)

    # Report the best objective value (validation AUC returned by LR_tune).
    best_value = LRBO.res['max']['max_val']
    for report_line in ('-' * 53, 'Final Results', 'LR: %f' % best_value):
        print(report_line)

[31mInitialization[0m
[94m-------------------------------------------------------------------[0m
 Step |   Time |      Value |   embed_dim |        l2 |        lr | 
    1 | 15m31s | [35m   0.77016[0m | [32m     1.0000[0m | [32m -11.0000[0m | [32m  -9.0000[0m | 
    2 | 09m56s |    0.65682 |      7.0153 |   -4.0200 |   -5.4448 | 
    3 | 11m11s | [35m   0.77134[0m | [32m     7.4061[0m | [32m  -5.6801[0m | [32m  -8.3787[0m | 
    4 | 08m44s |    0.74046 |      1.9991 |  -14.9532 |   -4.9908 | 
    5 | 04m18s |    0.59687 |      2.4018 |   -2.3075 |   -4.9971 | 
    6 | 06m59s |    0.54415 |      1.5349 |   -3.3380 |   -2.8736 | 
[31mBayesian Optimization[0m
[94m-------------------------------------------------------------------[0m
 Step |   Time |      Value |   embed_dim |        l2 |        lr | 
    7 | 20m33s |    0.71027 |      8.0000 |  -16.0000 |  -11.0000 | 
    8 | 10m32s |    0.58465 |      8.0000 |    1.0000 |  -11.0000 | 
    9 | 21m49s |    0.53766 |

  " state: %s" % convergence_dict)


   25 | 10m15s |    0.77166 |      1.4851 |   -9.8711 |   -6.9459 | 
   26 | 08m53s |    0.76915 |      5.9699 |   -8.1378 |   -8.2801 | 
   27 | 06m05s |    0.77147 |      0.0095 |  -10.8550 |   -6.9711 | 


  " state: %s" % convergence_dict)


   28 | 14m15s |    0.77765 |      8.0000 |   -6.3627 |  -10.1488 | 
   29 | 12m37s |    0.76204 |      7.9997 |   -7.7332 |   -8.6366 | 


  " state: %s" % convergence_dict)


   30 | 09m57s |    0.77800 |      1.5723 |   -9.0584 |   -7.4388 | 
   31 | 09m29s |    0.77830 |      0.0000 |   -9.3183 |   -7.0891 | 
   32 | 19m28s |    0.76878 |      7.9978 |   -5.7608 |  -11.0000 | 
   33 | 04m58s |    0.77346 |      1.5647 |  -10.2258 |   -6.7250 | 
   34 | 07m05s |    0.77447 |      0.0064 |   -8.9613 |   -7.1064 | 
   35 | 09m47s |    0.77240 |      0.0255 |  -10.8283 |   -7.1434 | 
   36 | 07m00s |    0.77170 |      5.4919 |   -6.4287 |   -9.1667 | 
-----------------------------------------------------
Final Results
LR: 0.779816


In [7]:
# l2 on its natural scale: LR_tune applies np.exp to the searched l2 value before using it as Adam's weight_decay.
# NOTE(review): -8.8122 is presumably the l2 of the best trial; that row is not visible in the saved output — confirm.
np.exp(-8.8122)

0.00014890530365549773

In [8]:
# lr on its natural scale: LR_tune applies np.exp to the searched lr value before using it as Adam's learning rate.
# NOTE(review): -7.0593 is presumably the lr of the best trial; that row is not visible in the saved output — confirm.
np.exp(-7.0593)

0.0008593794490898537