This notebook checks whether we can reproduce, locally on the server, the same result (best validation AUC) using the best hyperparameters found so far on SigOpt.com.
* Model searched: RNN with RNN cell. SigOpt gives Validation AUC of **0.803**
* Hyperparameters tested: dropout: **0.75457543972787**; embdim_exp: **9**; eps_exp: **-4**; hid_exp: **9**; l2_exp: **-5**; layers_n: **1**; lr_exp: **-3**; opt_code: **RMSprop**  [click here](https://sigopt.com/guest?guest_token=JXVRREUZOUEXOLLHIBUNTCEWFXQPQQSFUVMLBFKXMDCICYYY)
* Result: Using the same set of parameters, local BO with gp_param = 1e-14 (we tried 1e-4~1e-15) gives a best validation AUC of 0.70582 (first point), which is quite different from the SigOpt result. While the exact differences in mechanism are unknown, we speculate that this is because our local BO package only uses the Matern kernel from sklearn, and alpha (the noise level added to the kernel) is the only Gaussian-process parameter that we can control, whereas [this paper](https://static.sigopt.com/b/20a144d208ef255d3b981ce419667ec25d8412e2/pdf/SigOpt_Bayesian_Optimization_Primer.pdf) indicates that SigOpt draws on existing open-source tools including SPEARMINT, MOE, HYPEROPT, and SMAC, and is therefore likely to be more flexible in choosing the best Gaussian process to optimize the target function.

In [1]:
from __future__ import print_function
from __future__ import division

import string
import re
import random

import os
import sys
import argparse
import time
import math

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
from torchviz import make_dot, make_dot_from_trace

from sklearn.metrics import roc_auc_score  
from sklearn.metrics import roc_curve 

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np

try:
    import cPickle as pickle
except:
    import pickle


from bayes_opt import BayesianOptimization

In [2]:
# Turn off the cuDNN backend for this session.
# NOTE(review): the reason is not recorded in this notebook — confirm it is still needed.
torch.backends.cudnn.enabled = False

In [3]:
# Make GPU 0 the current CUDA device (model_tune also moves the model to device 0).
# NOTE(review): this runs before the torch.cuda.is_available() check below — presumably
# it fails on a machine without CUDA; confirm.
torch.cuda.set_device(0)

In [4]:
import model_HPS as model #this changed
import Loaddata_final as Loaddata
import TrVaTe as TVT #This changed 

# Check GPU availability. `use_cuda` is read inside model_tune to decide whether the
# model is moved to the GPU; the bare expression below displays the flag as cell output.
use_cuda = torch.cuda.is_available()
use_cuda

True

In [5]:
# Load data set and target values.
# NOTE: pickle can execute arbitrary code while loading; only open trusted files here.
# `with` closes each file handle as soon as the object has been read.
with open('Data/h143.visits', 'rb') as visits_file:
    set_x = pickle.load(visits_file, encoding='bytes')
with open('Data/h143.labels', 'rb') as labels_file:
    set_y = pickle.load(labels_file, encoding='bytes')

# Disabled alternative: flatten each patient's visits into one flat list of codes.
# model_x = []
# for patient in set_x:
#     model_x.append([each for visit in patient for each in visit])
model_x = set_x  #this is for the rest of the models

# Pair every label with its patient record: [label, visits]
merged_set= [[set_y[i],model_x[i]] for i in range(len(set_y))] #list of list or list of lists of lists
print("\nLoading and preparing data...")
train1, valid1, test1 = Loaddata.load_data(merged_set)
print("\nSample data after split:")
print(train1[0])
# NOTE(review): the label printed here says RNN_GRU while the search below uses the
# plain RNN cell (ct_code == 1) — confirm which label is intended.
print("model is", 'RNN_GRU') #can change afterwards, currently on most basic RNN


Loading and preparing data...

Sample data after split:
[0, [[1667, 144, 62, 85], [1667, 144, 62, 85]]]
model is RNN_GRU


In [6]:
def print2file(buf, outFile):
    """Append one line of text to a log file.

    Args:
        buf: The text to write; a trailing newline is added.
        outFile: Path of the file to append to (created if it does not exist).
    """
    # The context manager closes the file even if the write raises.
    with open(outFile, 'a') as outfd:
        outfd.write(buf + '\n')

# Pipe-delimited results log; model_tune appends one row per evaluated configuration.
logFile='RNN_plain_confirmation.log'
header = 'Model|EmbSize|CellType|n_Layers|Hidden|Dropout|Optimizer|LR|L2|EPs|BestValidAUC|TestAUC|atEpoch'
# The log is opened in append mode, so only write the header when the log is new/empty;
# otherwise re-running this cell would insert a duplicate header between result rows.
if not os.path.exists(logFile) or os.path.getsize(logFile) == 0:
    print2file(header, logFile)

In [7]:
# Upper bound on training epochs per model_tune call; its early-stopping check may end training sooner.
epochs = 100

In [8]:
def model_tune(dlm_code, ct_code, opt_code, embdim_exp, hid_exp, layers_n, dropout, l2_exp , lr_exp, eps_exp):
    """Train one model configuration and return its best validation AUC.

    This is the objective function handed to the Bayesian optimizer. All arguments
    may arrive as floats; the integer-valued ones are truncated with ``int``.

    Args:
        dlm_code: Model family code: 1 -> model.EHR_RNN ('RNN'), 2 -> model.DRNN ('DRNN').
        ct_code: Recurrent cell code: 1 -> 'RNN', 2 -> 'LSTM', 3 -> 'GRU'.
        opt_code: Optimizer code: 1 Adadelta, 2 Adagrad, 3 Adam, 4 Adamax,
            5 RMSprop, 6 ASGD, 7 SGD.
        embdim_exp: Embedding size is ``2 ** int(embdim_exp)``.
        hid_exp: Hidden size is ``2 ** int(hid_exp)``.
        layers_n: Number of layers (truncated to int).
        dropout: Dropout value, rounded to 4 decimals.
        l2_exp: Weight decay is ``10 ** int(l2_exp)``.
        lr_exp: Learning rate is ``10 ** int(lr_exp)``.
        eps_exp: Optimizer epsilon is ``10 ** int(eps_exp)`` (only passed to the
            optimizers that are given ``eps`` below).

    Returns:
        The best validation AUC observed during training.

    Raises:
        ValueError: If ``dlm_code``, ``ct_code`` or ``opt_code`` is not a known code.

    Side effects:
        Saves the best model under ``models/`` and appends a result row to ``logFile``.
    """
    import copy  # local import: `copy` is not part of the notebook's import cell

    #little transformations to use the searched values
    embed_dim = 2 ** int(embdim_exp)
    hidden_size = 2 ** int(hid_exp)
    n_layers = int(layers_n)
    dropout = round(dropout, 4)
    l2 = 10 ** int(l2_exp)
    lr = 10 ** int(lr_exp)
    eps = 10 ** int(eps_exp)

    #dealing with categorical data
    cell_types = {1: 'RNN', 2: 'LSTM', 3: 'GRU'}
    if int(ct_code) not in cell_types:
        raise ValueError('Unknown ct_code %r; expected 1 (RNN), 2 (LSTM) or 3 (GRU)' % (ct_code,))
    cell_type = cell_types[int(ct_code)]

    # NOTE(review): 20000 is presumably the input vocabulary size — confirm against model_HPS.
    if int(dlm_code) == 1:
        w_model = 'RNN'
        ehr_model = model.EHR_RNN(20000, embed_dim, hidden_size, n_layers, dropout, cell_type)
    elif int(dlm_code) == 2:
        w_model = 'DRNN'
        ehr_model = model.DRNN(20000, embed_dim, hidden_size, n_layers, dropout, cell_type)
    else:
        raise ValueError('Unknown dlm_code %r; expected 1 (RNN) or 2 (DRNN)' % (dlm_code,))

    if use_cuda:
        ehr_model = ehr_model.cuda(0)

    # Optimizer code -> (display name, factory). Factories are lazy so that only the
    # selected optimizer is constructed. Adagrad, ASGD and SGD are not given eps.
    optimizer_factories = {
        1: ('Adadelta', lambda p: optim.Adadelta(p, lr=lr, weight_decay=l2, eps=eps)),  ## rho=0.9
        2: ('Adagrad', lambda p: optim.Adagrad(p, lr=lr, weight_decay=l2)),  ##lr_decay no eps
        3: ('Adam', lambda p: optim.Adam(p, lr=lr, weight_decay=l2, eps=eps)),  ## Beta defaults (0.9, 0.999), amsgrad (false)
        4: ('Adamax', lambda p: optim.Adamax(p, lr=lr, weight_decay=l2, eps=eps)),  ### Beta defaults (0.9, 0.999)
        5: ('RMSprop', lambda p: optim.RMSprop(p, lr=lr, weight_decay=l2, eps=eps)),
        6: ('ASGD', lambda p: optim.ASGD(p, lr=lr, weight_decay=l2)),  ### other parameters
        7: ('SGD', lambda p: optim.SGD(p, lr=lr, weight_decay=l2)),  ### other parameters
    }
    if int(opt_code) not in optimizer_factories:
        raise ValueError('Unknown opt_code %r; expected an integer code in 1..7' % (opt_code,))
    opt, make_optimizer = optimizer_factories[int(opt_code)]
    optimizer = make_optimizer(ehr_model.parameters())

    bestValidAuc = 0.0
    bestValidEpoch = 0
    best_state = None  # weights at the best validation epoch
    patience = 12      # stop once this many epochs pass without a new best

    for ep in range(epochs):
        TVT.train(train1, model= ehr_model, optimizer = optimizer, batch_size = 200)
        valid_auc, _, _ = TVT.calculate_auc(model = ehr_model, data = valid1, which_model = w_model, batch_size = 200)
        if valid_auc > bestValidAuc:
            bestValidAuc = valid_auc
            bestValidEpoch = ep
            # Snapshot the weights: keeping only a reference to ehr_model would track
            # every later update, so the "best" model would really be the last one.
            best_state = copy.deepcopy(ehr_model.state_dict())

        if ep - bestValidEpoch > patience:
            break

    # Roll back to the best-epoch weights before testing and saving. If validation AUC
    # never improved on 0.0, the final weights are used as they are.
    if best_state is not None:
        ehr_model.load_state_dict(best_state)
    best_model = ehr_model

    fname= w_model+'E'+str(embed_dim)+cell_type+'L'+str(n_layers)+'H'+str(hidden_size)+'D'+str(dropout)+opt+'L'+str(lr)+'P'+str(l2)
    os.makedirs('models', exist_ok=True)  # torch.save does not create missing directories
    bmodel_pth='models/'+fname
    bestTestAuc, _, _ = TVT.calculate_auc(model = best_model, data = test1, which_model = w_model, batch_size = 200)
    torch.save(best_model, bmodel_pth)
    buf = '|%f |%f |%d ' % (bestValidAuc, bestTestAuc, bestValidEpoch )

    # One pipe-delimited result row, in the column order of the log header.
    pFile= w_model+'|'+str(embed_dim)+'|'+cell_type+'|'+str(n_layers)+'|'+str(hidden_size)+'|'+str(dropout)+'|'+opt+'|'+str(lr)+'|'+str(l2)+'|'+str(eps)+ buf
    print2file(pFile, logFile)

    return bestValidAuc

In [None]:
if __name__ == "__main__":
    # Keyword arguments forwarded to NNBO.maximize; alpha is the noise level of the
    # Gaussian process used by the optimizer.
    gp_params = {"alpha": 1e-14} #change this noise level

    # The search loops live inside the guard together with gp_params, so they can never
    # run without it being defined.
    #Here we loop through different models, change the model tune
    for cti in range(1,2):  #Just plainRNN (==1)
        for dlmi in range(1,2):#just RNN (==1)
            for opti in range(5,6): #RMSprop
                print('\n Now Tuning model with Bayesian Optimization: ','cell code', str(cti),'model code', str(dlmi),'optimizer code',str(opti))
                # Categorical codes get degenerate (x, x) bounds so they stay fixed.
                NNBO = BayesianOptimization(model_tune,
                                            {'dlm_code':(dlmi,dlmi), 'ct_code': (cti, cti), 'opt_code':(opti, opti),
                                             'embdim_exp': (5, 9),'hid_exp': (5, 9),'layers_n': (1, 3),'dropout': (0, 1),
                                             'l2_exp': (-6, -1), 'lr_exp': (-7, -2), 'eps_exp': (-9, -4)})
                # Evaluate the best SigOpt configuration first, as an explicit starting point.
                NNBO.explore({'dlm_code':[dlmi], 'ct_code': [cti], 'opt_code':[opti],'embdim_exp': [9],
                              'hid_exp': [9],'layers_n': [1],'dropout': [0.75457543972787],'l2_exp': [-5], 'lr_exp': [-3], 'eps_exp':[-4]},
                              eager = True)

                NNBO.maximize(n_iter=5, **gp_params)

                print('-' * 53)
                print('Final Results')
                print('RNN / DRNN: %f' % NNBO.res['max']['max_val'])

                print2file(str(NNBO.res['max']), logFile)


 Now Tuning model with Bayesian Optimization:  cell code 1 model code 1 optimizer code 5
[31mInitialization[0m
[94m-----------------------------------------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |   ct_code |   dlm_code |   dropout |   embdim_exp |   eps_exp |   hid_exp |    l2_exp |   layers_n |    lr_exp |   opt_code | 
    1 | 09m06s | [35m   0.70582[0m | [32m   1.0000[0m | [32m    1.0000[0m | [32m   0.7546[0m | [32m      9.0000[0m | [32m  -4.0000[0m | [32m   9.0000[0m | [32m  -5.0000[0m | [32m    1.0000[0m | [32m  -3.0000[0m | [32m    5.0000[0m | 
[31mInitialization[0m
[94m-----------------------------------------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |   ct_code |   dlm_code |   dropout |   embdim_exp |   eps_exp |   hid_exp |    l2_exp |  