Mia test (#7)
only updates to the model file
2miatran authored and lrasmy committed Mar 25, 2019
1 parent a2a55df commit 6cb003a
Showing 3 changed files with 28 additions and 8 deletions.
12 changes: 9 additions & 3 deletions ehr_pytorch/main.py
@@ -58,9 +58,12 @@ def main():

#EHRdataloader
parser.add_argument('-root_dir', type = str, default = '../data/' , help='the path to the folders with pickled file(s)')

### Kept the original -files argument; unique naming for files is not enforced
parser.add_argument('-files', type = list, default = ['hf.train'], help='''the list of name(s) of pickled file(s).
If list of 1: data will be first split into train, validation and test, then 3 dataloaders will be created.
If list of 3: 3 dataloaders will be created from 3 files directly. Please give files in this order: training, validation and test.''')

parser.add_argument('-test_ratio', type = float, default = 0.2, help='test data size [default: 0.2]')
parser.add_argument('-valid_ratio', type = float, default = 0.1, help='validation data size [default: 0.1]')
parser.add_argument('-batch_size', type=int, default=128, help='batch size for training, validation or test [default: 128]')
@@ -95,14 +98,15 @@ def main():


####Step1. Data preparation

print(colored("\nLoading and preparing data...", 'green'))
if len(args.files) == 1:
print('1 file found. Data will be split into train, validation and test.')
data = EHRdataFromPickles(root_dir = args.root_dir,
file = args.files[0],
sort= False,
test_ratio = args.test_ratio,
valid_ratio = args.valid_ratio) #prevent shuffle before splitting
valid_ratio = args.valid_ratio) #No sort before splitting

# Dataloader splits
train, test, valid = data.__splitdata__() #this time, sort is true
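As the -files help text above describes, a single pickled file is split by test_ratio and valid_ratio before the three dataloaders are built. A quick sketch of the arithmetic implied by the defaults (the record count is hypothetical; the actual split lives inside EHRdataFromPickles):

n = 10000                       # hypothetical number of patient records
n_test  = int(n * 0.2)          # test_ratio = 0.2  -> 2000 records
n_valid = int(n * 0.1)          # valid_ratio = 0.1 -> 1000 records
n_train = n - n_test - n_valid  # remaining 70%     -> 7000 records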
@@ -132,6 +136,7 @@ def main():
trainloader = EHRdataloader(train, batch_size = args.batch_size)
validloader = EHRdataloader(valid, batch_size = args.batch_size)
testloader = EHRdataloader(test, batch_size = args.batch_size)



#####Step2. Model loading
@@ -163,7 +168,7 @@ def main():
dropout_r=args.dropout_r, #default =0.1
cell_type= 'QRNN', #doesn't support normal cell types
bii= False, #QRNN doesn't support bidirectional
time = args.time,
preTrainEmb= args.preTrainEmb)

elif args.which_model == 'TLSTM':
@@ -238,11 +243,12 @@ def main():
output_dir = args.output_dir,
model_prefix = args.model_prefix,
model_customed = args.model_customed)

#we can keyboard-interrupt now
except KeyboardInterrupt:
print(colored('-' * 89, 'green'))
print(colored('Exiting from training early','green'))

#run main() when the file is executed as a script
if __name__ == "__main__":
main()
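The try/except around the training call above lets a long run be stopped from the terminal without losing the session. A minimal sketch of the same pattern, with train_one_epoch() as a hypothetical stand-in for the real epochs_run(...) call:

def train_one_epoch():
    pass  # forward/backward passes would run here

try:
    for epoch in range(100):
        train_one_epoch()
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')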
16 changes: 12 additions & 4 deletions ehr_pytorch/models.py
@@ -25,7 +25,9 @@
# Model 1:RNN & Variations: GRU, LSTM, Bi-RNN, Bi-GRU, Bi-LSTM
class EHR_RNN(EHREmbeddings):
def __init__(self,input_size,embed_dim, hidden_size, n_layers=1,dropout_r=0.1,cell_type='GRU',bii=False ,time=False, preTrainEmb='',packPadMode = True):
EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers=1,dropout_r=0.1,cell_type='GRU', bii=False, time=False , preTrainEmb='',packPadMode = True)

EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers, dropout_r, cell_type, bii, time , preTrainEmb, packPadMode)



#embedding function goes here
@@ -67,7 +69,9 @@ def forward(self, input):
#Model 2: DRNN, DGRU, DLSTM
class EHR_DRNN(EHREmbeddings):
def __init__(self,input_size,embed_dim, hidden_size, n_layers, dropout_r=0.1,cell_type='GRU', bii=False, time=False, preTrainEmb='', packPadMode = False):
EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers ,dropout_r=0.1,cell_type='GRU', time=False , preTrainEmb='', packPadMode = False)

EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers, dropout_r, cell_type, time , preTrainEmb, packPadMode)

#super(DRNN, self).__init__()
#The additional parameters that normal RNNs don't have

@@ -210,7 +214,9 @@ def init_hidden(self, batch_size, hidden_size):
# Model 3: QRNN
class EHR_QRNN(EHREmbeddings):
def __init__(self,input_size,embed_dim, hidden_size, n_layers =1 ,dropout_r=0.1, cell_type='QRNN', bii=False, time=False, preTrainEmb='', packPadMode = False):
EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers = 1 ,dropout_r=0.1, cell_type='QRNN', time=False, preTrainEmb='', packPadMode = False)

EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers, dropout_r, cell_type, time , preTrainEmb, packPadMode)

#super(EHR_QRNN, self).__init__()
#basically, we don't allow cell_type and bii choices
#let's enforce these:
@@ -239,7 +245,9 @@ def forward(self, input):
# Model 4: T-LSTM
class EHR_TLSTM(EHREmbeddings):
def __init__(self,input_size,embed_dim, hidden_size, n_layers =1 ,dropout_r=0.1, cell_type='TLSTM', bii=False, time=False, preTrainEmb=''):
EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers = 1 ,dropout_r=0.1, cell_type='TLSTM', time=False, preTrainEmb='')

EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers, dropout_r, cell_type, time , preTrainEmb)

#test the EHR_TLSTM() parameters please
#modify something here to make sure everything runs correctly
'''ask Laila if I implemented the right model parameters regarding time, bii, and pretrained,
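The recurring change across all four model classes is the substance of this commit: the old code re-passed hardcoded defaults (n_layers=1, dropout_r=0.1, cell_type='GRU', ...) to EHREmbeddings.__init__, silently discarding whatever the caller had chosen, while the new code forwards the subclass's own parameters. A stripped-down illustration of the bug and its fix (simplified names, not the repository's classes):

class Base:
    def __init__(self, hidden_size, n_layers=1, dropout_r=0.1):
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.dropout_r = dropout_r

class BuggyChild(Base):
    def __init__(self, hidden_size, n_layers=1, dropout_r=0.1):
        # Bug: literals are re-passed, so a caller's n_layers=3 is lost
        Base.__init__(self, hidden_size, n_layers=1, dropout_r=0.1)

class FixedChild(Base):
    def __init__(self, hidden_size, n_layers=1, dropout_r=0.1):
        # Fix: forward the received arguments instead of the defaults
        Base.__init__(self, hidden_size, n_layers, dropout_r)

assert BuggyChild(16, n_layers=3).n_layers == 1  # caller's choice ignored
assert FixedChild(16, n_layers=3).n_layers == 3  # caller's choice respected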
8 changes: 7 additions & 1 deletion ehr_pytorch/utils.py
@@ -86,14 +86,15 @@ def trainsample(sample, model, optimizer, criterion = nn.BCELoss()):


#train with loaders

def trainbatches(loader, model, optimizer, shuffle = True): #we don't need print_every = 10, plot_every = 5 anymore
current_loss = 0
all_losses =[]
plot_every = 5
n_iter = 0
if shuffle:
#we shuffle batches if shuffle is true
loader = iter_batch2(loader, len(loader))
for i,batch in enumerate(loader):
#batch.to(device) #see if it works
output, loss = trainsample(batch, model, optimizer, criterion = nn.BCELoss())
@@ -107,7 +108,9 @@ def trainbatches(loader, model, optimizer, shuffle = True):
return current_loss, all_losses
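iter_batch2 is not shown in this diff; from the call site it appears to yield all of the loader's batches in random order when shuffle is true. A guessed equivalent, purely illustrative and not the repository's implementation:

import random

def shuffled_batches(loader, n):
    batches = list(loader)[:n]   # materialize up to n batches
    random.shuffle(batches)      # randomize their order
    for batch in batches:
        yield batch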



def calculate_auc(model, loader, which_model = 'RNN', shuffle = True): # batch_size= 128 not needed

y_real =[]
y_hat= []
if shuffle:
@@ -129,6 +132,7 @@ def calculate_auc(model, loader, which_model = 'RNN', shuffle = True):


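calculate_auc accumulates true labels in y_real and model scores in y_hat; the elided remainder of the function presumably feeds them to an AUC routine. A hedged sketch of that final step using scikit-learn (an assumption, as the repository may compute AUC differently):

from sklearn.metrics import roc_auc_score

y_real = [0, 1, 1, 0, 1]             # true labels gathered over the loader
y_hat  = [0.2, 0.8, 0.6, 0.3, 0.9]   # model output probabilities
print('AUC: %.3f' % roc_auc_score(y_real, y_hat))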
#define the final run over epochs, using the different model file names

def epochs_run(epochs, train, valid, test, model, optimizer, shuffle = True, which_model = 'RNN', patience = 20, output_dir = '../models/', model_prefix = 'hf.train', model_customed= ''):
bestValidAuc = 0.0
bestTestAuc = 0.0
@@ -139,6 +143,7 @@ def epochs_run(epochs, train, valid, test, model, optimizer, shuffle = True, which_model = 'RNN', patience = 20, output_dir = '../models/', model_prefix = 'hf.train', model_customed= ''):
for ep in range(epochs):
start = time.time()
current_loss, train_loss = trainbatches(loader = train, model= model, optimizer = optimizer)

train_time = timeSince(start)
#epoch_loss.append(train_loss)
avg_loss = np.mean(train_loss)
@@ -176,5 +181,6 @@
print2file(pFile, logFile)
print(colored('BestValidAuc %f has a TestAuc of %f at epoch %d ' % (bestValidAuc, bestTestAuc, bestValidEpoch),'green'))
print(colored('Details see ../models/%sEHRmodel.log' %(model_prefix + model_customed),'green'))
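epochs_run tracks bestValidAuc and bestValidEpoch and takes a patience argument (default 20), which suggests standard early stopping: training stops once validation AUC has not improved for patience epochs. A minimal sketch of that logic under those assumptions, with evaluate() as a hypothetical per-epoch validation-AUC callback:

def run(epochs=100, patience=20, evaluate=lambda ep: 0.5):
    bestValidAuc, bestValidEpoch = 0.0, 0
    for ep in range(epochs):
        valid_auc = evaluate(ep)
        if valid_auc > bestValidAuc:
            bestValidAuc, bestValidEpoch = valid_auc, ep
        elif ep - bestValidEpoch > patience:
            break  # no improvement for `patience` epochs: stop early
    return bestValidAuc, bestValidEpoch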


