Mia test (#7)
only updates to the model file
2miatran authored and lrasmy committed Mar 25, 2019
1 parent a2a55df commit 6cb003a
Showing 3 changed files with 28 additions and 8 deletions.
12 changes: 9 additions & 3 deletions ehr_pytorch/main.py
@@ -58,9 +58,12 @@ def main():

#EHRdataloader
parser.add_argument('-root_dir', type = str, default = '../data/' , help='the path to the folders with pickled file(s)')

### Kept the original -files argument; unique naming for files is not enforced
parser.add_argument('-files', type = list, default = ['hf.train'], help='''the list of name(s) of pickled file(s).
If list of 1: data will be first split into train, validation and test, then 3 dataloaders will be created.
If list of 3: 3 dataloaders will be created from 3 files directly. Please give files in this order: training, validation and test.''')

parser.add_argument('-test_ratio', type = float, default = 0.2, help='test data size [default: 0.2]')
parser.add_argument('-valid_ratio', type = float, default = 0.1, help='validation data size [default: 0.1]')
parser.add_argument('-batch_size', type=int, default=128, help='batch size for training, validation or test [default: 128]')
@@ -95,14 +98,15 @@ def main():


####Step1. Data preparation

print(colored("\nLoading and preparing data...", 'green'))
if len(args.files) == 1:
print('1 file found. Data will be split into train, validation and test.')
data = EHRdataFromPickles(root_dir = args.root_dir,
file = args.files[0],
sort= False,
test_ratio = args.test_ratio,
valid_ratio = args.valid_ratio) #prevent shuffle before splitting
valid_ratio = args.valid_ratio) #No sort before splitting

# Dataloader splits
train, test, valid = data.__splitdata__() #this time, sort is true
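As the -files help text above describes, a single pickled file is split by test_ratio and valid_ratio before the three dataloaders are built. A quick sketch of the arithmetic implied by the defaults (the record count is hypothetical; the actual split lives inside EHRdataFromPickles):

n = 10000                       # hypothetical number of patient records
n_test  = int(n * 0.2)          # test_ratio = 0.2  -> 2000 records
n_valid = int(n * 0.1)          # valid_ratio = 0.1 -> 1000 records
n_train = n - n_test - n_valid  # remaining 70%     -> 7000 records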
@@ -132,6 +136,7 @@ def main():
trainloader = EHRdataloader(train, batch_size = args.batch_size)
validloader = EHRdataloader(valid, batch_size = args.batch_size)
testloader = EHRdataloader(test, batch_size = args.batch_size)



#####Step2. Model loading
@@ -163,7 +168,7 @@ def main():
dropout_r=args.dropout_r, #default =0.1
cell_type= 'QRNN', #doesn't support normal cell types
bii= False, #QRNN doesn't support bidirectional
time = args.time,
preTrainEmb= args.preTrainEmb)

elif args.which_model == 'TLSTM':
@@ -238,11 +243,12 @@ def main():
output_dir = args.output_dir,
model_prefix = args.model_prefix,
model_customed = args.model_customed)

#we can keyboard-interrupt now
except KeyboardInterrupt:
print(colored('-' * 89, 'green'))
print(colored('Exiting from training early','green'))

#run main() when the file is executed as a script
if __name__ == "__main__":
main()
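The try/except around the training call above lets a long run be stopped from the terminal without losing the session. A minimal sketch of the same pattern, with train_one_epoch() as a hypothetical stand-in for the real epochs_run(...) call:

def train_one_epoch():
    pass  # forward/backward passes would run here

try:
    for epoch in range(100):
        train_one_epoch()
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')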
16 changes: 12 additions & 4 deletions ehr_pytorch/models.py
@@ -25,7 +25,9 @@
# Model 1:RNN & Variations: GRU, LSTM, Bi-RNN, Bi-GRU, Bi-LSTM
class EHR_RNN(EHREmbeddings):
def __init__(self,input_size,embed_dim, hidden_size, n_layers=1,dropout_r=0.1,cell_type='GRU',bii=False ,time=False, preTrainEmb='',packPadMode = True):
EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers=1,dropout_r=0.1,cell_type='GRU', bii=False, time=False , preTrainEmb='',packPadMode = True)

EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers, dropout_r, cell_type, bii, time , preTrainEmb, packPadMode)



#embedding function goes here
@@ -67,7 +69,9 @@ def forward(self, input):
#Model 2: DRNN, DGRU, DLSTM
class EHR_DRNN(EHREmbeddings):
def __init__(self,input_size,embed_dim, hidden_size, n_layers, dropout_r=0.1,cell_type='GRU', bii=False, time=False, preTrainEmb='', packPadMode = False):
EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers ,dropout_r=0.1,cell_type='GRU', time=False , preTrainEmb='', packPadMode = False)

EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers, dropout_r, cell_type, time , preTrainEmb, packPadMode)

#super(DRNN, self).__init__()
#The additional parameters that normal RNNs don't have

@@ -210,7 +214,9 @@ def init_hidden(self, batch_size, hidden_size):
# Model 3: QRNN
class EHR_QRNN(EHREmbeddings):
def __init__(self,input_size,embed_dim, hidden_size, n_layers =1 ,dropout_r=0.1, cell_type='QRNN', bii=False, time=False, preTrainEmb='', packPadMode = False):
EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers = 1 ,dropout_r=0.1, cell_type='QRNN', time=False, preTrainEmb='', packPadMode = False)

EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers, dropout_r, cell_type, time , preTrainEmb, packPadMode)

#super(EHR_QRNN, self).__init__()
#basically, we don't allow cell_type and bii choices
#let's enforce these:
@@ -239,7 +245,9 @@ def forward(self, input):
# Model 4: T-LSTM
class EHR_TLSTM(EHREmbeddings):
def __init__(self,input_size,embed_dim, hidden_size, n_layers =1 ,dropout_r=0.1, cell_type='TLSTM', bii=False, time=False, preTrainEmb=''):
EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers = 1 ,dropout_r=0.1, cell_type='TLSTM', time=False, preTrainEmb='')

EHREmbeddings.__init__(self,input_size, embed_dim ,hidden_size, n_layers, dropout_r, cell_type, time , preTrainEmb)

#test the EHR_TLSTM() parameters please
#modify something here to make sure everything runs correctly
'''ask Laila if I implemented the right model parameters regarding time, bii, and pretrained,
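The recurring change across all four model classes is the substance of this commit: the old code re-passed hardcoded defaults (n_layers=1, dropout_r=0.1, cell_type='GRU', ...) to EHREmbeddings.__init__, silently discarding whatever the caller had chosen, while the new code forwards the subclass's own parameters. A stripped-down illustration of the bug and its fix (simplified names, not the repository's classes):

class Base:
    def __init__(self, hidden_size, n_layers=1, dropout_r=0.1):
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.dropout_r = dropout_r

class BuggyChild(Base):
    def __init__(self, hidden_size, n_layers=1, dropout_r=0.1):
        # Bug: literals are re-passed, so a caller's n_layers=3 is lost
        Base.__init__(self, hidden_size, n_layers=1, dropout_r=0.1)

class FixedChild(Base):
    def __init__(self, hidden_size, n_layers=1, dropout_r=0.1):
        # Fix: forward the received arguments instead of the defaults
        Base.__init__(self, hidden_size, n_layers, dropout_r)

assert BuggyChild(16, n_layers=3).n_layers == 1  # caller's choice ignored
assert FixedChild(16, n_layers=3).n_layers == 3  # caller's choice respected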
8 changes: 7 additions & 1 deletion ehr_pytorch/utils.py
@@ -86,14 +86,15 @@ def trainsample(sample, model, optimizer, criterion = nn.BCELoss()):


#train with loaders

def trainbatches(loader, model, optimizer, shuffle = True): #we don't need print_every = 10, plot_every = 5 anymore
current_loss = 0
all_losses =[]
plot_every = 5
n_iter = 0
if shuffle:
#we shuffle batches if shuffle is true
loader = iter_batch2(loader, len(loader))
for i,batch in enumerate(loader):
#batch.to(device) #see if it works
output, loss = trainsample(batch, model, optimizer, criterion = nn.BCELoss())
@@ -107,7 +108,9 @@ def trainbatches(loader, model, optimizer, shuffle = True):
return current_loss, all_losses
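iter_batch2 is not shown in this diff; from the call site it appears to yield all of the loader's batches in random order when shuffle is true. A guessed equivalent, purely illustrative and not the repository's implementation:

import random

def shuffled_batches(loader, n):
    batches = list(loader)[:n]   # materialize up to n batches
    random.shuffle(batches)      # randomize their order
    for batch in batches:
        yield batch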



def calculate_auc(model, loader, which_model = 'RNN', shuffle = True): # batch_size= 128 not needed

y_real =[]
y_hat= []
if shuffle:
@@ -129,6 +132,7 @@ def calculate_auc(model, loader, which_model = 'RNN', shuffle = True):


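calculate_auc accumulates true labels in y_real and model scores in y_hat; the elided remainder of the function presumably feeds them to an AUC routine. A hedged sketch of that final step using scikit-learn (an assumption, as the repository may compute AUC differently):

from sklearn.metrics import roc_auc_score

y_real = [0, 1, 1, 0, 1]             # true labels gathered over the loader
y_hat  = [0.2, 0.8, 0.6, 0.3, 0.9]   # model output probabilities
print('AUC: %.3f' % roc_auc_score(y_real, y_hat))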
#define the final run over epochs, using the different model file names

def epochs_run(epochs, train, valid, test, model, optimizer, shuffle = True, which_model = 'RNN', patience = 20, output_dir = '../models/', model_prefix = 'hf.train', model_customed= ''):
bestValidAuc = 0.0
bestTestAuc = 0.0
@@ -139,6 +143,7 @@ def epochs_run(epochs, train, valid, test, model, optimizer, shuffle = True, which_model = 'RNN', patience = 20, output_dir = '../models/', model_prefix = 'hf.train', model_customed= ''):
for ep in range(epochs):
start = time.time()
current_loss, train_loss = trainbatches(loader = train, model= model, optimizer = optimizer)

train_time = timeSince(start)
#epoch_loss.append(train_loss)
avg_loss = np.mean(train_loss)
@@ -176,5 +181,6 @@
print2file(pFile, logFile)
print(colored('BestValidAuc %f has a TestAuc of %f at epoch %d ' % (bestValidAuc, bestTestAuc, bestValidEpoch),'green'))
print(colored('Details see ../models/%sEHRmodel.log' %(model_prefix + model_customed),'green'))
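epochs_run tracks bestValidAuc and bestValidEpoch and takes a patience argument (default 20), which suggests standard early stopping: training stops once validation AUC has not improved for patience epochs. A minimal sketch of that logic under those assumptions, with evaluate() as a hypothetical per-epoch validation-AUC callback:

def run(epochs=100, patience=20, evaluate=lambda ep: 0.5):
    bestValidAuc, bestValidEpoch = 0.0, 0
    for ep in range(epochs):
        valid_auc = evaluate(ep)
        if valid_auc > bestValidAuc:
            bestValidAuc, bestValidEpoch = valid_auc, ep
        elif ep - bestValidEpoch > patience:
            break  # no improvement for `patience` epochs: stop early
    return bestValidAuc, bestValidEpoch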


