<a href="https://colab.research.google.com/github/joanna-regan/CS598_DL4H_StageNet/blob/main/JR_FinalProject_train_ablation2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Getting us to the appropriate directory:

In [1]:
import os

# Root of the StageNet project on Colab. The relative paths used further down
# ('./data/', './saved_weights/') are resolved against this directory.
project_dir = '/content/drive/MyDrive/UIUC/CS598_DL4H/Colab_Notebooks/JR_StageNet/StageNet'
os.chdir(project_dir)
os.getcwd()

'/content/drive/MyDrive/UIUC/CS598_DL4H/Colab_Notebooks/JR_StageNet/StageNet'

Initial setups:

In [2]:
import numpy as np
import argparse
import os
import imp
import re
import pickle
import random
import matplotlib.pyplot as plt
import matplotlib as mpl
from time import perf_counter
import datetime as dt
from datetime import datetime

# One seed shared by every random number generator used in this notebook.
RANDOM_SEED = 12345

# Seed the standard-library and NumPy generators.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

import torch
from torch import nn
import torch.nn.utils.rnn as rnn_utils
from torch.utils import data
from torch.autograd import Variable
import torch.nn.functional as F

# Seed PyTorch and ask cuDNN for deterministic kernels.
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)
torch.backends.cudnn.deterministic = True

from utils import utils
from utils.readers import DecompensationReader
from utils.preprocessing import Discretizer, Normalizer
from utils import metrics
from utils import common_utils
from model_ablation2 import StageNet_II

Confirm we're using GPU runtime:

In [3]:
# Use the first CUDA GPU when one is visible to PyTorch; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device('cpu')
print("available device: {}".format(device))

available device: cuda:0


Define and load arguments:

In [4]:
def parse_arguments(parser):
    """Register the training options on ``parser`` and parse the command line.

    Args:
        parser: an ``argparse.ArgumentParser``; the options below are added to it.

    Returns:
        The ``argparse.Namespace`` returned by ``parser.parse_args()``.
        ``data_path`` and ``file_name`` have no default and are ``None``
        unless given on the command line.
    """
    # Run / data options.
    parser.add_argument('--test_mode', type=int, default=0, help='Test SA-CRNN on MIMIC-III dataset')
    parser.add_argument('--data_path', type=str, metavar='<data_path>', help='The path to the MIMIC-III data directory')
    # The metavar previously read '<data_path>' (copy-paste slip).
    parser.add_argument('--file_name', type=str, metavar='<file_name>', help='File name to save model')
    parser.add_argument('--small_part', type=int, default=0, help='Use part of training data')
    parser.add_argument('--batch_size', type=int, default=128, help='Training batch size')
    parser.add_argument('--epochs', type=int, default=50, help='Training epochs')
    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate')

    # Model hyper-parameters.
    parser.add_argument('--input_dim', type=int, default=76, help='Dimension of visit record data')
    parser.add_argument('--rnn_dim', type=int, default=384, help='Dimension of hidden units in RNN')
    parser.add_argument('--output_dim', type=int, default=1, help='Dimension of prediction target')
    parser.add_argument('--dropout_rate', type=float, default=0.5, help='Dropout rate')
    parser.add_argument('--dropconnect_rate', type=float, default=0.5, help='Dropout rate in RNN')
    parser.add_argument('--dropres_rate', type=float, default=0.3, help='Dropout rate in residue connection')
    parser.add_argument('--K', type=int, default=10, help='Value of hyper-parameter K')
    # The help text previously duplicated the one for --K.
    parser.add_argument('--chunk_level', type=int, default=3, help='Value of hyper-parameter chunk_level')

    # The Jupyter kernel is started with `-f <connection file>`; accepting the
    # option keeps parse_args() from exiting when this runs inside a notebook.
    parser.add_argument('-f')

    return parser.parse_args()

# Build the parser and obtain the option namespace (defaults plus whatever is on the command line).
parser = argparse.ArgumentParser()
args = parse_arguments(parser)

# JR: notebook-specific overrides, written straight into the namespace.
vars(args).update(
    data_path='./data/',
    file_name='trained_model_ablation2',
    epochs=50,
    small_part=5000,
)

Load the train and validation data:

On local device, this took about 12 minutes for train and 1 minute for val.
On Google Colab, should take approximately 2-3 minutes to load both (with small_part = 5000). If running for over 5 minutes, recommend stopping the cell execution and rerunning.

In [6]:
print('Preparing training data ... ')

# Both loaders read from the same 'train_subdivided' directory but use different list files.
start_time = perf_counter()
train_data_loader = common_utils.DeepSupervisionDataLoader(
    dataset_dir=os.path.join(args.data_path, 'train_subdivided'),
    listfile=os.path.join(args.data_path, 'train_listfile.csv'),
    small_part=args.small_part,
)
timer1 = perf_counter()
val_data_loader = common_utils.DeepSupervisionDataLoader(
    dataset_dir=os.path.join(args.data_path, 'train_subdivided'),
    listfile=os.path.join(args.data_path, 'val_listfile.csv'),
    small_part=args.small_part,
)
end_time = perf_counter()

# Wall-clock time spent constructing each loader.
train_load_time = dt.timedelta(seconds=timer1 - start_time)
val_load_time = dt.timedelta(seconds=end_time - timer1)
print("Time to load train data: " + str(train_load_time))
print("Time to load validation data: " + str(val_load_time))

# Number of entries each loader holds under the "X" key.
train_size = len(train_data_loader._data["X"])
val_size = len(val_data_loader._data["X"])
print("Size of training set: " + str(train_size))
print("Size of validation set: " + str(val_size))

Preparing training data ... 
Generating data...
Generating data...
Time to load train data: 0:03:51.079198
Time to load validation data: 0:05:56.095442
Size of training set: 5000
Size of validation set: 556


In [7]:
# Sanity check: how many entries the training loader holds under "X".
train_stays = train_data_loader._data["X"]
print(len(train_stays))

5000


Instantiate Discretizer and Normalizer objects:

In [8]:
discretizer = Discretizer(
    timestep=1.0,
    store_masks=True,
    impute_strategy='previous',
    start_time='zero',
)

# Element [1] of transform()'s result is split on ',' into column names.
# Columns whose name contains "->" are excluded; the indices of the remaining
# columns are the fields handed to the Normalizer.
first_stay = train_data_loader._data["X"][0]
discretizer_header = discretizer.transform(first_stay)[1].split(',')
cont_channels = [idx for idx, column in enumerate(discretizer_header) if "->" not in column]

normalizer = Normalizer(fields=cont_channels)
# Normalizer parameters are loaded from 'decomp_normalizer' inside the data directory.
normalizer_state = os.path.join(os.path.dirname(args.data_path), 'decomp_normalizer')
normalizer.load_params(normalizer_state)

Load the Batched Generators:

On local machine, this took about 7 minutes for train and 1 minute for validation.
On Google Colab, should only take 30 seconds for small_part = 5000.

In [9]:
print("Preparing Batched Data Loaders")

# The training generator shuffles; the validation generator does not.
start_time = perf_counter()
train_data_gen = utils.BatchGenDeepSupervision(
    train_data_loader, discretizer, normalizer, args.batch_size,
    shuffle=True, return_names=True,
)
timer1 = perf_counter()
val_data_gen = utils.BatchGenDeepSupervision(
    val_data_loader, discretizer, normalizer, args.batch_size,
    shuffle=False, return_names=True,
)
end_time = perf_counter()

# Wall-clock time spent constructing each generator.
train_gen_time = dt.timedelta(seconds=timer1 - start_time)
val_gen_time = dt.timedelta(seconds=end_time - timer1)
print("Time to load train generator: " + str(train_gen_time))
print("Time to load validation generator: " + str(val_gen_time))

Preparing Batched Data Loaders
Time to load train generator: 0:00:24.335172
Time to load validation generator: 0:00:02.787696


Model Construction:

In [10]:
print('Constructing model ... ')
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device('cpu')
print("available device: {}".format(device))

# The input is 17 channels wider than args.input_dim: the training loop
# concatenates a 17-channel interval tensor onto every batch before the forward pass.
model_input_dim = args.input_dim + 17
model = StageNet_II(
    model_input_dim,
    args.rnn_dim,
    args.K,
    args.output_dim,
    args.chunk_level,
    args.dropconnect_rate,
    args.dropout_rate,
    args.dropres_rate,
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)


Constructing model ... 
available device: cuda:0


Training:

In [11]:
print('Start training ... ')

# Sequences longer than this many timesteps are cut before the forward pass.
MAX_SEQ_LEN = 400
# Width of the trailing slice of the input features that is read as indicators
# (the same 17 that is added to args.input_dim when the model is built).
NUM_INDICATOR_CHANNELS = 17


def _prepare_batch(raw_batch, device, max_len=MAX_SEQ_LEN, num_indicators=NUM_INDICATOR_CHANNELS):
    """Turn the 'data' entry of one generator batch into model-ready tensors.

    Args:
        raw_batch: indexable where raw_batch[0][0] is the input features,
            raw_batch[0][1] the mask and raw_batch[1] the targets.
        device: torch device the tensors are placed on.
        max_len: sequences with more timesteps than this are truncated.
        num_indicators: number of trailing feature channels read as indicators.

    Returns:
        (x, mask, y, time): x is the (batch, time, features) input with the
        interval channels appended, mask and y are cut to the same length,
        and time is a (batch, time) tensor of ones.
    """
    x = torch.tensor(raw_batch[0][0], dtype=torch.float32).to(device)
    mask = torch.tensor(raw_batch[0][1], dtype=torch.float32).unsqueeze(-1).to(device)
    y = torch.tensor(raw_batch[1], dtype=torch.float32).to(device)

    # Truncate before building the interval features. The interval at step i
    # only depends on steps <= i, so this yields the same values as truncating
    # afterwards while skipping the work for the discarded steps.
    if mask.size(1) > max_len:
        x = x[:, :max_len, :]
        mask = mask[:, :max_len, :]
        y = y[:, :max_len, :]

    # Running count, per sample and channel, of steps whose indicator was 0
    # since the indicator last equalled 1.
    # NOTE(review): presumably the indicators are exactly 0/1 - confirm against the Discretizer.
    steps_since_one = torch.zeros(x.size(0), num_indicators, dtype=torch.float32, device=device)
    interval = torch.zeros((x.size(0), x.size(1), num_indicators), dtype=torch.float32, device=device)
    for step in range(x.size(1)):
        cur_ind = x[:, step, -num_indicators:]
        steps_since_one += (cur_ind == 0).float()
        # Non-zero only where the indicator itself is non-zero.
        interval[:, step, :] = cur_ind * steps_since_one
        steps_since_one[cur_ind == 1] = 0

    x = torch.cat((x, interval), dim=-1)
    time = torch.ones((x.size(0), x.size(1)), dtype=torch.float32, device=device)
    return x, mask, y, time


def _masked_bce_loss(output, mask, target):
    """Binary cross-entropy on mask-multiplied outputs, summed over the batch.

    The log-likelihood is summed over dim 1 (time), divided by the per-sequence
    sum of the mask, then summed over the remaining dims and negated.
    """
    masked_output = output * mask
    log_lik = target * torch.log(masked_output + 1e-7) + (1 - target) * torch.log(1 - masked_output + 1e-7)
    log_lik = torch.sum(log_lik, dim=1) / torch.sum(mask, dim=1)
    return torch.neg(torch.sum(log_lik))


# Per-epoch histories (printed at the end of the notebook).
train_loss = []
val_loss = []
batch_loss = []
max_auprc = 0
val_auroc = []
val_auprc = []
acc = []
prec0 = []
prec1 = []
rec0 = []
rec1 = []
minpse = []

my_epoch_times = []

file_name = './saved_weights/' + args.file_name + datetime.now().strftime("%d%m%Y_%H_%M")
# torch.save does not create missing directories; create the target up front
# so the first checkpoint does not fail after a full epoch of training.
os.makedirs(os.path.dirname(file_name), exist_ok=True)

for each_chunk in range(args.epochs):
    epoch_starttime = perf_counter()
    cur_batch_loss = []
    model.train()
    for each_batch in range(train_data_gen.steps):
        batch_data = next(train_data_gen)
        batch_x, batch_mask, batch_y, batch_time = _prepare_batch(batch_data['data'], device)

        optimizer.zero_grad()
        cur_output = model(batch_x, batch_time, device)
        loss = _masked_bce_loss(cur_output, batch_mask, batch_y)
        cur_batch_loss.append(loss.cpu().detach().numpy())

        loss.backward()
        optimizer.step()

        if each_batch % 50 == 0:
            print('Chunk %d, Batch %d: Loss = %.4f'%(each_chunk, each_batch, cur_batch_loss[-1]))

    batch_loss.append(cur_batch_loss)
    train_loss.append(np.mean(np.array(cur_batch_loss)))

    epoch_endtime = perf_counter()
    my_epoch_times.append(epoch_endtime - epoch_starttime)

    print('Epoch training time: ' + str(dt.timedelta(seconds = epoch_endtime - epoch_starttime)))

    print("\n==>Predicting on validation")
    with torch.no_grad():
        model.eval()
        cur_val_loss = []
        valid_true = []
        valid_pred = []
        for each_batch in range(val_data_gen.steps):
            valid_data = next(val_data_gen)
            valid_x, valid_mask, valid_y, valid_time = _prepare_batch(valid_data['data'], device)

            valid_output = model(valid_x, valid_time, device)
            valid_loss = _masked_bce_loss(valid_output, valid_mask, valid_y)
            cur_val_loss.append(valid_loss.cpu().detach().numpy())

            # Keep targets/predictions only where the mask equals 1 (boolean
            # indexing instead of a per-element Python loop).
            # Assumes mask, targets and outputs flatten to the same length.
            keep = valid_mask.cpu().numpy().flatten() == 1
            valid_true.extend(valid_y.cpu().numpy().flatten()[keep])
            valid_pred.extend(valid_output.cpu().detach().numpy().flatten()[keep])

        val_loss.append(np.mean(np.array(cur_val_loss)))
        print('Valid loss = %.4f'%(val_loss[-1]))
        print('\n')
        valid_pred = np.array(valid_pred)
        # Two columns: 1 - prediction, prediction.
        valid_pred = np.stack([1 - valid_pred, valid_pred], axis=1)
        ret = metrics.print_metrics_binary(valid_true, valid_pred)
        print()

        cur_auprc = ret['auprc']

        val_auprc.append(cur_auprc)
        val_auroc.append(ret['auroc'])
        acc.append(ret['acc'])
        prec0.append(ret['prec0'])
        prec1.append(ret['prec1'])
        rec0.append(ret['rec0'])
        rec1.append(ret['rec1'])
        minpse.append(ret['minpse'])

        # Checkpoint only when validation AUPRC improves, so the file on disk
        # always holds the best-AUPRC model seen so far.
        if cur_auprc > max_auprc:
            max_auprc = cur_auprc
            state = {
                'net': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'chunk': each_chunk
            }
            torch.save(state, file_name)
            print('\n------------ Save best model ------------\n')

Start training ... 
Chunk 0, Batch 0: Loss = 89.4169
Epoch training time: 0:00:43.798410

==>Predicting on validation
Valid loss = 18.8786


confusion matrix:
[[35076     0]
 [  714     0]]


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


accuracy = 0.980050265789032
precision class 0 = 0.980050265789032
precision class 1 = nan
recall class 0 = 1.0
recall class 1 = 0.0
AUC of ROC = 0.7750624454366078
AUC of PRC = 0.06189122145153009
min(+P, Se) = 0.0836104513064133


------------ Save best model ------------

Chunk 1, Batch 0: Loss = 22.5575
Epoch training time: 0:00:39.197561

==>Predicting on validation
Valid loss = 17.0795


confusion matrix:
[[34881   195]
 [  652    62]]
accuracy = 0.9763341546058655
precision class 0 = 0.9816508889198303
precision class 1 = 0.24124513566493988
recall class 0 = 0.9944406151771545
recall class 1 = 0.08683473616838455
AUC of ROC = 0.7976602546595101
AUC of PRC = 0.10342087841724547
min(+P, Se) = 0.20868347338935575


------------ Save best model ------------

Chunk 2, Batch 0: Loss = 19.2811
Epoch training time: 0:00:39.055060

==>Predicting on validation
Valid loss = 10.5063


confusion matrix:
[[34815   261]
 [  594   120]]
accuracy = 0.9761106371879578
precision class 0 = 0.983224



---



Evaluate on Test Data

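Load the last saved checkpoint (the training loop only saves when validation AUPRC improves, so this is the best-AUPRC model):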

In [12]:
# Restore the checkpoint written by the training loop. map_location lets a
# checkpoint saved on GPU be reloaded when the current runtime is CPU-only.
checkpoint = torch.load(file_name, map_location=device)
save_chunk = checkpoint['chunk']  # epoch index stored with the checkpoint
print("last saved model is in chunk {}".format(save_chunk))
model.load_state_dict(checkpoint['net'])
optimizer.load_state_dict(checkpoint['optimizer'])
# Switch to evaluation mode; as the cell's last expression this also displays the model.
model.eval()

last saved model is in chunk 6


StageNet_II(
  (kernel): Linear(in_features=94, out_features=1542, bias=True)
  (recurrent_kernel): Linear(in_features=385, out_features=1542, bias=True)
  (nn_scale): Linear(in_features=384, out_features=64, bias=True)
  (nn_rescale): Linear(in_features=64, out_features=384, bias=True)
  (nn_conv): Conv1d(384, 384, kernel_size=(10,), stride=(1,))
  (nn_output): Linear(in_features=384, out_features=1, bias=True)
  (nn_dropconnect): Dropout(p=0.5, inplace=False)
  (nn_dropconnect_r): Dropout(p=0.5, inplace=False)
  (nn_dropout): Dropout(p=0.5, inplace=False)
  (nn_dropres): Dropout(p=0.3, inplace=False)
)

Load test data.

Should take ~1 minute to run. If running for 5+ minutes, stop and rerun the cell.

In [15]:
start_data_loads = perf_counter()

# Test stays are read from the 'test' directory using 'test_listfile.csv'.
test_data_loader = common_utils.DeepSupervisionDataLoader(
    dataset_dir=os.path.join(args.data_path, 'test'),
    listfile=os.path.join(args.data_path, 'test_listfile.csv'),
    small_part=args.small_part,
)
timer1 = perf_counter()

# Unshuffled batches, built with the same discretizer and normalizer as train/validation.
test_data_gen = utils.BatchGenDeepSupervision(
    test_data_loader, discretizer, normalizer, args.batch_size,
    shuffle=False, return_names=True,
)
end_data_loads = perf_counter()

test_load_time = dt.timedelta(seconds=timer1 - start_data_loads)
test_gen_time = dt.timedelta(seconds=end_data_loads - timer1)
print("Time to load test data: " + str(test_load_time))
print("Time to load test generator: " + str(test_gen_time))

Generating data...
Time to load test data: 0:01:14.960870
Time to load test generator: 0:00:07.175735


Test the model:

In [16]:
print('Testing model ... ')
print('Checkpoint to be loaded: ')
print(file_name)

# Restore the saved weights. map_location lets a checkpoint saved on GPU be
# reloaded when the current runtime is CPU-only.
checkpoint = torch.load(file_name, map_location=device)
save_chunk = checkpoint['chunk']
print("last saved model is in chunk {}".format(save_chunk))
model.load_state_dict(checkpoint['net'])
optimizer.load_state_dict(checkpoint['optimizer'])
model.eval()

# Build a fresh loader and generator for the test split.
start_time = perf_counter()
test_data_loader = common_utils.DeepSupervisionDataLoader(dataset_dir=os.path.join(args.data_path, 'test'),
                                                          listfile=os.path.join(args.data_path, 'test_listfile.csv'),
                                                          small_part=args.small_part)
timer1 = perf_counter()
test_data_gen = utils.BatchGenDeepSupervision(test_data_loader, discretizer,
                                            normalizer, args.batch_size,
                                            shuffle=False, return_names=True)
end_time = perf_counter()

print("Time to load test data: " + str(dt.timedelta(seconds = timer1 - start_time)))
print("Time to load test generator: " + str(dt.timedelta(seconds = end_time - timer1)))
print("Size of test set: " + str(len(test_data_loader._data["X"])))

with torch.no_grad():
    torch.manual_seed(RANDOM_SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(RANDOM_SEED)

    cur_test_loss = []
    test_true = []
    test_pred = []

    for each_batch in range(test_data_gen.steps):
        test_data = next(test_data_gen)['data']

        test_x = torch.tensor(test_data[0][0], dtype=torch.float32).to(device)
        test_mask = torch.tensor(test_data[0][1], dtype=torch.float32).unsqueeze(-1).to(device)
        test_y = torch.tensor(test_data[1], dtype=torch.float32).to(device)

        # Truncate to 400 steps before building the interval features. The
        # interval at step i only depends on steps <= i, so the values match
        # truncating afterwards while skipping work for the discarded steps.
        if test_mask.size()[1] > 400:
            test_x = test_x[:, :400, :]
            test_mask = test_mask[:, :400, :]
            test_y = test_y[:, :400, :]

        # Running count, per sample and channel, of steps whose indicator (one
        # of the last 17 feature channels) was 0 since it last equalled 1.
        tmp = torch.zeros(test_x.size(0), 17, dtype=torch.float32, device=device)
        test_interval = torch.zeros((test_x.size(0), test_x.size(1), 17), dtype=torch.float32, device=device)
        for i in range(test_x.size(1)):
            cur_ind = test_x[:, i, -17:]
            tmp += (cur_ind == 0).float()
            test_interval[:, i, :] = cur_ind * tmp
            tmp[cur_ind == 1] = 0

        test_x = torch.cat((test_x, test_interval), dim=-1)
        test_time = torch.ones((test_x.size(0), test_x.size(1)), dtype=torch.float32, device=device)

        test_output = model(test_x, test_time, device)
        masked_test_output = test_output * test_mask

        # Log-likelihood summed over time, divided by each sequence's mask sum,
        # then summed over the batch and negated.
        test_loss = test_y * torch.log(masked_test_output + 1e-7) + (1 - test_y) * torch.log(1 - masked_test_output + 1e-7)
        test_loss = torch.sum(test_loss, dim=1) / torch.sum(test_mask, dim=1)
        test_loss = torch.neg(torch.sum(test_loss))
        cur_test_loss.append(test_loss.cpu().detach().numpy())

        # Keep targets/predictions only where the mask equals 1 (boolean
        # indexing instead of a per-element Python loop).
        # Assumes mask, targets and outputs flatten to the same length.
        keep = test_mask.cpu().numpy().flatten() == 1
        test_true.extend(test_y.cpu().numpy().flatten()[keep])
        test_pred.extend(test_output.cpu().detach().numpy().flatten()[keep])

    print('Test loss = %.4f'%(np.mean(np.array(cur_test_loss))))
    print('\n')
    test_pred = np.array(test_pred)
    # Two columns: 1 - prediction, prediction.
    test_pred = np.stack([1 - test_pred, test_pred], axis=1)
    test_ret = metrics.print_metrics_binary(test_true, test_pred)

Testing model ... 
Checkpoint to be loaded: 
./saved_weights/trained_model_ablation207052023_21_11
last saved model is in chunk 6
Generating data...
Time to load test data: 0:00:05.707415
Time to load test generator: 0:00:06.879582
Size of test set: 1389
Test loss = 10.1016


confusion matrix:
[[84884   360]
 [ 1181   270]]
accuracy = 0.9822250604629517
precision class 0 = 0.9862778186798096
precision class 1 = 0.4285714328289032
recall class 0 = 0.9957768321037292
recall class 1 = 0.18607856333255768
AUC of ROC = 0.8444596596607215
AUC of PRC = 0.21078365808954203
min(+P, Se) = 0.27980702963473464


---

Getting stats from datasets:




In [17]:
import os
import numpy as np
from collections import Counter

# Combined over all three splits.
my_subject_ids = []
my_length_of_stays = []
# Kept per split.
test_labels = []
val_labels = []
train_labels = []

# Splits are visited in the order test, validation, train, which fixes the
# order of the two combined lists.
for loader, split_labels in (
    (test_data_loader, test_labels),
    (val_data_loader, val_labels),
    (train_data_loader, train_labels),
):
    split_data = loader._data
    for i in range(len(split_data["X"])):
        # Subject id is the part of the entry name before the first '_'.
        my_subject_ids.append(split_data["name"][i].split('_')[0])
        # Last element of the entry's "ts" sequence.
        my_length_of_stays.append(split_data["ts"][i][-1])
        split_labels.append(split_data["ys"][i])

In [18]:
# Number of stays recorded for each subject id.
stays_per_patient = Counter(my_subject_ids)
stay_counts = stays_per_patient.values()

print("Total number of ICU stays: " + str(len(my_subject_ids)))
print("Total number of patients: " + str(len(stays_per_patient)))

max_stays, min_stays = max(stay_counts), min(stay_counts)
longest_stay, shortest_stay = max(my_length_of_stays), min(my_length_of_stays)
mean_stays = sum(stay_counts) / len(stays_per_patient)
mean_length = sum(my_length_of_stays) / len(my_length_of_stays)

print("--- max number of stays per patient: " + str(max_stays))
print("--- max length of stay: " + str(longest_stay))
print("--- min number of stays per patient: " + str(min_stays))
print("--- min length of stay: " + str(shortest_stay))
print("--- average number of stays per patient: " + str(mean_stays))
print("--- average length of stay: " + str(mean_length))

Total number of ICU stays: 6945
Total number of patients: 5594
--- max number of stays per patient: 17
--- max length of stay: 2391.0
--- min number of stays per patient: 1
--- min length of stay: 5.0
--- average number of stays per patient: 1.2415087593850553
--- average length of stay: 70.56573074154068


In [19]:
# Breakdowns of train/test/val
def _count_visits(label_lists):
    """Convert every label list to ints in place and count labels.

    Returns:
        (total, positives): the number of labels over all lists and their sum.
    """
    total = 0
    positives = 0
    for idx in range(len(label_lists)):
        as_ints = [int(label) for label in label_lists[idx]]
        label_lists[idx] = as_ints  # replaces the entry in the caller's list
        total += len(as_ints)
        positives += sum(as_ints)
    return total, positives


total_test_visits, total_pos_test_visits = _count_visits(test_labels)
total_val_visits, total_pos_val_visits = _count_visits(val_labels)
total_train_visits, total_pos_train_visits = _count_visits(train_labels)

# Totals over the three splits.
total_visits = total_test_visits + total_val_visits + total_train_visits
total_pos_visits = total_pos_test_visits + total_pos_val_visits + total_pos_train_visits

In [20]:
# Totals for the *loaded* subsets, followed by one summary per split.
print("Total number of visits (loaded subset): " + str(total_visits) + ", Positive samples: " + str(total_pos_visits))

split_summaries = (
    ("--Train--", train_data_loader, total_train_visits, total_pos_train_visits),
    ("--Validation--", val_data_loader, total_val_visits, total_pos_val_visits),
    ("--Test--", test_data_loader, total_test_visits, total_pos_test_visits),
)
for banner, loader, n_visits, n_positive in split_summaries:
    print(banner)
    print("Number of stays: " + str(len(loader._data["X"])) + ", Number of visits: " + str(n_visits) + ", Number positive visits: " + str(n_positive))


Total number of visits (loaded subset): 462289, Positive samples: 9544
--Train--
Number of stays: 5000, Number of visits: 329751, Number positive visits: 7307
--Validation--
Number of stays: 556, Number of visits: 38663, Number positive visits: 714
--Test--
Number of stays: 1389, Number of visits: 93875, Number positive visits: 1523


---

Printing some variables to be used for reporting




In [21]:
# Run configuration and per-epoch histories, one line each.
# batch_loss is not printed.
history_items = (
    ("args", args),
    ("my_epoch_times", my_epoch_times),
    ("train_loss", train_loss),
    ("val_loss", val_loss),
    ("val_auroc", val_auroc),
    ("val_auprc", val_auprc),
    ("acc", acc),
    ("prec0", prec0),
    ("prec1", prec1),
    ("rec0", rec0),
    ("rec1", rec1),
    ("minpse", minpse),
)
for label, value in history_items:
    print(label + ": ", value)

print("---")

# Best validation AUPRC and the epoch index stored in the reloaded checkpoint.
for label, value in (("max_auprc", max_auprc), ("best_epoch", save_chunk)):
    print(label + ": ", value)

args:  Namespace(test_mode=0, data_path='./data/', file_name='trained_model_ablation2', small_part=5000, batch_size=128, epochs=50, lr=0.001, input_dim=76, rnn_dim=384, output_dim=1, dropout_rate=0.5, dropconnect_rate=0.5, dropres_rate=0.3, K=10, chunk_level=3, f='/root/.local/share/jupyter/runtime/kernel-f5a5045e-e313-47bd-9e63-93722646dbfc.json')
my_epoch_times:  [43.79840974699982, 39.197561482999845, 39.05506006399992, 39.58472141999937, 38.52912911000021, 38.419916741000634, 39.370676523999464, 38.93925171400042, 39.26577687400004, 39.714313759000106, 40.51470758200048, 39.63377812999988, 39.56421609500012, 40.026871377000134, 40.01277177300017, 40.568097940999905, 39.15516745599962, 39.93256628099971, 40.81712327000059, 39.19181686800039, 39.70535662199927, 39.446075768999435, 40.271870542000215, 40.02906615699976, 39.70771315999991, 40.35813434300053, 40.62615116500001, 39.81542464799986, 39.70161929099959, 39.84622990200023, 38.85346423899955, 38.74908194300042, 39.382716133000