In [None]:
from __future__ import print_function, division
%load_ext autoreload

In [None]:
%autoreload

import copy, math, os, pickle, time, sys, os, random, argparse, pandas as pd, numpy as np, scipy.stats as ss
import pathlib

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import average_precision_score, roc_auc_score, accuracy_score, precision_score, f1_score, recall_score
from sklearn import preprocessing

import torch, torch.utils.data as utils, torch.nn as nn, torch.nn.functional as F, torch.optim as optim
from torch.autograd import Variable
from torch.nn.parameter import Parameter
from torch.nn.utils import weight_norm

import multiprocessing
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt

from sklearn.model_selection import StratifiedKFold

In [None]:
'''
Single CUDA device, comment for directions on multiple GPU
'''
# Expose only GPU 0 to this process. To use several GPUs, enable the commented
# line below instead of the single-device assignment.
# NOTE(review): presumably this must run before CUDA is first initialised in the
# kernel for it to take effect - confirm.
#os.environ["CUDA_VISIBLE_DEVICES"]="0,1,2,3"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [None]:
'''
See CUDA versions, CUDA must be available for GPU acceleration
'''
# Installed PyTorch version.
print(torch.__version__)
# CUDA version reported by the PyTorch build.
print(torch.version.cuda)
# True when PyTorch can use a CUDA device in this session.
print(torch.cuda.is_available())

In [None]:
'''
Single CUDA device, comment for directions on multiple GPU
'''
# Show the name of the first visible GPU. On a CPU-only machine display a message
# instead of raising, so the notebook still survives "Run All" (training falls back to CPU).
torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No CUDA device available"
# torch.cuda.get_device_name(1)
# torch.cuda.get_device_name(2)
# torch.cuda.get_device_name(3)

In [None]:
'''
See github directions on getting data access from Physionet.org
'''
# HDF5 file read below with pd.read_hdf (keys 'vitals_labs' and 'patients').
DATA_FILEPATH     = "./data/all_hourly_data.h5"
#RAW_DATA_FILEPATH = './all_hourly_data.h5'
# Only stays with max_hours > WINDOW_SIZE + GAP_TIME are kept when labels are built.
GAP_TIME          = 6  # In hours
# Only rows with hours_in < WINDOW_SIZE are used as model input.
WINDOW_SIZE       = 24 # In hours
# Seed passed to set_primary_seeds and stored in the fixed hyperparameters.
SEED              = 1
# Index levels identifying one ICU stay; used as groupby keys in simple_imputer.
ID_COLS           = ['subject_id', 'hadm_id', 'icustay_id']
# Output directories (not used within the cells visible here).
RESULTS_DIR     = "./results/"
PROCESSED_DATA_DIR = "./processed_data/"

def set_primary_seeds(seed):
    """Seed every random number generator used by the notebook.

    Seeds PyTorch (CPU and all CUDA devices), NumPy and Python's ``random``
    with ``seed`` and switches cuDNN to its deterministic, non-benchmarking mode.

    Args:
        seed: integer seed; ``None`` falls back to 1.
    """
    print("Setting primary seeds...")
    if seed is None:
        seed = 1
    # Use the argument, not the module-level SEED constant, so callers can
    # actually choose the seed.
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # for multiple GPUs.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    
# Seed all RNGs once, before any data shuffling or model construction below.
set_primary_seeds(SEED)

In [None]:
'''
Some tools from: https://github.com/MLforHealth/MIMIC_Extract/tree/master/notebooks
'''
class DictDist():
    """Joint sampler over a dict of distributions.

    ``dict_of_rvs`` maps a name to any object exposing ``rvs(n)``; ``rvs(n)``
    returns a list of ``n`` dicts, each holding one draw per name.
    """
    def __init__(self, dict_of_rvs):
        self.dict_of_rvs = dict_of_rvs

    def rvs(self, n):
        # Draw n values per name first, then regroup them sample by sample.
        draws = {name: dist.rvs(n) for name, dist in self.dict_of_rvs.items()}
        return [
            {name: values[pos] for name, values in draws.items()}
            for pos in range(n)
        ]
    
class Choice():
    """Uniform sampler over a fixed sequence of options (with replacement)."""
    def __init__(self, options):
        self.options = options

    def rvs(self, n):
        # n integer positions in [0, len(options)), mapped back to the options.
        positions = ss.randint(0, len(self.options)).rvs(n)
        return [self.options[pos] for pos in positions]

In [None]:
%%time
# Hourly vitals/labs table; later cells index it by 'icustay_id' and 'hours_in'.
data_full_lvl2 = pd.read_hdf(DATA_FILEPATH, 'vitals_labs')
# Per-stay static table; later cells read max_hours, mort_hosp, mort_icu, los_icu and admittime from it.
statics        = pd.read_hdf(DATA_FILEPATH, 'patients')

In [None]:
'''
Some tools from: https://github.com/MLforHealth/MIMIC_Extract/tree/master/notebooks

“Simple Imputation” scheme outlined in Che et al.:

Zhengping Che, Sanjay Purushotham, Kyunghyun Cho, David Sontag, and Yan
Liu. 2018. Recurrent Neural Networks for Multivariate Time Series with Missing
Values. Scientific Reports 8, 1 (2018).

'''
def simple_imputer(df):
    """Impute the 'mean' columns and derive 'mask' / 'time_since_measured' columns.

    Works on a copy of ``df``. For every first-level column the result holds
    three second-level columns:
      * 'mean' - forward-filled within each ID_COLS group, then filled with the
        group mean, then with 0;
      * 'mask' - 1.0 where the original 'count' was > 0, else 0.0;
      * 'time_since_measured' - running count of unmeasured rows since the last
        measured row, with 100 where no earlier measurement exists.

    Args:
        df: frame with two-level columns whose second level (named
            'Aggregation Function') contains 'mean' and 'count'; extra column
            levels 'label', 'LEVEL1', 'LEVEL2' are dropped when present.
            # assumes the row index contains the ID_COLS levels - TODO confirm

    Returns:
        A new DataFrame with columns sorted along axis 1.
    """
    idx = pd.IndexSlice
    df = df.copy()
    # Reduce to two column levels when the extra naming levels are present.
    if len(df.columns.names) > 2: df.columns = df.columns.droplevel(('label', 'LEVEL1', 'LEVEL2'))
    
    df_out = df.loc[:, idx[:, ['mean', 'count']]]
    # Mean of each 'mean' column per ID_COLS group, used as the second fallback below.
    icustay_means = df_out.loc[:, idx[:, 'mean']].groupby(ID_COLS).mean()
    
    # Fill order: forward-fill within group -> group mean -> 0.
    # NOTE(review): fillna(method='ffill') is deprecated in recent pandas; .ffill()
    # looks like the drop-in replacement - confirm before upgrading.
    df_out.loc[:,idx[:,'mean']] = df_out.loc[:,idx[:,'mean']].groupby(ID_COLS).fillna(
        method='ffill'
    ).groupby(ID_COLS).fillna(icustay_means).fillna(0)
    
    # Turn observation counts into a binary "was measured" indicator.
    df_out.loc[:, idx[:, 'count']] = (df.loc[:, idx[:, 'count']] > 0).astype(float)
    df_out.rename(columns={'count': 'mask'}, level='Aggregation Function', inplace=True)
    
    is_absent = (1 - df_out.loc[:, idx[:, 'mask']])
    # NOTE(review): this cumsum is not grouped by ID_COLS, so it appears to run
    # across the whole frame rather than per stay - confirm this is intended.
    hours_of_absence = is_absent.cumsum()
    # Subtract the absence counter as it stood at the most recent measured row.
    time_since_measured = hours_of_absence - hours_of_absence[is_absent==0].fillna(method='ffill')
    time_since_measured.rename(columns={'mask': 'time_since_measured'}, level='Aggregation Function', inplace=True)

    df_out = pd.concat((df_out, time_since_measured), axis=1)
    # Rows with no earlier measurement are NaN after the subtraction; use 100 as the sentinel.
    df_out.loc[:, idx[:, 'time_since_measured']] = df_out.loc[:, idx[:, 'time_since_measured']].fillna(100)
    
    df_out.sort_index(axis=1, inplace=True)
    
    return df_out

In [None]:
'''
Data preprocessing to define 3-day and 7-day length of stay outcome labels
'''
# Keep only stays longer than the observation window plus the prediction gap.
# .copy() makes Ys an independent frame, so the column assignments below do not
# write into a filtered slice of `statics` (avoids SettingWithCopyWarning).
Ys = statics[statics.max_hours > WINDOW_SIZE + GAP_TIME][['mort_hosp', 'mort_icu', 'los_icu']].copy()
# Binary length-of-stay outcomes: ICU stay longer than 3 / 7 days.
Ys['los_3'] = Ys['los_icu'] > 3
Ys['los_7'] = Ys['los_icu'] > 7
# Ys.drop(columns=['los_icu'], inplace=True)
# The former bare `Ys.astype(float)` was removed: its result was discarded, so it
# only cost a full copy. Ys keeps its original dtypes, exactly as before.

# Restrict the hourly data to the retained stays and to the first WINDOW_SIZE hours.
# Boolean indexing yields a new frame each time, so `lvl2` and `raw` stay independent.
lvl2, raw = [df[
    (df.index.get_level_values('icustay_id').isin(set(Ys.index.get_level_values('icustay_id')))) &
    (df.index.get_level_values('hours_in') < WINDOW_SIZE)
] for df in (data_full_lvl2, data_full_lvl2)]

### START: LOAD TCN SOURCE

In [None]:
'''
TCN source from https://github.com/locuslab/TCN
'''
# removes (k-1) elements from the output on the right. Ensures causality. We first pad and then chomp.
class Chomp1d(nn.Module):
    """Drop the last ``chomp_size`` steps along the final axis of a 3-D tensor.

    Used after a padded convolution to trim the extra trailing outputs.
    """
    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        # x[:, :, :-0] would be an EMPTY slice, so a chomp size of 0
        # (e.g. kernel_size == 1, hence no padding) must return x unchanged.
        if self.chomp_size == 0:
            return x.contiguous()
        return x[:, :, :-self.chomp_size].contiguous()

In [None]:
'''
TCN source from https://github.com/locuslab/TCN
'''
# a single temporal layer/block
class TemporalBlock(nn.Module):
    """One residual block: two weight-normalised Conv1d stages plus a skip connection.

    Each stage is Conv1d -> Chomp1d(padding) -> ReLU -> Dropout. The block output
    is ``relu(net(x) + res)`` where ``res`` is ``x`` itself, or a 1x1 convolution
    of ``x`` when ``n_inputs != n_outputs``.

    Args:
        n_inputs: input channels of the first convolution.
        n_outputs: output channels of both convolutions.
        kernel_size, stride, dilation, padding: forwarded to both Conv1d layers;
            ``padding`` is also the number of trailing steps each Chomp1d removes.
        dropout: dropout probability applied after each ReLU.
    """
    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        super(TemporalBlock, self).__init__()
        # First stage.
        self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        self.chomp1 = Chomp1d(padding)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        # Second stage; keeps the channel count at n_outputs.
        self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        self.chomp2 = Chomp1d(padding)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1,
                                 self.conv2, self.chomp2, self.relu2, self.dropout2)
        # 1x1 convolution so the skip path matches the output channel count when it differs.
        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
        self.relu = nn.ReLU()
        self.init_weights()

    def init_weights(self):
        """Draw the convolution weights from N(0, 0.01)."""
        # NOTE(review): conv1/conv2 are wrapped in weight_norm; writing to `.weight`
        # may be overwritten when the weight is recomputed from its normalised
        # parameters - confirm this initialisation actually takes effect.
        self.conv1.weight.data.normal_(0, 0.01)
        self.conv2.weight.data.normal_(0, 0.01)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)

    def forward(self, x):
        """Return ``relu(net(x) + skip(x))``."""
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)

In [None]:
'''
TCN source from https://github.com/locuslab/TCN
'''

# Create different layers with different dilation sizes
class TemporalConvNet(nn.Module):
    """Stack of TemporalBlocks whose dilation doubles at every level.

    Args:
        num_inputs: channel count fed to the first block.
        num_channels: output channel count per level; its length sets the depth.
        kernel_size: kernel size shared by all blocks.
        dropout: dropout probability shared by all blocks.
    """
    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
        super(TemporalConvNet, self).__init__()
        blocks = []
        incoming = num_inputs
        for level, outgoing in enumerate(num_channels):
            dilation = 2 ** level
            # Padding grows with the dilation; each block chomps the same amount off again.
            blocks.append(
                TemporalBlock(
                    incoming,
                    outgoing,
                    kernel_size,
                    stride=1,
                    dilation=dilation,
                    padding=(kernel_size - 1) * dilation,
                    dropout=dropout,
                )
            )
            incoming = outgoing

        self.network = nn.Sequential(*blocks)

    def forward(self, x):
        """Run ``x`` through every block in order."""
        return self.network(x)



In [None]:
'''
TCN source from https://github.com/locuslab/TCN
'''

# a TCN mode with softmax for classification
# input size = number of features
# output size = 2
# num_channels = nhid
class TCN(nn.Module):
    """TemporalConvNet followed by a linear head, exposed through three activations.

    Args:
        input_size: channel count expected by the convolutional stack.
        output_size: width of the linear head.
        num_channels: per-level channel counts of the convolutional stack.
        kernel_size: convolution kernel size.
        dropout: dropout probability inside the temporal blocks.
    """
    def __init__(self, input_size, output_size, num_channels, kernel_size, dropout):
        super(TCN, self).__init__()
        self.tcn = TemporalConvNet(input_size, num_channels, kernel_size, dropout=dropout)
        self.linear = nn.Linear(num_channels[-1], output_size)
        self.sig = nn.Sigmoid()
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Return ``(sigmoid(out), out, tanh(out))`` with ``out`` cast to float64.

        A leading axis of size 1 is added to ``x`` and axes 1 and 2 are swapped
        before the convolutional stack; they are swapped back before the linear head.
        # NOTE(review): for a (batch, features) input this places the batch on the
        # axis the convolutions slide over - confirm this is intended.
        """
        conv_input = torch.unsqueeze(x, 0).transpose(1, 2)
        conv_features = self.tcn(conv_input).transpose(1, 2)
        output = self.linear(conv_features).double()
        return self.sig(output), output, self.tanh(output)

## Model Training & Evaluation

In [None]:
'''
See input features (mean and std used for normalization)
'''
# Display the column labels of the hourly vitals/labs frame.
list(data_full_lvl2)

In [None]:
# Split the stays by admission date: admitted before 2180-01-01 -> internal set,
# on or after it -> external set. This provides the split for the simulated
# prospective study; years are encoded during anonymization per MIMIC-Extract.
eligible_stays = statics.max_hours > WINDOW_SIZE + GAP_TIME
Ys['admittime'] = statics.loc[eligible_stays, ['admittime']]
split_date = np.datetime64('2180-01-01')
admitted_before_split = Ys['admittime'] < split_date
admitted_from_split = Ys['admittime'] >= split_date
subjects_int = Ys[admitted_before_split].index
subjects_ext = Ys[admitted_from_split].index
# The helper column is only needed for the split; remove it from the labels again.
Ys.drop(columns='admittime', inplace=True)
Ys.astype(float)

In [None]:
'''
More data processing
'''
# standardize and impute
# Sanity check: the hourly features and the labels must cover the same subjects.
lvl2_subj_idx,  Ys_subj_idx = [df.index.get_level_values('subject_id') for df in (lvl2, Ys)]
lvl2_subjects = set(lvl2_subj_idx)
assert lvl2_subjects == set(Ys_subj_idx), "Subject ID pools differ!"

# shuffle the dataset
# (consumes the global NumPy RNG; N is the number of distinct subjects)
subjects, N = np.random.permutation(list(lvl2_subjects)), len(lvl2_subjects)

# standardize the whole dataset
# NOTE(review): the mean/std are computed over all rows of lvl2, i.e. before the
# internal/external split made in a later cell - confirm this is intended.
idx = pd.IndexSlice
lvl2_means, lvl2_stds = lvl2.loc[:, idx[:,'mean']].mean(axis=0), lvl2.loc[:, idx[:,'mean']].std(axis=0)
# In-place z-scoring of the 'mean' columns only.
lvl2.loc[:, idx[:,'mean']] = (lvl2.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds

# impute missing values
# (rebinds lvl2 to the imputed frame, so this cell is not idempotent on re-run)
lvl2 = simple_imputer(lvl2)

In [None]:
'''
"external" is a bad term for the test set in simulated prospective
'''
### Split into internal and external datasets
idx = pd.IndexSlice
X_int = lvl2.loc[idx[subjects_int.get_level_values('subject_id')],:]
X_ext = lvl2.loc[idx[subjects_ext.get_level_values('subject_id')],:]
Y_int = Ys.loc[idx[subjects_int.get_level_values('subject_id')],:]
Y_ext = Ys.loc[idx[subjects_ext.get_level_values('subject_id')],:]

# Shuffled list of internal subject ids (consumes the global NumPy RNG).
all_int_subjects = list(
    np.random.permutation(Y_int.index.get_level_values('subject_id').values)
)

# Report the shapes once (the second, identical set of prints was removed).
# X holds WINDOW_SIZE hourly rows per stay, so rows // WINDOW_SIZE is the stay count.
for split_name, X_split in (("internal", X_int), ("external", X_ext)):
    print("X %s shape: (%d,%d)"%(split_name, X_split.shape[0] // WINDOW_SIZE, X_split.shape[1]))
for split_name, Y_split in (("internal", Y_int), ("external", Y_ext)):
    print("Y %s shape: (%d,%d)"%(split_name, Y_split.shape[0], Y_split.shape[1]))

In [None]:
'''
Some helper data structures to store and save predictions
'''
class Logger():
    """Collects per-sample predictions from many evaluation runs in ``self.df``.

    Every call to ``append_logger`` adds one row per sample with the columns
    'task_name', 'fold', 'prediction_no', 'index', 'y_true', 'y_score' and
    'censoring', plus any optional columns declared at construction.

    Args:
        optional_cols: list of extra column names, or None for none.
    """
    def __init__(self, optional_cols=None):
        self.columns=['task_name','fold','prediction_no','index','y_true','y_score','censoring']
        self.has_optional_cols = optional_cols is not None
        if self.has_optional_cols:
            self.optional_cols = optional_cols
            self.df = pd.DataFrame(columns=self.columns + optional_cols)
        else:
            self.df = pd.DataFrame(columns=self.columns)
        self._rocs=[]
        # Running counter identifying each append_logger call.
        self._prediction_no=0

    def append_logger(self,indices, y_true, y_score, label, censoring=None, optional_dict=None,fold=0):
        """Append one batch of predictions to ``self.df``.

        Args:
            indices: per-sample identifiers.
            y_true: per-sample labels (cast to int).
            y_score: per-sample scores (cast to float).
            label: task name stored with every row.
            censoring: optional per-sample values; NaN for every row when None.
            optional_dict: ``{column: value}`` for the optional columns; ignored
                unless the logger was built with ``optional_cols``.
            fold: fold number stored with every row.

        Raises:
            ValueError: if ``y_true`` and ``y_score`` differ in length.
        """
        y_true=np.array(y_true).astype(int)
        y_score=np.array(y_score).astype(float)

        if y_true.shape[0] != y_score.shape[0]:
            raise ValueError("Shapes of input matrices must match")

        self._n=y_true.shape[0]
        cens = self._n*[math.nan] if censoring is None else censoring

        # Stacking mixed str/int/float rows gives a single string-typed array, so
        # every value is stored as text (preds_df_to_int casts the int columns back).
        arr=np.array([self._n*[label],
                      self._n*[fold],
                      self._n*[self._prediction_no],
                      list(indices),
                      list(y_true),
                      list(y_score),
                      list(cens)
              ]).transpose()

        to_append=pd.DataFrame(arr, columns=self.columns)
        # Tolerate a missing optional_dict instead of failing on None.items().
        if self.has_optional_cols and optional_dict is not None:
            for column, value in optional_dict.items():
                to_append.loc[:,column]=value

        # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
        # and keeps the same row index and column order.
        self.df=pd.concat([self.df, to_append])
        self._prediction_no=self._prediction_no+1
        
def preds_df_to_int(df):
    """Return a copy of ``df`` with 'fold', 'prediction_no' and 'y_true' cast to int64."""
    integer_columns = ('fold', 'prediction_no', 'y_true')
    return df.astype(dtype={column: 'int64' for column in integer_columns})

In [None]:
'''
Primary training method for the TCN model
'''
def _tcn_batch_loss(outputs, labels, loss_function, output_last, criteria):
    """Compute the loss of one batch from the model's (sigmoid, raw, tanh) outputs."""
    _, raw_output, tanh_output = outputs
    targets = labels.double()
    if not output_last:
        return criteria['mse'](torch.flatten(raw_output).double(), targets)
    if loss_function == 1:
        # BCEWithLogitsLoss applies the sigmoid itself, so it gets the raw output.
        return criteria['bce_logits'](torch.flatten(raw_output).double(), targets)
    return criteria['hinge'](torch.flatten(tanh_output).double(), targets)


def TCN_Train_Model(
    model, train_dataloader, valid_dataloader, num_epochs = 300, patience = 3, min_delta = 1e-7, learning_rate=1e-3, lr_deg=1, batch_size=64,
    clip=0, loss_function=1, clamp=0, activity_reg=0,l2_penalty_bool=0, l2_penalty_val=0., amsgrad=0, output_last=True
):
    """Train ``model`` with Adam and early stopping on the validation loss.

    After every training batch one validation batch is evaluated (the validation
    iterator restarts when exhausted); the per-epoch averages drive early stopping.

    Args:
        model: module whose forward returns ``(sigmoid, raw, tanh)`` outputs.
        train_dataloader, valid_dataloader: iterables of ``(X, labels)`` batches.
        num_epochs: maximum number of epochs.
        patience: consecutive non-improving epochs tolerated before stopping.
        min_delta: minimum drop in mean validation loss that counts as an improvement.
        learning_rate, lr_deg: the learning rate of epoch ``e`` is
            ``learning_rate * lr_deg ** e``.
        batch_size: expected size of every training batch (asserted).
        clip: gradient-norm clipping threshold; applied only when > 1.0.
        loss_function: 1 = BCE-with-logits on the raw output, 2 = hinge embedding
            loss on the tanh output (used when ``output_last`` is true).
        clamp: when truthy, parameters are clamped to [-1, 1] before each optimizer step.
        activity_reg: accepted for interface compatibility; not used.
        l2_penalty_bool, l2_penalty_val: Adam weight decay is ``l2_penalty_val``
            when ``l2_penalty_bool`` is truthy, else 0.
        amsgrad: enables Adam's AMSGrad variant when truthy.
        output_last: when false, an MSE loss on the raw output is used instead.

    Returns:
        ``(model, [losses_train, losses_valid, losses_epochs_train, losses_epochs_valid])``
        where ``model`` carries the weights of the best epoch by validation loss.

    Raises:
        ValueError: for an unsupported ``loss_function`` or an empty training loader.
    """
    if output_last and loss_function not in (1, 2):
        raise ValueError("loss_function must be 1 (BCE with logits) or 2 (hinge), got %r" % (loss_function,))

    print('Start Training ... ')

    criteria = {
        'bce_logits': torch.nn.BCEWithLogitsLoss(),
        'hinge': torch.nn.HingeEmbeddingLoss(),
        'mse': torch.nn.MSELoss(),
    }

    # weight decay seems broken, returning nans (06/21/21)
    l2_penalty = l2_penalty_val if l2_penalty_bool else 0.0

    if torch.cuda.is_available():
        device = torch.device('cuda:0')
        model = model.to(device)
        use_gpu = True
        n_devices = torch.cuda.device_count()
        if n_devices == 1:
            print("CUDA is available, 1 device recognized")
        else:
            # Use however many devices are visible rather than assuming exactly four.
            model = nn.DataParallel(model, device_ids=list(range(n_devices)))
            print("CUDA is available, multiple devices recognized")
    else:
        use_gpu = False
        device = torch.device('cpu')
        print("WARNING: CUDA is not available, training on CPU")

    # Make sure dropout is active even if the model was left in eval mode earlier.
    model.train()

    losses_train = []
    losses_valid = []
    losses_epochs_train = []
    losses_epochs_valid = []

    pre_time = time.time()

    # Variables for Early Stopping
    is_best_model = 0
    best_state = None
    min_loss_epoch_valid = 10000.0
    patient_epoch = 0

    for epoch in range(num_epochs):
        lr = learning_rate * (lr_deg) ** epoch # lr_deg 0 < x < inf

        # NOTE(review): the optimizer is rebuilt every epoch to apply the new learning
        # rate, which also resets Adam's moment estimates. Kept as in the original
        # because the published hyperparameters were tuned with this behaviour.
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2_penalty, amsgrad=bool(amsgrad))

        losses_epoch_train = []
        losses_epoch_valid = []

        valid_dataloader_iter = iter(valid_dataloader)

        for X, labels in train_dataloader:
            assert X.size()[0] == batch_size, "Batch Size doesn't match! %s" % str(X.size())
            if use_gpu: X, labels = X.to(device), labels.to(device)

            optimizer.zero_grad()
            loss_train = _tcn_batch_loss(model(X), labels, loss_function, output_last, criteria)

            losses_train.append(loss_train.detach())
            losses_epoch_train.append(loss_train.detach())

            loss_train.backward()

            if clip > 1.0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            if clamp:
                with torch.no_grad():
                    for param in model.parameters():
                        param.clamp_(-1, 1)
            optimizer.step()

            # One validation batch per training batch; restart the iterator when it runs out.
            try:
                X_val, labels_val = next(valid_dataloader_iter)
            except StopIteration:
                valid_dataloader_iter = iter(valid_dataloader)
                X_val, labels_val = next(valid_dataloader_iter)

            if use_gpu: X_val, labels_val = X_val.to(device), labels_val.to(device)

            # No gradients are needed for monitoring; this only saves memory.
            # NOTE(review): the model stays in train mode here (dropout active), as in
            # the original - confirm whether validation should run in eval mode.
            with torch.no_grad():
                loss_valid = _tcn_batch_loss(model(X_val), labels_val, loss_function, output_last, criteria)

            losses_valid.append(loss_valid)
            losses_epoch_valid.append(loss_valid)

        if not losses_epoch_train:
            raise ValueError("train_dataloader produced no batches")

        avg_losses_epoch_train = sum(losses_epoch_train).cpu().numpy() / float(len(losses_epoch_train))
        avg_losses_epoch_valid = sum(losses_epoch_valid).cpu().numpy() / float(len(losses_epoch_valid))
        losses_epochs_train.append(avg_losses_epoch_train)
        losses_epochs_valid.append(avg_losses_epoch_valid)

        # Early Stopping
        if epoch == 0:
            improved = True
            if avg_losses_epoch_valid < min_loss_epoch_valid:
                min_loss_epoch_valid = avg_losses_epoch_valid
        else:
            improved = min_loss_epoch_valid - avg_losses_epoch_valid > min_delta
            if improved:
                min_loss_epoch_valid = avg_losses_epoch_valid

        if improved:
            is_best_model = 1
            patient_epoch = 0
            # Snapshot the weights: `best_model = model` only aliased the live model,
            # so the weights of the LAST epoch were returned instead of the best one.
            best_state = copy.deepcopy(model.state_dict())
        else:
            is_best_model = 0
            patient_epoch += 1
            if patient_epoch >= patience:
                print('Early Stopped at Epoch:', epoch)
                break

        # Print training parameters
        cur_time = time.time()
        print('Epoch: {}, train_loss: {}, valid_loss: {}, time: {}, best model: {}'.format(
            epoch,
            np.around(avg_losses_epoch_train, decimals=8),
            np.around(avg_losses_epoch_valid, decimals=8),
            np.around([cur_time - pre_time] , decimals=2),
            is_best_model) )
        pre_time = cur_time

    # Restore the best epoch's weights before handing the model back.
    if best_state is not None:
        model.load_state_dict(best_state)
    best_model = model

    return best_model, [losses_train, losses_valid, losses_epochs_train, losses_epochs_valid]

In [None]:
'''
Primary validation method for the TCN model
'''
def predict_proba(model, dataloader, loss_function, output_last=True):
    """Run ``model`` over ``dataloader`` in eval mode and collect its outputs.

    Args:
        model: module whose forward returns ``(sigmoid, raw, tanh)`` outputs.
        dataloader: iterable of ``(X, Y)`` batches.
        loss_function: 1 selects the sigmoid output, 2 the tanh output
            (used when ``output_last`` is true).
        output_last: when false, the raw (un-activated) output is returned instead.

    Returns:
        ``(probabilities, labels)``: two lists with one NumPy array per batch,
        holding the flattened model output and the batch labels respectively.

    Raises:
        ValueError: if ``output_last`` is true and ``loss_function`` is not 1 or 2.

    Note:
        The model is moved to the GPU when one is available and is left in eval mode.
    """
    if output_last and loss_function not in (1, 2):
        raise ValueError("loss_function must be 1 (sigmoid output) or 2 (tanh output), got %r" % (loss_function,))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()

    probabilities = []
    labels        = []
    # Inference only: skip autograd bookkeeping to save memory.
    with torch.no_grad():
        for X, Y in dataloader:
            X = X.to(device)
            pred_sig, pred_noact, pred_tanh = model(X)

            if not output_last:
                pred = torch.flatten(pred_noact)
            elif loss_function == 1:
                pred = torch.flatten(pred_sig)
            else:
                pred = torch.flatten(pred_tanh)

            probabilities.append(pred.detach().cpu().numpy())
            labels.append(Y.detach().cpu().numpy())

    return probabilities, labels

In [None]:
'''
PyTorch dataloader and associated tools
'''
def to_3D_tensor(df):
    """Stack a 4-level-indexed frame into an array with one slice per 'hours_in' value.

    For every distinct 'hours_in' value (ascending) the rows with that value are
    taken as a 2-D array; the arrays are stacked along a new third axis, giving
    shape ``(rows per hour, columns, number of hours)``.
    # assumes every hour has the same number of rows, in the same order - TODO confirm
    """
    idx = pd.IndexSlice
    hours = sorted(set(df.index.get_level_values('hours_in')))
    # np.dstack needs a real sequence; passing a generator is deprecated and
    # rejected by recent NumPy releases.
    return np.dstack([df.loc[idx[:, :, :, hour], :].values for hour in hours])

def to_2D_tensor(df):
    """Stack the per-row sequences in ``df['data']`` into one 2-D array.

    Reads the column directly instead of doing a label lookup per row: this is
    linear in the number of rows and still yields one entry per row when index
    labels repeat (``df.loc[label, 'data']`` returns a Series in that case).

    Raises:
        ValueError: from ``np.stack`` when ``df`` has no rows.
    """
    return np.stack(tuple(df['data']))

def seed_worker(worker_id):
    """Seed NumPy and ``random`` from PyTorch's initial seed, reduced modulo 2**32.

    ``worker_id`` is accepted but not used.
    """
    derived_seed = torch.initial_seed() % (2 ** 32)
    for reseed in (np.random.seed, random.seed):
        reseed(derived_seed)

def prepare_dataloader(df, Ys, batch_size, shuffle=True):
    """Wrap features and labels in a DataLoader that drops the last partial batch.

    Args:
        df: feature frame, converted with ``to_3D_tensor`` and cast to float32.
        Ys: labels; ``Ys.values`` is cast to int64.
        batch_size: batch size (coerced with ``int``).
        shuffle: forwarded to the DataLoader.
    """
    features = to_3D_tensor(df).astype(np.float32)
    targets = Ys.values.astype(np.int64)
    tensor_pairs = utils.TensorDataset(torch.from_numpy(features), torch.from_numpy(targets))
    return utils.DataLoader(
        tensor_pairs,
        batch_size=int(batch_size),
        shuffle=shuffle,
        drop_last=True,
    )

def prepare_2d_dataloader(df, Ys, batch_size, shuffle=True):
    """Wrap stacked features and labels in a DataLoader that drops the last partial batch.

    Args:
        df: frame with a 'data' column, converted with ``to_2D_tensor`` and cast to float32.
        Ys: labels; ``Ys.values`` is cast to int64.
        batch_size: batch size (coerced with ``int``).
        shuffle: forwarded to the DataLoader.
    """
    features = to_2D_tensor(df).astype(np.float32)
    targets = Ys.values.astype(np.int64)
    tensor_pairs = utils.TensorDataset(torch.from_numpy(features), torch.from_numpy(targets))
    return utils.DataLoader(
        tensor_pairs,
        batch_size=int(batch_size),
        shuffle=shuffle,
        drop_last=True,
    )



def stack_dataframe(df):
    """Flatten each stay's rows into one list, row by row.

    Rows are grouped by the first three index levels; for every group the values
    are read row-major (all columns of the first row, then the second row, ...).

    Args:
        df: frame with a 4-level row index. Multi-level columns are flattened
            in place to '_'-joined names, as before.

    Returns:
        DataFrame indexed by the distinct first-three-level keys (in order of
        first appearance) with a single 'data' column holding the lists.
    """
    # Flatten only genuine multi-level columns: joining already-flat string names
    # would splice '_' between their characters on a second call.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.map('_'.join)

    stay_index = df.index.droplevel(level=3).drop_duplicates()
    # One vectorised ravel per stay replaces rows x columns individual .iloc reads.
    flattened = {
        stay_key: stay_rows.values.ravel(order='C').tolist()
        for stay_key, stay_rows in df.groupby(level=[0, 1, 2], sort=False)
    }

    # Fill an object array element by element so each cell holds a whole list.
    cells = np.empty(len(stay_index), dtype=object)
    for position, stay_key in enumerate(stay_index):
        cells[position] = flattened.get(stay_key, '')

    df2 = pd.DataFrame(index=stay_index)
    df2['data'] = cells
    return df2

def stack_dataframe_alt(df):
    """Flatten each stay's rows into one list, column by column.

    Rows are grouped by the first three index levels; for every group the values
    are read column-major (all rows of the first column, then the second column, ...).

    Args:
        df: frame with a 4-level row index. Multi-level columns are flattened
            in place to '_'-joined names, as before.

    Returns:
        DataFrame indexed by the distinct first-three-level keys (in order of
        first appearance) with a single 'data' column holding the lists.
    """
    # Flatten only genuine multi-level columns: joining already-flat string names
    # would splice '_' between their characters on a second call.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.map('_'.join)

    stay_index = df.index.droplevel(level=3).drop_duplicates()
    # Fortran-order ravel walks the block one column at a time.
    flattened = {
        stay_key: stay_rows.values.ravel(order='F').tolist()
        for stay_key, stay_rows in df.groupby(level=[0, 1, 2], sort=False)
    }

    # Fill an object array element by element so each cell holds a whole list.
    cells = np.empty(len(stay_index), dtype=object)
    for position, stay_key in enumerate(stay_index):
        cells[position] = flattened.get(stay_key, '')

    df2 = pd.DataFrame(index=stay_index)
    df2['data'] = cells
    return df2

def ps_spawn(i, shared_dict, df):
    """
    Worker entry point: stack `df` with stack_dataframe_alt and store the
    stacked frame in `shared_dict` under key `i`.
    """
    shared_dict[i] = stack_dataframe_alt(df)

In [None]:
N = 1
num_features = 312
# Size of the model input handed to TCN as 'input_channels'.
# NOTE(review): 24 presumably is the number of hourly steps per stay (WINDOW_SIZE) -- confirm.
input_channels = 24*num_features
n_classes = 1
early_stop_frac = 0.1

# Fixed hyperparameters, shared by every configuration of the search.
TCN_hyperparams_fixed = {
    'input_channels': input_channels,
    'n_classes': n_classes,
    'early_stop_frac': early_stop_frac,
    'seed': SEED,
}

# Search space for the hyperparameter grid search.
#
# Search space from publication: https://www.nature.com/articles/s41598-022-25472-z
# dropout_list = [0.8561711076089411] # between 0.80 to 0.85
# levels_list = [10, 12]
# nhid_list = [250,209]
# kernel_size_list = [5,7]
# num_epochs_list = [100]
# patience_list = [3]
# learning_rate_list = [3.0e-5,5.0e-5,7e-5]
# lr_deg_list=[1.4979573737643916]
# batch_size_list = [128]
# loss_function_list = [1]
# clamp_list = [1]
# clip_list = [1.3773119536842078]
# activity_reg_list = [0]
# l2_penalty_bool_list = [0]
# l2_penalty_val_list = [0.06341248051564571]
# ams_grad_list = [1]

# single combination for testing and dev.
dropout_list = [0.8561711076089411] # between 0.80 to 0.85
levels_list = [10]
nhid_list = [250]
kernel_size_list = [5]
num_epochs_list = [100]
patience_list = [3]
learning_rate_list = [3.0e-5]
lr_deg_list=[1.4979573737643916]
batch_size_list = [128]
loss_function_list = [1]
clamp_list = [1]
clip_list = [1.3773119536842078]
activity_reg_list = [0]
l2_penalty_bool_list = [0]
l2_penalty_val_list = [0.06341248051564571]
ams_grad_list = [1]

# Hyperparameter name -> candidate values. Insertion order fixes both the key
# order of every configuration and the enumeration order (first entry varies
# slowest, last entry fastest).
TCN_search_space = {
    'dropout': dropout_list,
    'levels': levels_list,
    'nhid': nhid_list,
    'kernel_size': kernel_size_list,
    'num_epochs': num_epochs_list,
    'patience': patience_list,
    'learning_rate': learning_rate_list,
    'lr_deg': lr_deg_list,
    'batch_size': batch_size_list,
    'loss_function': loss_function_list,
    'clamp': clamp_list,
    'clip': clip_list,
    'activity_reg': activity_reg_list,
    'l2_penalty_bool': l2_penalty_bool_list,
    'l2_penalty_val': l2_penalty_val_list,
    'ams_grad': ams_grad_list,
}

# Cartesian product of the search space, built one hyperparameter at a time:
# every partial configuration is extended with each candidate of the next name.
TCN_hyperparams_list = [{}]
for param_name, candidates in TCN_search_space.items():
    TCN_hyperparams_list = [
        dict(combo, **{param_name: value})
        for combo in TCN_hyperparams_list
        for value in candidates
    ]
print("Number of hyperparam configurations: ", len(TCN_hyperparams_list))

In [None]:
'''
Capture output and save to a text file

For every outcome this cell runs a 10-fold internal cross-validation over the
hyperparameter grid, then retrains on all internal data with the best
configuration and scores the external cohort. Predictions of both stages are
written as CSV files into RESULTS_DIR.
'''
#%%capture cap --no-stderr
hyperparams_fixed = TCN_hyperparams_fixed
hyperparams_list = TCN_hyperparams_list
sss = StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED)
idx = pd.IndexSlice
# evaluate for all four BC outcomes. Can remove any entry from this list
outcomes = ['los_3', 'los_7', 'mort_icu', 'mort_hosp']
preds_int = Logger()
preds_ext = Logger()

# Names of the six frames cached per split; each one is both the HDF key and the file-name suffix.
SPLIT_KEYS = ('X_train_obs_tmp', 'X_train_early_stop_tmp', 'X_test_tmp',
              'Ys_train_obs', 'Ys_train_early_stop', 'Ys_test')
BINARY_OUTCOMES = ('mort_icu', 'los_3', 'mort_hosp', 'los_7')
# Values the scores are snapped to, keyed by loss_function id.
ROUNDS_BY_LOSS = {1: [0, 1], 2: [-1, 1]}
# Hyperparameters forwarded to TCN_Train_Model.
# NOTE(review): the search grid stores the key 'ams_grad', which is not in this tuple, so
# that setting is never forwarded. Confirm the parameter name in TCN_Train_Model and align them.
TRAIN_KWARG_NAMES = ('num_epochs', 'patience', 'learning_rate', 'lr_deg', 'batch_size', 'clip',
                     'loss_function', 'clamp', 'activity_reg', 'l2_penalty_bool', 'l2_penalty_val', 'amsgrad')


def _uses_last_output(outcome):
    """Return True for the binary outcomes and False for 'los_icu'; raise on any other label."""
    if outcome in BINARY_OUTCOMES:
        return True
    if outcome == 'los_icu':
        return False
    raise ValueError("invalid label for 'output_last' check: %r" % (outcome,))


def _load_cached_splits(paths):
    """Read the six cached frames, or return None when at least one cache file is missing."""
    if not all(pathlib.Path(path).exists() for path in paths.values()):
        return None
    return {key: pd.read_hdf(path, key=key) for key, path in paths.items()}


def _save_splits(paths, splits):
    """Write every frame of `splits` to its cache file, overwriting existing files."""
    for key, path in paths.items():
        splits[key].to_hdf(path, key=key, mode='w')


def _stack_in_subprocesses(frames):
    """Stack each frame with ps_spawn in its own child process, one process at a time."""
    with multiprocessing.Manager() as manager:
        shared_dict = manager.dict()
        for i, frame in enumerate(frames):
            p = multiprocessing.Process(target=ps_spawn, args=(i, shared_dict, frame))
            p.start()
            p.join()
            if p.exitcode != 0:
                raise RuntimeError("Stacking worker %d exited with code %s" % (i, p.exitcode))
        # Fetch the results while the manager process is still alive.
        return [shared_dict[i] for i in range(len(frames))]


def _build_splits(X_pool, Y_pool, X_holdout, Y_holdout, outcome, early_stop_frac, announce_sizes=False):
    """
    Build the six frames of one split from scratch (the slow path behind the cache).

    The subjects of the pool are shuffled after reseeding and the last
    `early_stop_frac` share is held out for early stopping. The
    'time_since_measured' columns are min-max scaled with a scaler fitted on the
    whole pool; only copies are scaled, the inputs stay untouched. For every
    outcome except 'los_icu' the labels are cast to int and -1 is replaced by 0.
    Returns a dict keyed by SPLIT_KEYS.
    """
    set_primary_seeds(SEED)
    shuffled_subjects = list(
        np.random.permutation(Y_pool.index.get_level_values('subject_id').values)
    )
    n_early_stop = int(len(shuffled_subjects) * early_stop_frac)
    cut = len(shuffled_subjects) - n_early_stop
    train_subjects, early_stop_subjects = shuffled_subjects[:cut], shuffled_subjects[cut:]
    if announce_sizes:
        print("Train subjects length: ", len(train_subjects))
        print("ES/valid subjects length: ", len(early_stop_subjects))

    # try normalizing the "time since measured columns"
    time_cols = idx[:, ['time_since_measured']]
    time_since_normalizer = preprocessing.MinMaxScaler()
    time_since_normalizer.fit(X_pool.loc[:, time_cols])

    x_subjects = X_pool.index.get_level_values('subject_id')
    y_subjects = Y_pool.index.get_level_values('subject_id')
    features = [
        X_pool[x_subjects.isin(train_subjects)].copy(),
        X_pool[x_subjects.isin(early_stop_subjects)].copy(),
        X_holdout.copy(),
    ]
    for frame in features:
        frame.loc[:, time_cols] = time_since_normalizer.transform(frame.loc[:, time_cols])

    labels = [
        Y_pool[y_subjects.isin(train_subjects)],
        Y_pool[y_subjects.isin(early_stop_subjects)],
        Y_holdout,
    ]
    # binary outcomes are trained on {0, 1} labels: cast to int and map any -1 to 0
    if outcome != 'los_icu':
        labels = [frame.astype(int).replace(to_replace=-1, value=0) for frame in labels]

    print("Stack data - start...")
    stacked = _stack_in_subprocesses(features)
    return dict(zip(SPLIT_KEYS, stacked + labels))


def _report_binary_metrics(y_true, y_score, rounds):
    """Snap scores to the nearest value in `rounds`, print the metrics and return them."""
    nearest = np.argmin(abs(np.subtract.outer(y_score, rounds)), axis=1)
    y_pred = [rounds[j] for j in nearest]
    for elem in y_true:
        if elem not in rounds:
            print("numerical error y_trues")
    auc   = roc_auc_score(y_true, y_score)
    auprc = average_precision_score(y_true, y_score)
    acc   = accuracy_score(y_true, y_pred)
    prec  = precision_score(y_true, y_pred)
    rec   = recall_score(y_true, y_pred)
    F1    = f1_score(y_true, y_pred)
    print("auc->%f, auprc->%f, acc->%f, prec->%f, rec->%f, F1->%f" % (auc, auprc, acc, prec, rec, F1))
    return auc, auprc, acc, prec, rec, F1


def _report_regression_metrics(y_true, y_score):
    """Print the regression metrics and return (mse, rmse, mae)."""
    mse = mean_squared_error(y_true, y_score)
    rmse = sqrt(mse)
    mae = mean_absolute_error(y_true, y_score)
    print("mse->%f, rmse->%f, MAE->%f, (pred mean, pred_std)->(%f,%f), (label_mean, label_std)->(%f,%f)" % (
        mse, rmse, mae, np.mean(y_score), np.std(y_score), np.mean(y_true), np.std(y_true)))
    return mse, rmse, mae


for t in outcomes:
    print("Outcome:", t)
    int_save_str = RESULTS_DIR + '10Fold_TCN_int_' + str(t)
    ext_save_str = RESULTS_DIR + '10Fold_TCN_ext_' + str(t)
    output_last = _uses_last_output(t)
    fold=1
    # AUC is maximised, RMSE is minimised
    best_fold_auc, best_fold_rmse = -np.inf, np.inf
    best_fold_hyperparams = None
    best_fold_model_name = "N/A"
    best_fold = -1
    # The stratified split only depends on the labels; the first argument provides the sample count.
    for train_index, test_index in sss.split(subjects_int.get_level_values('subject_id'), Y_int[t]):
        # Internal: 10-fold cross validation for training split
        best_auc, best_rmse = -np.inf, np.inf
        best_hyperparams = None
        best_model_name = "N/A"
        best_preds, best_true, best_subject_idx = [], [], []
        early_stop_frac = hyperparams_fixed['early_stop_frac']
        print("Evaluating TCN for Outcome: %s, Fold: %d"%(t,fold))
        train_subj = subjects_int[train_index].get_level_values('subject_id')
        test_subj = subjects_int[test_index].get_level_values('subject_id')

        [(X_train, X_test), (Ys_train, Ys_test)] = [
            [df[df.index.get_level_values('subject_id').isin(s)] for s in (train_subj,  test_subj)] \
            for df in (X_int, Y_int)
        ]

        for df in X_train, X_test, Ys_train, Ys_test: assert not df.isnull().any().any()

        # Preprocessing in optimal way takes a lot of time. Therefore, we run the preprocessing with a
        # unique seed and save the dataframes in a more efficient format. Running the first time will
        # take a while. Running again will dive into model training/eval much faster.
        # NOTE(review): the cache is keyed by seed and fold only, while the folds are stratified on the
        # current outcome. Cached folds therefore stem from whichever outcome was preprocessed first.
        fold_prefix = PROCESSED_DATA_DIR + str(SEED) + "_" + str(fold)
        cache_paths = {key: fold_prefix + '_TRAIN_' + key + '.h5' for key in SPLIT_KEYS}
        # the first file has always used a different prefix; kept so existing caches are still found
        cache_paths['X_train_obs_tmp'] = PROCESSED_DATA_DIR + "SEED" + str(SEED) + "_FOLD" + str(fold) + '_TRAIN_X_train_obs_tmp.h5'

        splits = _load_cached_splits(cache_paths)
        if splits is None:
            splits = _build_splits(X_train, Ys_train, X_test, Ys_test, t, early_stop_frac, announce_sizes=True)
            _save_splits(cache_paths, splits)
            print("Stack data - complete...")
        (X_train_obs_tmp, X_train_early_stop_tmp, X_test_tmp,
         Ys_train_obs, Ys_train_early_stop, Ys_test) = (splits[key] for key in SPLIT_KEYS)

        for i, hyperparams in enumerate(hyperparams_list):
            model_name = "Fold%d_ParamSet%d"%(fold,i)
            print("Beginning Evaluation for: %s"%(model_name))
            print("Hyperparams Set: %d / %d (hyperparams = %s)" % (i+1, len(hyperparams_list), repr((hyperparams))))
            model_hyperparams = copy.copy(hyperparams_fixed)
            model_hyperparams.update(hyperparams)
            batch_size = model_hyperparams['batch_size']

            # MULTIVARIATE DL
            # NOTE(review): the test loader also shuffles and drops the last partial batch (defaults of
            # prepare_2d_dataloader), so subject_idx below is a position in that order, not a subject id.
            train_dataloader      = prepare_2d_dataloader(X_train_obs_tmp, Ys_train_obs[t], batch_size=batch_size)
            early_stop_dataloader = prepare_2d_dataloader(X_train_early_stop_tmp, Ys_train_early_stop[t], batch_size=batch_size)
            test_dataloader       = prepare_2d_dataloader(X_test_tmp, Ys_test[t], batch_size=batch_size)
            print("init dataloaders complete")

            # Create the model based on the parameters defined above
            # NOTE(review): the model is created before the seeds are reset, so its initial
            # weights depend on the RNG state left by earlier steps -- confirm this is intended.
            channel_sizes = [model_hyperparams['nhid']]*model_hyperparams['levels']
            model = TCN(model_hyperparams['input_channels'], model_hyperparams['n_classes'], channel_sizes,\
                        kernel_size=model_hyperparams['kernel_size'], dropout=model_hyperparams['dropout'])

            set_primary_seeds(SEED)
            best_model, _ = TCN_Train_Model(
                model, train_dataloader, early_stop_dataloader, output_last=output_last,
                **{k: v for k, v in model_hyperparams.items() if k in TRAIN_KWARG_NAMES}
            )

            set_primary_seeds(SEED)
            probabilities_test, labels_test = predict_proba(best_model,test_dataloader,loss_function=model_hyperparams['loss_function'], output_last=output_last)
            y_score     = np.concatenate(probabilities_test)
            y_true      = np.concatenate(labels_test)
            subject_idx = list(range(0,len(y_score)))

            print("Internal validation testing for our best model: %s, on target %s" % (model_name, t))
            if output_last:
                # internal validation always snaps the scores to {0, 1}
                rounds = [0,1]
                auc, auprc, acc, prec, rec, F1 = _report_binary_metrics(y_true, y_score, rounds)
                if auc > best_auc:
                    best_auc, best_hyperparams = auc, hyperparams
                    best_model_name = model_name
                    print("New Best AUC within Fold (%d): %.2f @ hyperparams = %s" % (fold, 100*best_auc, repr((best_hyperparams))))
                    # keep labels and positions of the same run as the predictions
                    best_preds, best_true, best_subject_idx = y_score, y_true, subject_idx
            else:
                mse, rmse, mae = _report_regression_metrics(y_true, y_score)
                if rmse < best_rmse:
                    best_rmse, best_hyperparams = rmse, hyperparams
                    best_model_name = model_name
                    print("New Best RMSE within Fold (%d): %.2f @ hyperparams = %s" % (fold, best_rmse, repr((best_hyperparams))))
                    best_preds, best_true, best_subject_idx = y_score, y_true, subject_idx
        if output_last:
            if best_auc > best_fold_auc:
                best_fold_auc, best_fold_hyperparams = best_auc, best_hyperparams
                best_fold_model_name = best_model_name
                print("New Best AUC across all folds: %.2f @ hyperparams = %s" % (100*best_fold_auc, repr((best_fold_hyperparams))))
                best_fold = fold
        else:
            if best_rmse < best_fold_rmse:
                best_fold_rmse, best_fold_hyperparams = best_rmse, best_hyperparams
                best_fold_model_name = best_model_name
                print("New Best RMSE across all folds: %.2f @ hyperparams = %s" % (best_fold_rmse, repr((best_fold_hyperparams))))
                best_fold = fold
        assert len(best_true) == len(best_preds), "Labels (%d) and preds lengths (%d) dont match"%(len(best_true),len(best_preds))
        print("Appending best predictions from Fold %d. Length: %d"%(fold, len(best_preds)))
        preds_int.append_logger(best_subject_idx, best_true, best_preds, label=t, fold=fold)
        fold+=1
        print()

    # External: train/validation and testing on all data
    if best_fold_hyperparams is None:
        raise RuntimeError("No hyperparameter set was selected for outcome %s" % (t,))
    model_hyperparams = copy.copy(hyperparams_fixed)
    model_hyperparams.update(best_fold_hyperparams)
    early_stop_frac = model_hyperparams['early_stop_frac']
    batch_size = model_hyperparams['batch_size']

    print("Begin external testing")
    print("X internal shape: (%d,%d)"%(X_int.shape[0]/24,X_int.shape[1]))
    print("X external shape: (%d,%d)"%(X_ext.shape[0]/24,X_ext.shape[1]))
    print("Y internal shape: (%d,%d)"%(Y_int.shape[0],Y_int.shape[1]))
    print("Y external shape: (%d,%d)"%(Y_ext.shape[0],Y_ext.shape[1]))

    cache_paths = {key: PROCESSED_DATA_DIR + str(SEED) + '_TEST_' + key + '.h5' for key in SPLIT_KEYS}
    splits = _load_cached_splits(cache_paths)
    if splits is None:
        # The scaler is fitted on all internal data. Caches written by earlier versions of this
        # cell may have been scaled differently; delete the *_TEST_* files to rebuild them.
        splits = _build_splits(X_int, Y_int, X_ext, Y_ext, t, early_stop_frac)
        _save_splits(cache_paths, splits)
        print("Stack data - complete...")
    (X_train_obs_tmp, X_train_early_stop_tmp, X_test_tmp,
     Ys_train_obs, Ys_train_early_stop, Ys_test) = (splits[key] for key in SPLIT_KEYS)

    # MULTIVARIATE DL
    print("External Dims: ")
    print("X internal train: ", X_train_obs_tmp.shape)
    print("Y internal train: ", Ys_train_obs[t].shape)
    print("X internal val: ", X_train_early_stop_tmp.shape)
    print("Y internal val: ", Ys_train_early_stop[t].shape)
    print("X external test: ", X_test_tmp.shape)
    print("Y external test: ", Ys_test[t].shape)
    train_dataloader      = prepare_2d_dataloader(X_train_obs_tmp, Ys_train_obs[t], batch_size=batch_size)
    early_stop_dataloader = prepare_2d_dataloader(X_train_early_stop_tmp, Ys_train_early_stop[t], batch_size=batch_size)
    # batch_size=1 so that drop_last cannot discard any external sample
    test_dataloader       = prepare_2d_dataloader(X_test_tmp, Ys_test[t], batch_size=1)

    print("init dataloaders complete")
    # Create the model based on the parameters defined above
    channel_sizes = [model_hyperparams['nhid']]*model_hyperparams['levels']
    model = TCN(model_hyperparams['input_channels'], model_hyperparams['n_classes'], channel_sizes,\
                kernel_size=model_hyperparams['kernel_size'], dropout=model_hyperparams['dropout'])

    set_primary_seeds(SEED)
    best_model, _ = TCN_Train_Model(
        model, train_dataloader, early_stop_dataloader, output_last=output_last,
        **{k: v for k, v in model_hyperparams.items() if k in TRAIN_KWARG_NAMES}
    )

    set_primary_seeds(SEED)
    probabilities_test, labels_test = predict_proba(best_model,test_dataloader,loss_function=model_hyperparams['loss_function'], output_last=output_last)
    y_score     = np.concatenate(probabilities_test)
    y_true      = np.concatenate(labels_test)
    subject_idx = list(range(0,len(y_score)))

    print("External validation testing for our best model: %s, on target %s" % (best_fold_model_name, t))
    print("(hyperparams = %s)" % (repr((model_hyperparams))))
    if output_last:
        # unknown loss ids fall back to {0, 1}, the values used during internal validation
        rounds = ROUNDS_BY_LOSS.get(model_hyperparams['loss_function'], [0, 1])
        auc, auprc, acc, prec, rec, F1 = _report_binary_metrics(y_true, y_score, rounds)
    else:
        mse, rmse, mae = _report_regression_metrics(y_true, y_score)
    assert len(y_true) == len(y_score), "Labels (%d) and preds lengths (%d) dont match"%(len(y_true),len(y_score))
    print("Appending external test predictions, length: %d"%(len(y_score)))
    preds_ext.append_logger(subject_idx, y_true, y_score, label=t, fold=-1)
    print("\n")
preds_int.df = preds_df_to_int(preds_int.df)
preds_int.df.to_csv(RESULTS_DIR+"TCN_internal_test_preds.csv")
preds_ext.df = preds_df_to_int(preds_ext.df)
preds_ext.df.to_csv(RESULTS_DIR+"TCN_external_test_preds.csv")

In [None]:
'''
write cell output (text) from above into a dedicated logfile
'''
write_file = RESULTS_DIR + 'TCN_main_output.txt'
# `cap` only exists when the training cell was run with the %%capture magic enabled;
# without it there is nothing to write and the cell must not fail.
if 'cap' in globals():
    with open(write_file, 'w') as f:
        f.write(cap.stdout)
    del cap
else:
    print("No captured output found; run the training cell with '%%capture cap --no-stderr' as its first line to record it.")