In [None]:
import numpy as np
import pandas as pd
import sys
import os
import pickle 

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import label_binarize
from sklearn.ensemble import RandomForestClassifier
import scipy.stats as ss

In [None]:
sys.path.append('../utils')
from simple_impute import simple_imputer

# Task Specifics

In [None]:
# Task configuration: module-level constants read by the cells below.
INTERVENTION = 'vent'      # column of the interventions table kept as the label series
RANDOM = 0                 # seed handed to train_test_split, numpy, TensorFlow and the sklearn models
MAX_LEN = 240              # rows kept per subject when the padded tensors are built
SLICE_SIZE = 6             # rows in each input window
GAP_TIME = 6               # rows skipped between the input window and the prediction window
PREDICTION_WINDOW = 4      # rows of the label series inspected to derive a window's class
OUTCOME_TYPE = 'all'       # 'binary' selects 0/1 labels; any other value selects the CHUNK_KEY classes
NUM_CLASSES = 4            # number of label classes passed to label_binarize and the output layers

In [None]:
# Integer class id assigned to each prediction-window pattern by make_3d_tensor_slices.
CHUNK_KEY = {'ONSET': 0, 'CONTROL': 1, 'ON_INTERVENTION': 2, 'WEAN': 3}

# Load Data

In [None]:
# HDF5 file holding the 'vitals_labs', 'interventions' and 'patients' tables read below.
DATAFILE = '../data/all_hourly_data.h5'

In [None]:
# Each table is stored under its own key of the same HDF5 file.
X = pd.read_hdf(DATAFILE,'vitals_labs')      # time-series features
Y = pd.read_hdf(DATAFILE,'interventions')    # intervention series (label source)
static = pd.read_hdf(DATAFILE,'patients')    # per-stay static attributes

In [None]:
# Keep only the intervention of interest; the double brackets keep Y a one-column DataFrame.
Y = Y[[INTERVENTION]]

In [None]:
# Report the dimensions of the loaded tables.
# Single-argument, pre-formatted print calls produce the same text under
# Python 2 and Python 3, unlike the bare `print a, b` statement form.
print('Shape of X :  {}'.format(X.shape))
print('Shape of Y :  {}'.format(Y.shape))
print('Shape of static :  {}'.format(static.shape))

# Preprocessing Data

## Train-Test Split, Stratified

In [None]:
# Split the rows of `static` (one row per stay) stratified on in-hospital mortality.
# First hold out 20% as the test set ...
train_ids, test_ids = train_test_split(static.reset_index(), test_size=0.2, 
                                       random_state=RANDOM, stratify=static['mort_hosp'])
# ... then hold out 12.5% of the remaining 80% (i.e. 10% of all rows) for validation.
# Note that `train_ids` still contains the validation rows; `split_train_ids` does not.
split_train_ids, val_ids = train_test_split(train_ids, test_size=0.125, 
                                            random_state=RANDOM, stratify=train_ids['mort_hosp'])

## Imputation and Standardization of Time Series Features

In [None]:
# simple_imputer is the project helper imported from ../utils.
# NOTE(review): presumably it derives fill values from the subjects listed in
# train_ids only (which here includes the validation subjects) -- confirm in simple_impute.py.
X_clean = simple_imputer(X,train_ids['subject_id'])

In [None]:
def minmax(x):
    """Rescale ``x`` to the [0, 1] range using its own minimum and maximum.

    Parameters
    ----------
    x : pandas.Series or pandas.DataFrame
        Values to rescale. A DataFrame is rescaled column by column.

    Returns
    -------
    Same type as ``x``
        ``(x - min) / (max - min)``. A constant input (``max == min``) maps to
        all zeros instead of the NaNs that a 0/0 division would produce.
    """
    mins = x.min()
    span = x.max() - mins
    # Guard the degenerate case: a zero range would turn the whole column into NaN.
    if np.isscalar(span):
        if span == 0:
            span = 1
    else:
        span = span.replace(0, 1)
    return (x - mins) / span

In [None]:
def std_time_since_measurement(x, missing_marker=100):
    """Z-score a time-since-measurement column after zeroing its marker value.

    Parameters
    ----------
    x : pandas.Series or array-like
        Column values.
    missing_marker : number, optional
        Entries equal to this value are replaced by 0 before standardising.
        Defaults to 100, the value previously hard-coded here.
        NOTE(review): presumably the fill value simple_imputer writes for
        variables that were never measured -- confirm against simple_impute.py.

    Returns
    -------
    numpy.ndarray
        ``(values - mean) / std`` using numpy's population standard deviation
        (ddof=0). When the column is constant (std == 0) the centred values
        (all zeros) are returned rather than dividing by zero.
    """
    # np.where returns a plain ndarray, so mean/std below are numpy's, not pandas'.
    values = np.where(x == missing_marker, 0, x)
    means = values.mean()
    stds = values.std()
    if stds == 0:
        return values - means
    return (values - means) / stds

In [None]:
# Scale a copy of the imputed frame: every '<variable>/mean' column is min-max
# scaled and every '<variable>/time_since_measured' column is z-scored.
idx = pd.IndexSlice
X_std = X_clean.copy()
X_std.loc[:, idx[:, 'mean']] = X_std.loc[:, idx[:, 'mean']].apply(minmax)
X_std.loc[:, idx[:, 'time_since_measured']] = (
    X_std.loc[:, idx[:, 'time_since_measured']].apply(std_time_since_measurement)
)

In [None]:
# Drop the innermost column level, leaving only the outer variable names.
# NOTE(review): names may repeat afterwards if a variable had several inner-level columns.
X_std.columns = X_std.columns.droplevel(-1)

In [None]:
# The raw frame is not referenced again below; drop the name so its memory can be reclaimed.
del X

## Categorization of Static Features

In [None]:
def categorize_age(age):
    """Bucket an age into one of four integer categories.

    (10, 30] -> 1, (30, 50] -> 2, (50, 70] -> 3, anything else -> 4.
    Note that "anything else" covers ages above 70 as well as ages of 10 or
    below (and values that fail every comparison, such as NaN).
    """
    if 10 < age <= 30:
        return 1
    if 30 < age <= 50:
        return 2
    if 50 < age <= 70:
        return 3
    return 4

def categorize_ethnicity(ethnicity):
    """Collapse a free-text ethnicity string into one of six coarse groups.

    The substrings are tested in a fixed order and the first match wins, so a
    value containing several of them is assigned to the earliest group listed.
    Values matching none of them become 'OTHER'.
    """
    ordered_groups = (
        ('AMERICAN INDIAN', 'AMERICAN INDIAN'),
        ('ASIAN', 'ASIAN'),
        ('WHITE', 'WHITE'),
        ('HISPANIC', 'HISPANIC/LATINO'),
        ('BLACK', 'BLACK'),
    )
    for needle, label in ordered_groups:
        if needle in ethnicity:
            return label
    return 'OTHER'

In [None]:
# use gender, first_careunit, age and ethnicity for prediction
# Take an explicit copy so the assignments below modify this frame rather than
# a slice of `static` (the original `.loc` writes on the slice trigger pandas'
# SettingWithCopyWarning and may not behave as intended).
static_to_keep = static[['gender', 'age', 'ethnicity', 'first_careunit', 'intime']].copy()
# Reduce `intime` to its hour of day; it is combined with `hours_in` after the merge.
# pd.to_datetime replaces the unit-less astype('datetime64'), which newer pandas rejects.
static_to_keep['intime'] = pd.to_datetime(static_to_keep['intime']).dt.hour
static_to_keep['age'] = static_to_keep['age'].apply(categorize_age)
static_to_keep['ethnicity'] = static_to_keep['ethnicity'].apply(categorize_ethnicity)
# One-hot encode the categorical columns; `intime` stays numeric.
static_to_keep = pd.get_dummies(static_to_keep, columns = ['gender', 'age', 'ethnicity', 'first_careunit'])

## Create Feature Matrix

In [None]:
# merge time series and static data
# Both frames have their index moved into columns so the three id columns can be used as join keys.
X_merge = pd.merge(X_std.reset_index(), static_to_keep.reset_index(), on=['subject_id','icustay_id','hadm_id'])
# add absolute time feature
# hour of day = (hour of `intime` + hours elapsed in the stay) modulo 24
abs_time = (X_merge['intime'] + X_merge['hours_in'])%24
# Position 4 places the new column right after the first four columns of the merged frame.
# NOTE(review): this assumes those four are the id/hours_in columns -- confirm the index order of X_std.
X_merge.insert(4, 'absolute_time', abs_time)
# `intime` has served its purpose; it is not used as a model feature.
X_merge.drop('intime', axis=1, inplace=True)
X_merge = X_merge.set_index(['subject_id','icustay_id','hadm_id','hours_in'])

In [None]:
# Both intermediates are superseded by X_merge; drop the names so their memory can be reclaimed.
del X_std, X_clean

## Make Tensors

In [None]:
def _pad_to_max_len(frame, max_len=None, n_index_cols=4):
    """Strip the leading id columns from ``frame`` and zero-pad/truncate it to ``max_len`` rows."""
    if max_len is None:
        max_len = MAX_LEN
    values = frame.values[:max_len, n_index_cols:]
    padded = np.zeros((max_len, frame.shape[1] - n_index_cols))
    padded[:values.shape[0], :] = values
    return padded


def create_x_matrix(x, max_len=None, n_index_cols=4):
    """Turn one subject's feature rows into a fixed-size 2-D array.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows of a single subject (one group of ``groupby('subject_id')``) whose
        first ``n_index_cols`` columns are identifiers, not features.
    max_len : int, optional
        Number of rows in the result; defaults to the module-level ``MAX_LEN``.
    n_index_cols : int, optional
        Number of leading columns to discard (default 4, the value that was
        previously hard-coded).

    Returns
    -------
    numpy.ndarray of shape (max_len, x.shape[1] - n_index_cols)
        The first ``max_len`` rows of the feature columns, followed by zero rows
        when the subject has fewer than ``max_len`` rows.
    """
    return _pad_to_max_len(x, max_len, n_index_cols)


def create_y_matrix(y, max_len=None, n_index_cols=4):
    """Turn one subject's label rows into a fixed-size 2-D array.

    Same contract as ``create_x_matrix``; kept as a separate name because the
    two are applied to different frames.
    """
    return _pad_to_max_len(y, max_len, n_index_cols)

In [None]:
# Stack one padded (MAX_LEN, n_features) matrix per subject into a 3-D array.
x = np.array(list(X_merge.reset_index().groupby('subject_id').apply(create_x_matrix)))
# Same for the labels; [:,:,0] selects the first (here: only selected) label column.
# NOTE(review): x and y line up row-for-row only if both frames contain the same subjects -- confirm.
y = np.array(list(Y.reset_index().groupby('subject_id').apply(create_y_matrix)))[:,:,0]

In [None]:
# Number of usable rows per subject, in the same group order as the rows of `x`.
# The tensors above are truncated to MAX_LEN rows, so the recorded length is
# clipped to MAX_LEN as well; otherwise the windowing step would walk past the
# end of the stored series for any subject with more than MAX_LEN rows.
lengths = np.minimum(X_merge.reset_index().groupby('subject_id').size().values, MAX_LEN)

In [None]:
# subject_id for each row of `x`, used below to map the id splits to tensor row positions.
# NOTE(review): unique() keeps first-appearance order while groupby sorts its keys;
# the two agree only if X_merge is ordered by subject_id -- confirm.
keys = pd.Series(X_merge.reset_index()['subject_id'].unique())

In [None]:
# Sanity check: the three arrays should agree on their first dimension (one entry per subject).
print("X tensor shape: ", x.shape)
print("Y tensor shape: ", y.shape)
print("lengths shape: ", lengths.shape)

## Stratified Sampling

In [None]:
# Translate each subject-id split into positional indices into x / y / lengths.
train_indices = np.where(keys.isin(train_ids['subject_id']))[0]   # train + validation subjects
test_indices = np.where(keys.isin(test_ids['subject_id']))[0]
train_static = train_ids
split_train_indices = np.where(keys.isin(split_train_ids['subject_id']))[0]   # train without validation
val_indices = np.where(keys.isin(val_ids['subject_id']))[0]

In [None]:
# Slice the per-subject tensors into train / test / validation parts.
# "train" here is split_train (validation subjects excluded).
X_train = x[split_train_indices]
Y_train = y[split_train_indices]
X_test = x[test_indices]
Y_test = y[test_indices]
X_val = x[val_indices]
Y_val = y[val_indices]
# Matching per-subject row counts, needed by the windowing step.
lengths_train = lengths[split_train_indices]
lengths_val = lengths[val_indices]
lengths_test = lengths[test_indices]

In [None]:
# Number of subjects in each split.
print("Training size: ", X_train.shape[0])
print("Validation size: ", X_val.shape[0])
print("Test size: ", X_test.shape[0])

## Make Windows

In [None]:
def _label_window(gap_window, result_window, outcome_type, chunk_key):
    """Return the label for one window, or None when the window must be discarded."""
    if outcome_type == 'binary':
        # Discard windows where the intervention is already on during the gap.
        if len(gap_window) > 0 and max(gap_window) == 1:
            return None
        if len(result_window) == 0:
            return None
        peak = max(result_window)
        if peak == 1:
            return 1
        if peak == 0:
            return 0
        return None

    # Multi-class: discard windows whose gap contains an on/off transition.
    gap_window_diff = set(np.diff(gap_window))
    if 1 in gap_window_diff or -1 in gap_window_diff:
        return None

    result_window_diff = set(np.diff(result_window))
    # "steady" = the label does not change anywhere inside the prediction window
    steady = (len(result_window_diff) == 1) and (0 in result_window_diff)
    if steady and max(result_window) == 0:
        return chunk_key['CONTROL']
    if steady and max(result_window) == 1:
        return chunk_key['ON_INTERVENTION']
    if 1 in result_window_diff:
        return chunk_key['ONSET']
    if -1 in result_window_diff:
        return chunk_key['WEAN']
    return None


def make_3d_tensor_slices(X_tensor, Y_tensor, lengths, slice_size=None, gap_time=None,
                          prediction_window=None, outcome_type=None, chunk_key=None):
    """Cut per-subject series into fixed-size input windows with one label each.

    For every subject and every start position ``t`` the input window is rows
    ``[t, t + slice_size)`` of the features with the label series of the same
    rows appended as an extra last column. The label is derived from the
    prediction window ``[t + slice_size + gap_time, t + slice_size + gap_time +
    prediction_window)``; the rows in between form the gap.

    Parameters
    ----------
    X_tensor : numpy.ndarray, shape (subjects, timesteps, features)
    Y_tensor : numpy.ndarray, shape (subjects, timesteps)
    lengths : array-like of int
        Number of valid rows for each subject.
    slice_size, gap_time, prediction_window : int, optional
        Window geometry; default to the module-level ``SLICE_SIZE``,
        ``GAP_TIME`` and ``PREDICTION_WINDOW``.
    outcome_type : str, optional
        ``'binary'`` for 0/1 labels, anything else for the four ``chunk_key``
        classes; defaults to the module-level ``OUTCOME_TYPE``.
    chunk_key : dict, optional
        Mapping with keys 'ONSET', 'CONTROL', 'ON_INTERVENTION', 'WEAN';
        defaults to the module-level ``CHUNK_KEY``.

    Returns
    -------
    (X_windows, Y_windows)
        ``X_windows`` has shape (n_windows, slice_size, features + 1) and
        ``Y_windows`` has shape (n_windows,). Windows that cannot be labelled
        (transition inside the gap, or no rule matches) are left out.
    """
    slice_size = SLICE_SIZE if slice_size is None else slice_size
    gap_time = GAP_TIME if gap_time is None else gap_time
    prediction_window = PREDICTION_WINDOW if prediction_window is None else prediction_window
    outcome_type = OUTCOME_TYPE if outcome_type is None else outcome_type
    chunk_key = CHUNK_KEY if chunk_key is None else chunk_key

    lengths = np.asarray(lengths)
    num_patients = X_tensor.shape[0]
    num_features = X_tensor.shape[2]

    # Upper bound on the number of windows; trimmed to the real count at the end.
    max_windows = int(lengths.sum())
    X_tensor_new = np.zeros((max_windows, slice_size, num_features + 1))
    Y_tensor_new = np.zeros((max_windows))

    horizon = slice_size + gap_time + prediction_window
    current_row = 0

    for patient_index in range(num_patients):
        x_patient = X_tensor[patient_index]
        y_patient = Y_tensor[patient_index]
        length = int(lengths[patient_index])

        # NOTE(review): this bound stops one start position before the last window
        # that would still fit inside `length`; kept unchanged so the generated
        # dataset stays the same -- confirm whether that is intended.
        for timestep in range(length - horizon):
            input_end = timestep + slice_size
            gap_end = input_end + gap_time

            gap_window = y_patient[input_end:gap_end]
            result_window = y_patient[gap_end:gap_end + prediction_window]

            # The label is recomputed from scratch for every window, so a window
            # that matches no rule can never inherit the previous window's label.
            result = _label_window(gap_window, result_window, outcome_type, chunk_key)
            if result is None:
                continue

            x_window = x_patient[timestep:input_end]
            y_window = y_patient[timestep:input_end]
            # append the intervention history as an additional feature column
            X_tensor_new[current_row] = np.concatenate((x_window, np.expand_dims(y_window, 1)), axis=1)
            Y_tensor_new[current_row] = result
            current_row += 1

    X_tensor_new = X_tensor_new[:current_row,:,:]
    Y_tensor_new = Y_tensor_new[:current_row]

    return X_tensor_new, Y_tensor_new

In [None]:
# Window each split separately so no window mixes subjects from different splits.
x_train, y_train = make_3d_tensor_slices(X_train, Y_train, lengths_train)
x_val, y_val = make_3d_tensor_slices(X_val, Y_val, lengths_val)
x_test, y_test = make_3d_tensor_slices(X_test, Y_test, lengths_test)

In [None]:
# One-hot (indicator) versions of the integer labels, used for the per-class
# ROC AUC computations and as the Keras training target.
y_train_classes = label_binarize(y_train, classes=range(NUM_CLASSES))
y_val_classes = label_binarize(y_val, classes=range(NUM_CLASSES))
y_test_classes = label_binarize(y_test, classes=range(NUM_CLASSES))

In [None]:
# Only the windowed arrays (lower-case names) are used from here on.
del X_train, Y_train, X_test, Y_test, X_val, Y_val

In [None]:
# Shapes are (n_windows, SLICE_SIZE, n_features + 1).
print('shape of x_train: ', x_train.shape)
print('shape of x_val: ', x_val.shape)
print('shape of x_test: ', x_test.shape)

# Random Forest and Logistic Regression

## Prepare data

In [None]:
# Derive the column layout from the frames built above instead of hard-coding
# it (the literals 17 and 124 silently go stale when the feature set changes).
static_col = static_to_keep.shape[1] - 1          # one-hot static columns; `intime` is dropped after the merge
time_series_col = X_merge.shape[1] - static_col   # absolute_time plus the time-series columns
# Every window carries one extra trailing column (the intervention history).
assert static_col + time_series_col + 1 == x_train.shape[2], \
    "column layout does not match the window tensors; re-run the preprocessing cells"

In [None]:
def remove_duplicate_static(x, n_time_series=None):
    """Flatten windows to 2-D for the non-sequential models, keeping static columns once.

    Parameters
    ----------
    x : numpy.ndarray, shape (windows, timesteps, columns)
        Column layout per timestep: ``n_time_series`` time-varying columns,
        then the static columns, then one final intervention column.
    n_time_series : int, optional
        Number of leading time-varying columns; defaults to the module-level
        ``time_series_col``.

    Returns
    -------
    numpy.ndarray, shape (windows, n_static + timesteps * n_time_series + timesteps)
        Per window: the static columns taken from the first timestep only,
        followed by all time-varying values flattened timestep by timestep,
        followed by the intervention column of every timestep.
    """
    if n_time_series is None:
        n_time_series = time_series_col
    # static values are identical at every timestep, so the first one suffices
    x_static = x[:, 0, n_time_series:x.shape[2] - 1]
    x_timeseries = np.reshape(x[:, :, :n_time_series], (x.shape[0], -1))
    x_int = x[:, :, -1]
    return np.concatenate((x_static, x_timeseries, x_int), axis=1)

In [None]:
# concatenate hourly features
# (one flat feature vector per window, as input for the random forest and logistic regression)
x_train_concat = remove_duplicate_static(x_train)
x_val_concat = remove_duplicate_static(x_val)
x_test_concat = remove_duplicate_static(x_test)

In [None]:
# The second dimension should be identical across the three splits.
print(x_train_concat.shape)
print(x_val_concat.shape)
print(x_test_concat.shape)

## Hyperparameter Generation

In [None]:
class DictDist():
    """Joint sampler over a dict of independent samplers.

    ``dict_of_rvs`` maps a parameter name to any object exposing ``rvs(n)``.
    """
    def __init__(self, dict_of_rvs):
        self.dict_of_rvs = dict_of_rvs

    def rvs(self, n):
        """Draw ``n`` samples; returns a list of ``n`` dicts, one value per parameter."""
        # draw all n values for each parameter first ...
        columns = {name: dist.rvs(n) for name, dist in self.dict_of_rvs.items()}
        # ... then regroup them into one dict per sample
        return [{name: draws[i] for name, draws in columns.items()} for i in range(n)]
    
class Choice():
    """Uniform sampler over a fixed sequence of options, with the ``rvs(n)`` interface."""
    def __init__(self, options):
        self.options = options

    def rvs(self, n):
        """Return a list of ``n`` options drawn uniformly with replacement."""
        positions = ss.randint(0, len(self.options)).rvs(n)
        return [self.options[pos] for pos in positions]

In [None]:
# Number of random hyperparameter candidates drawn per model.
N = 10
# Seed numpy's global RNG before sampling so the candidate lists are repeatable.
np.random.seed(RANDOM)
# Logistic regression: only C and max_iter actually vary; the remaining
# entries are single-option choices, i.e. fixed settings.
LR_dist = DictDist({
    'C': Choice(np.geomspace(1e-3, 1e3, 10000)),
    'penalty': Choice(['l2']),
    'solver': Choice(['sag']),
    'max_iter': Choice([100, 200]),
    'class_weight': Choice(['balanced']),
    'multi_class': Choice(['multinomial']),
    'random_state': Choice([RANDOM])
})
LR_hyperparams_list = LR_dist.rvs(N)
        
# Random forest: tree count, depth and split/leaf sizes are drawn from integer
# ranges (scipy's randint excludes the upper bound).
RF_dist = DictDist({
    'n_estimators': ss.randint(50, 200),
    'max_depth': ss.randint(2, 10),
    'min_samples_split': ss.randint(2, 75),
    'min_samples_leaf': ss.randint(1, 50),
    'class_weight': Choice(['balanced']),
    'random_state': Choice([RANDOM])

})
RF_hyperparams_list = RF_dist.rvs(N)

## Fit model

In [None]:
def run_basic(model, hyperparams_list, X_train, X_val, X_test):
    """Pick the best hyperparameters on the validation set, then refit and score on test.

    Parameters
    ----------
    model : callable
        Estimator class (or factory) accepting the hyperparameters as keyword
        arguments and exposing ``fit`` / ``predict_proba``.
    hyperparams_list : sequence of dict
        Candidate settings; must not be empty.
    X_train, X_val, X_test : array-like
        Feature matrices. The matching labels are read from the module-level
        ``y_train`` and ``y_val_classes`` (and, inside ``run_only_final``,
        ``y_val`` / ``y_test_classes``).

    Returns
    -------
    tuple
        Whatever ``run_only_final`` returns for the best candidate.

    Raises
    ------
    ValueError
        If ``hyperparams_list`` is empty (previously this surfaced later as an
        obscure TypeError from ``model(**None)``).
    """
    if len(hyperparams_list) == 0:
        raise ValueError("hyperparams_list must contain at least one candidate")

    # np.inf is the canonical spelling; the np.Inf alias was removed in NumPy 2.0.
    best_s, best_hyperparams = -np.inf, None
    for i, hyperparams in enumerate(hyperparams_list):
        print("On sample %d / %d (hyperparams = %s)" % (i+1, len(hyperparams_list), repr((hyperparams))))
        M = model(**hyperparams)
        M.fit(X_train, y_train)
        # model selection criterion: macro-averaged ROC AUC on the validation windows
        s = roc_auc_score(y_val_classes, M.predict_proba(X_val),average='macro')
        if s > best_s:
            best_s, best_hyperparams = s, hyperparams
            print("New Best Score: %.2f @ hyperparams = %s" % (100*best_s, repr((best_hyperparams))))

    return run_only_final(model, best_hyperparams, X_train, X_val, X_test)

def run_only_final(model, best_hyperparams, X_train, X_val, X_test):
    """Refit ``model`` on train + validation data and evaluate it on the test set.

    Labels come from the module-level ``y_train``, ``y_val`` and
    ``y_test_classes``. Returns ``(fitted_model, best_hyperparams,
    per_class_auc, macro_auc)``.
    """
    best_M = model(**best_hyperparams)
    # the final model sees both the training and the validation windows
    fit_features = np.concatenate((X_train, X_val))
    fit_labels = np.concatenate((y_train, y_val))
    best_M.fit(fit_features, fit_labels)

    test_probabilities = best_M.predict_proba(X_test)
    per_class_auc = roc_auc_score(y_test_classes, test_probabilities, average=None)
    macro_auc = roc_auc_score(y_test_classes, test_probabilities, average='macro')

    return best_M, best_hyperparams, per_class_auc, macro_auc

In [None]:
# Tune, refit and evaluate each baseline in turn; results[model_name] ends up
# holding the tuple returned by run_basic
# (fitted model, best hyperparameters, per-class test AUC, macro test AUC).
results = {}
for model_name, model, hyperparams_list in [('RF', RandomForestClassifier, RF_hyperparams_list), 
                                            ('LR', LogisticRegression, LR_hyperparams_list)]:
    # placeholder entry; it is overwritten below once run_basic returns
    if model_name not in results: results[model_name] = {}

    print("Running model %s " % (model_name))
    results[model_name] = run_basic(
        model, hyperparams_list, x_train_concat, x_val_concat, x_test_concat)
    print("Final results for model %s " % (model_name))
    print(results[model_name])

# CNN

In [None]:
import tensorflow as tf
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten, Reshape, RepeatVector, Lambda
from keras.layers import Input, Conv2D, Conv1D, Conv3D, MaxPooling2D, MaxPooling1D
from keras.layers import Concatenate
from keras import backend as K
from keras.callbacks import EarlyStopping

In [None]:
# Seed TensorFlow's random number generation with the shared RANDOM seed.
# NOTE(review): a top-level `set_random_seed` exists in TensorFlow 1.x only -- confirm the pinned version.
from tensorflow import set_random_seed
set_random_seed(RANDOM)

In [None]:
# CNN training settings.
BATCH_SIZE = 128   # windows per gradient step
EPOCHS = 12        # upper bound on epochs; early stopping may end training sooner
DROPOUT = 0.5      # rate passed to the Dropout layer after the dense layer

In [None]:
from sklearn.utils.class_weight import compute_class_weight
# 'balanced' weights are inversely proportional to class frequency in y_train.
# The arguments are passed by keyword: recent scikit-learn releases reject the
# positional form, while the keyword form is accepted by old and new releases alike.
class_weight = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
# {class index: weight}. This relies on the labels present in y_train being
# exactly 0..k-1, so that position i in the weight array belongs to class i.
class_weight = dict(zip(range(len(class_weight)), class_weight))

In [None]:
# Three parallel 1-D convolutions (kernel sizes 3, 4 and 5) over the time axis
# of each window, each followed by max pooling; their outputs are concatenated
# and fed through a dense layer to a softmax over the NUM_CLASSES labels.
input_shape = (x_train.shape[1], x_train.shape[2])
inputs = Input(shape=input_shape)


def _conv_branch(branch_inputs, kernel_size, name):
    """One convolution + max-pooling branch; only kernel size and layer name differ between branches."""
    branch = Conv1D(64, kernel_size=kernel_size,
                    strides=1,
                    activation='relu',
                    input_shape=input_shape,
                    padding='same',
                    name=name)(branch_inputs)
    return MaxPooling1D(pool_size=3, strides=1)(branch)


# Layer names are kept exactly as before so saved weights/checkpoints still match.
models = [_conv_branch(inputs, kernel_size, name)
          for kernel_size, name in [(3, 'conv2'), (4, 'conv3'), (5, 'conv4')]]
model, model2, model3 = models

full_model = keras.layers.concatenate(models)
full_model = Flatten()(full_model)
full_model = Dense(128, activation='relu')(full_model)
full_model = Dropout(DROPOUT)(full_model)
full_model = Dense(NUM_CLASSES, activation='softmax')(full_model)

# Keras 2 names these arguments `inputs` / `outputs`; the singular `input=` is
# the legacy Keras 1 spelling and is not accepted by later releases.
full_model = keras.models.Model(inputs=inputs, outputs=full_model)

full_model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(lr=.0005),
              metrics=['accuracy'])

# stop once the validation loss has not improved for two consecutive epochs
early_stopping = EarlyStopping(monitor='val_loss', patience=2)

full_model.fit(x_train, y_train_classes,
          batch_size=BATCH_SIZE,
          epochs=EPOCHS,
          verbose=1,
          class_weight=class_weight,
          callbacks=[early_stopping],
          validation_data=(x_val, y_val_classes))

In [None]:
# Class probabilities for the test windows, scored with ROC AUC:
test_preds_cnn = full_model.predict(x_test, batch_size=BATCH_SIZE)
print(roc_auc_score(y_test_classes, test_preds_cnn, average=None))      # one AUC per class
print(roc_auc_score(y_test_classes, test_preds_cnn, average='macro'))   # unweighted mean over classes
print(roc_auc_score(y_test_classes, test_preds_cnn, average='micro'))   # pooled over all class indicators

# LSTM

In [None]:
import tensorflow as tf
import functools

In [None]:
# LSTM training settings (BATCH_SIZE and EPOCHS re-declare the CNN values).
BATCH_SIZE = 128
EPOCHS = 12
KEEP_PROB = 0.8           # fed to the model's dropout_prob placeholder during training
REGULARIZATION = 0.001    # coefficient of the L2 penalty added to the loss
NUM_HIDDEN = [512, 512]   # units per stacked LSTM layer

In [None]:
def lazy_property(function):
    """Decorator turning a zero-argument method into a compute-once property.

    The first access calls ``function(self)`` and stores the result on the
    instance under ``'_' + function.__name__``; later accesses return the
    stored value without calling ``function`` again.
    """
    cache_name = '_' + function.__name__

    def cached_getter(self):
        if not hasattr(self, cache_name):
            setattr(self, cache_name, function(self))
        return getattr(self, cache_name)

    # keep the wrapped method's name and docstring on the property getter
    return property(functools.wraps(function)(cached_getter))


class VariableSequenceLabelling:
    """Stacked-LSTM window classifier built as a TensorFlow 1.x graph.

    Parameters
    ----------
    data : tf.Tensor
        Input windows, fed to ``tf.nn.dynamic_rnn`` as float32.
    target : tf.Tensor
        Integer class label of every window in the batch.
    dropout_prob : tf.Tensor or float
        Despite its name this is used as the *keep* probability of the
        dropout wrappers (input and output side of every LSTM layer).
    reg : tf.Tensor or float
        Coefficient of the L2 penalty over all trainable variables.
    num_hidden : list of int
        Units of each stacked LSTM layer.
    class_weights : sequence of numbers, or dict {class index: weight}
        Per-class loss weights. A dict is converted to a list ordered by key,
        so its keys are expected to be the class indices 0..k-1.

    The graph pieces (``prediction``, ``cross_ent``, ``optimize``, ``error``,
    ``summaries``) are lazy properties: each is built on first access and
    reused afterwards.
    """

    def __init__(self, data, target, dropout_prob, reg, num_hidden=[256], class_weights=[1,1,1,1]):
        # The notebook builds its class weights as a {class index: weight} dict;
        # normalise that to a plain list so it can be turned into a tensor.
        if isinstance(class_weights, dict):
            class_weights = [class_weights[label] for label in sorted(class_weights)]
        self.data = data
        self.target = target
        self.dropout_prob = dropout_prob
        self.reg = reg
        self._num_hidden = num_hidden
        self._num_layers = len(num_hidden)
        self.num_classes = len(class_weights)
        self.attn_length = 0
        self.class_weights = class_weights
        # Touch the lazy properties so the whole graph exists once construction returns.
        self.prediction
        self.error
        self.optimize

    @lazy_property
    def make_rnn_cell(self,
                      attn_length=0,
                      base_cell=tf.nn.rnn_cell.BasicLSTMCell,
                      state_is_tuple=True):
        """Build the (multi-layer, dropout-wrapped) recurrent cell.

        Accessed as a property, so the keyword parameters always take their
        defaults; ``attn_length`` is read from ``self.attn_length`` instead.
        """
        attn_length = self.attn_length
        input_dropout = self.dropout_prob
        output_dropout = self.dropout_prob

        cells = []
        for num_units in self._num_hidden:
            cell = base_cell(num_units, state_is_tuple=state_is_tuple)
            cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=input_dropout, output_keep_prob=output_dropout)
            cells.append(cell)

        cell = tf.nn.rnn_cell.MultiRNNCell(cells, state_is_tuple=state_is_tuple)

        if attn_length > 0:
            # optional attention wrapper, loaded from the project's `attention` directory
            sys.path.insert(0, 'attention')
            import attention_cell_wrapper_single
            cell = attention_cell_wrapper_single.AttentionCellWrapper(
                cell, attn_length, input_size=int(self.data.get_shape().as_list()[2]), state_is_tuple=state_is_tuple)
            print(cell)
        return cell


    # predictor for slices
    @lazy_property
    def prediction(self):
        """Return ``(logits, softmax probabilities)`` for every window in the batch."""
        cell = self.make_rnn_cell

        # Recurrent network.
        output, final_state = tf.nn.dynamic_rnn(cell,
            self.data,
            dtype=tf.float32
        )

        with tf.variable_scope("model") as scope:
            tf.get_variable_scope().reuse_variables()

            # final weights
            num_classes = self.num_classes
            weight, bias = self._weight_and_bias(self._num_hidden[-1], num_classes)

            # Project the final state of the last layer onto the classes.
            # NOTE(review): [-1][-1] presumably picks the hidden state `h` of the
            # top LSTM layer's (c, h) state tuple -- confirm.
            if self.attn_length > 0:
                logits = tf.matmul(final_state[0][-1][-1], weight) + bias
            else:
                logits = tf.matmul(final_state[-1][-1], weight) + bias

            prediction = tf.nn.softmax(logits)

            return logits, prediction


    @lazy_property
    def cross_ent(self):
        """Class-weighted mean cross-entropy plus the L2 penalty (scalar)."""
        logits = self.prediction[0]
        real = tf.cast(tf.squeeze(self.target), tf.int32)

        # Keep the weights as floats: casting them to int32 truncated fractional
        # 'balanced' weights (anything below 1 became 0, removing that class
        # from the loss altogether).
        class_weight = tf.expand_dims(tf.cast(self.class_weights, tf.float32), axis=0)  # shape [1, num_classes]
        print("class_weights", class_weight)
        one_hot_labels = tf.one_hot(real, depth=self.num_classes, dtype=tf.float32)  # shape [batch_size, num_classes]
        # look up each example's weight via one-hot x weights
        weight_per_label = tf.transpose(tf.matmul(one_hot_labels, tf.transpose(class_weight)))  # shape [1, batch_size]

        xent = tf.multiply(weight_per_label, tf.nn.sparse_softmax_cross_entropy_with_logits(labels=real, logits=logits, name="xent_raw")) #shape [1, batch_size]
        loss = tf.reduce_mean(xent) #shape 1
        ce = loss
        l2 = self.reg * sum(tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())
        ce += l2
        return ce

    @lazy_property
    def optimize(self):
        """Adam training op minimising ``cross_ent``."""
        learning_rate = 0.0003
        optimizer = tf.train.AdamOptimizer(learning_rate)
        return optimizer.minimize(self.cross_ent)

    @lazy_property
    def error(self):
        """Fraction of windows in the batch whose arg-max prediction is wrong."""
        prediction = tf.argmax(self.prediction[1], 1)
        real = tf.cast(self.target, tf.int32)
        prediction = tf.cast(prediction, tf.int32)
        mistakes = tf.cast(tf.not_equal(real, prediction), tf.float32)
        # Average over the actual batch; the previous fixed divisor of 128 was
        # only correct for batches of exactly 128 windows.
        return tf.reduce_mean(mistakes)

    @staticmethod
    def _weight_and_bias(in_size, out_size):
        """Create the output layer's weight matrix and bias vector as new variables."""
        weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
        bias = tf.constant(0.1, shape=[out_size])
        return tf.Variable(weight), tf.Variable(bias)


    @lazy_property
    def summaries(self):
        """Merged TensorBoard summary op for the loss and the error rate."""
        tf.summary.scalar('loss', tf.reduce_mean(self.cross_ent))
        tf.summary.scalar('error', self.error)
        merged = tf.summary.merge_all()
        return merged

In [None]:
# Train the LSTM with mini-batches and early stopping on validation macro AUC,
# then report the test AUC of the epoch with the best validation score.
tf.reset_default_graph()

config = tf.ConfigProto(allow_soft_placement = True)

with tf.Session(config = config) as sess, tf.device('/cpu:0'):
    _, length, num_features = x_train.shape
    num_data_cols = num_features
    print("num features {}".format(num_features))
    print("num_data cols {}".format(num_data_cols))

    # placeholders
    data = tf.placeholder(tf.float32, [None, length, num_data_cols])
    target = tf.placeholder(tf.float32, [None])
    dropout_prob = tf.placeholder(tf.float32)
    reg = tf.placeholder(tf.float32)

    # initialization
    model = VariableSequenceLabelling(data, target, dropout_prob, reg, num_hidden=NUM_HIDDEN, class_weights=class_weight)
    sess.run(tf.global_variables_initializer())
    print('Initialized Variables...')


    batch_size = BATCH_SIZE
    dp = KEEP_PROB
    rp = REGULARIZATION
    train_samples = x_train.shape[0]
    # an ndarray can be shuffled in place on both Python 2 and Python 3
    # (a Python 3 `range` object cannot)
    indices = np.arange(train_samples)
    num_classes = NUM_CLASSES

    # for storing results
    test_data = x_test
    val_data = x_val

    val_aucs = []
    test_aucs = []
    val_aucs_macro = []
    test_aucs_macro = []

    epoch = -1

    print('Beginning Training...')

    # Always run the first four epochs; afterwards continue only while the
    # validation macro AUC rose at least once over the last three epochs.
    # (`early_stop` is first assigned inside the loop; the `epoch < 3` test
    # short-circuits before it is read.)
    while (epoch < 3 or max(np.diff(early_stop[-3:])) > 0):
        epoch += 1
        np.random.shuffle(indices)

        # floor division: a trailing partial batch is skipped, and the count
        # stays an int under Python 3 as well
        num_batches = train_samples // batch_size
        for batch_index in range(num_batches):

            sample_indices = indices[batch_index*batch_size:batch_index*batch_size+batch_size]
            batch_data = x_train[sample_indices, :, :num_data_cols]
            batch_target = y_train[sample_indices]
            _, loss = sess.run([model.optimize, model.cross_ent], {data: batch_data, target: batch_target, dropout_prob: dp, reg: rp})

        # evaluate with dropout disabled (keep probability 1)
        cur_val_preds = sess.run(model.prediction, {data: x_val, target: y_val, dropout_prob: 1, reg: rp}) 
        val_preds = cur_val_preds[1]

        cur_test_preds = sess.run(model.prediction, {data: x_test, target: y_test, dropout_prob: 1, reg: rp}) 
        test_preds = cur_test_preds[1]

        val_auc_macro = roc_auc_score(y_val_classes, val_preds, average='macro')
        test_auc_macro = roc_auc_score(y_test_classes, test_preds, average='macro')
        val_aucs_macro.append(val_auc_macro)
        test_aucs_macro.append(test_auc_macro)

        val_auc = roc_auc_score(y_val_classes, val_preds, average=None)
        test_auc = roc_auc_score(y_test_classes, test_preds, average=None)
        val_aucs.append(val_auc)
        test_aucs.append(test_auc)

        # the macro AUCs are plain floats, so the history is used directly
        early_stop = val_aucs_macro

        print("Val AUC =  {}".format(val_auc))
        print("Test AUC =  {}".format(test_auc))


    # model selection uses the validation score only; test numbers are just reported
    best_epoch = np.argmax(val_aucs_macro)

    best_val_auc = val_aucs[best_epoch]
    best_test_auc = test_aucs[best_epoch]
    best_test_auc_macro = test_aucs_macro[best_epoch]

    print('Best Test AUC:  {} at epoch  {}'.format(best_test_auc, best_epoch))
    print('Best Test AUC Macro:  {} at epoch  {}'.format(best_test_auc_macro, best_epoch))