In [1]:
import sys
# Append root path 
sys.path.append("../")
sys.path.append("../lmmnn")

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# GPU configuration. Both variables are set before the first GPU query below:
# allow TensorFlow to grow GPU memory on demand and expose only device 0.
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Query the device name once and reuse it for both the check and the message.
gpu_name = tf.test.gpu_device_name()
if gpu_name != '/device:GPU:0':
    print('WARNING: GPU device not found.')
else:
    print('SUCCESS: Found GPU: {}'.format(gpu_name))

from model.mixed_effects import *
from utils.fe_models import get_model
from utils.evaluation import *
from utils.utils import *
from data.preprocessing import dataset_preprocessing

# from vis.utils.utils import apply_modifications
# helper function
def update_layer_activation(model, activation, index=-1):
    """Replace the activation of one layer of ``model`` in place.

    Args:
        model: Object exposing an indexable ``layers`` sequence.
        activation: Value stored on the selected layer's ``activation`` attribute.
        index: Position of the layer in ``model.layers`` (default: the last layer).

    Returns:
        The same ``model`` object that was passed in.
    """
    target_layer = model.layers[index]
    setattr(target_layer, "activation", activation)
    return model

from tensorflow.keras.optimizers import Adam
from keras.models import Sequential, Model
from keras.layers import Dense, Input, Reshape, Embedding, Concatenate
from tensorflow.keras.activations import sigmoid

from sklearn.metrics import accuracy_score as acc
from sklearn.metrics import roc_auc_score as auroc
from sklearn.metrics import f1_score as f1
from sklearn.model_selection import train_test_split
from category_encoders import TargetEncoder
from tensorflow_addons.metrics import F1Score

from scipy import stats
import pickle
import yaml
import time
import gc

RS = 555  # base random seed: part of the prepared-data / results paths and used to seed TF and NumPy per fold

SUCCESS: Found GPU: /device:GPU:0


In [2]:
class MCMCSamplingCallback(tf.keras.callbacks.Callback):
    """Keras callback that runs a Hamiltonian Monte Carlo step at the start of each epoch.

    The callback reads and writes state on the attached model (``self.model``):
    ``current_state``, ``previous_kernel_results``, ``all_samples``, ``mean_samples``,
    ``acceptance_rates``, ``e_step_times``, ``stds`` and the first ``len(qs)``
    trainable variables of ``data_model``.

    NOTE(review): ``tfp`` is not imported explicitly in this notebook; presumably it
    comes into scope through one of the star imports in the import cell -- confirm.

    Args:
        num_mcmc_samples: Number of results requested from ``sample_chain`` per epoch.
        step_size: Initial HMC step size. It is halved whenever the most recently
            recorded acceptance rate is below ``1e-4``.
        perc_burnin: Fraction used to decide how many of the earliest stored samples
            are skipped when averaging (``round(epoch * perc_burnin)``).
        num_burnin_steps: Forwarded to ``tfp.mcmc.sample_chain``.
        warm_restart: If not ``None``, the chain state is re-drawn from the data
            model every ``warm_restart``-th epoch.
    """

    def __init__(self,
                 num_mcmc_samples=1,
                 step_size=0.01,
                 perc_burnin=0.1,
                 num_burnin_steps=0,
                 warm_restart=None):
        super().__init__()

        self.num_mcmc_samples = tf.constant(num_mcmc_samples)
        self.perc_burnin = perc_burnin
        self.num_burnin_steps = num_burnin_steps
        self.warm_restart = warm_restart
        # Non-trainable variable so the step size can be adapted between epochs.
        self.step_size = tf.Variable(step_size, trainable=False)
        # History of the step size used in every sampling call.
        self.step_sizes = []

    def get_mcmc_kernel(self, step_size):
        """Build an HMC kernel (3 leapfrog steps) for the model's target log-probability."""
        return tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=self.model.target_log_prob_fn,
            num_leapfrog_steps=3,
            step_size=step_size)

    def on_train_begin(self, logs=None):
        """Create the default kernel once the model is attached.

        ``get_mcmc_samples`` builds its own kernel via ``get_mcmc_kernel``; the
        ``mcmc_kernel`` attribute is kept for code that accesses it directly.
        """
        self.mcmc_kernel = self.get_mcmc_kernel(self.step_size)

    def on_epoch_begin(self, epoch, logs=None):
        """Seed the sample history on the first epoch, then sample unless FE pretraining is still running."""
        if epoch == 0:
            # Store the first entry of every initial state tensor as sample number zero.
            self.model.all_samples.append([state[0] for state in self.model.current_state])
            self._update_mean_samples(epoch)

        if self.model.fe_pretraining and not self.model.fe_converged:
            # No sampling during fixed-effects pretraining; -1 marks the skipped epoch.
            self.model.acceptance_rates.append(-1)
        else:
            self.run_sampling(epoch)

    def _update_mean_samples(self, epoch):
        """Average the stored samples per state part, skipping the first ``round(epoch * perc_burnin)``."""
        # NOTE(review): if fewer samples than this offset are stored (e.g. after many
        # epochs without sampling) the slice is empty -- confirm this cannot happen.
        first_kept = round(epoch * self.perc_burnin)
        kept_samples = self.model.all_samples[first_kept:]
        self.model.mean_samples = [
            tf.reduce_mean([sample[q] for sample in kept_samples], axis=0)
            for q in range(len(self.model.qs))
        ]

    def run_sampling(self, epoch):
        """Refresh the model's cached network outputs, draw new HMC samples and update the model state."""
        # Cache the current fixed-effects prediction (and embeddings, if enabled).
        self.model.fX.assign(self.model.fe_model(self.model.X, training=False))

        if self.model.embed_x:
            self.model.X_embedded.assign(self.model.X_embed_model(self.model.X, training=False))

        if self.model.embed_z:
            for q_num in range(len(self.model.qs)):
                self.model.Z_embedded[q_num].assign(
                    self.model.Z_embed_models[q_num](self.model.Z[:, q_num], training=False))

        # Halve the step size when the last recorded acceptance rate is (almost) zero.
        # NOTE(review): the -1 marker appended for skipped epochs also satisfies this
        # condition, so the first sampling call after FE pretraining halves the step
        # size once -- confirm whether that is intended.
        if len(self.model.acceptance_rates) > 0 and self.model.acceptance_rates[-1] < 0.0001:
            self.step_size.assign(self.step_size / 2)
            print(f"Adapt step size to {float(self.step_size)}")

        if self.warm_restart is not None and epoch > 0:
            restart = ((epoch + 1) % self.warm_restart) == 0

            if restart:
                print("\n Warm restart to unstuck the chain")
                # Use the embedded inputs wherever embedding is enabled.
                x_input = self.model.X_embedded if self.model.embed_x else self.model.X
                z_input = self.model.Z_embedded if self.model.embed_z else self.model.Z
                # Re-draw the chain state from the data model; the last sampled part is dropped.
                self.model.current_state = self.model.data_model(
                    self.model.fX, x_input, z_input).sample(1, seed=self.model.RS)[:-1]

        print("\n Start sampling for epoch {} of training".format(epoch + 1))
        start = time.time()
        new_state, self.model.previous_kernel_results = self.get_mcmc_samples(
            self.model.current_state,
            self.num_mcmc_samples,
            None)

        # Kernels that wrap another kernel expose the ratio on `inner_results` instead.
        kernel_results = self.model.previous_kernel_results
        try:
            log_accept_ratio = kernel_results.log_accept_ratio
        except AttributeError:
            log_accept_ratio = kernel_results.inner_results.log_accept_ratio
        acceptance_rate = tf.math.exp(tf.minimum(log_accept_ratio, 0.))

        self.step_sizes.append(float(self.step_size))

        end = time.time()

        self.model.current_state = [tf.identity(i) for i in new_state]
        # Todo: Append all current states
        self.model.acceptance_rates.append(acceptance_rate)
        self.model.all_samples.extend(
            [[state[num] for state in self.model.current_state]
             for num in range(self.num_mcmc_samples)])

        self._update_mean_samples(epoch)

        self.model.e_step_times.append(round(end - start, 2))

        # Set the first len(qs) trainable variables of the data model to the standard
        # deviation (over axis 0) of the latest sample of the matching state part.
        for q_num in range(len(self.model.qs)):
            self.model.data_model.trainable_variables[q_num].assign(
                tf.math.reduce_std(self.model.current_state[q_num][-1], axis=0))

        self.model.stds.append([tf.identity(i) for i in self.model.data_model._stddev_z])

    @tf.function(reduce_retracing=True)  # autograph=False, jit_compile=True, reduce_retracing=True)
    def get_mcmc_samples(self, current_state, num_mcmc_samples=tf.constant(1), previous_kernel_results=None):
        """Run ``tfp.mcmc.sample_chain`` starting from the last entry of every state part.

        Args:
            current_state: List of state tensors; only ``state[-1]`` of each is used as start point.
            num_mcmc_samples: Number of results to draw.
            previous_kernel_results: Optional kernel results to resume from.

        Returns:
            Tuple ``(samples, final_kernel_results)``.
        """
        samples, _, previous_kernel_results = tfp.mcmc.sample_chain(
            kernel=self.get_mcmc_kernel(self.step_size), num_results=num_mcmc_samples,
            current_state=[state[-1] for state in current_state],
            num_burnin_steps=self.num_burnin_steps,
            trace_fn=None, previous_kernel_results=previous_kernel_results,
            return_final_kernel_results=True, seed=self.model.RS)

        return samples, previous_kernel_results

#### Download and save the data from Pargent et al. by running "data/download_pargent2022_datasets.py" before running this notebook.

In [3]:
# ---- Experiment configuration ----
mode = "cv"          # split scheme; selects how `data_path` is built in the training loop
hct = 10             # forwarded to dataset preprocessing and part of the prepared-data path
test_ratio = None    # only read when mode is "train_test" or "train_val_test"
val_ratio = None     # only read when mode is "train_val_test"
folds = 5            # number of folds iterated over in the training loop
dataset_names = ["eucalyptus", "Midwest_survey", "hpc-job-scheduling", "video-game-sales", "okcupid-stem", "Diabetes130US"]


def loss_use():
    """Return the Keras loss class; instantiate it with ``loss_use()()``."""
    return tf.keras.losses.CategoricalCrossentropy


target = "categorical"
batch_size = 512
epochs = 500
early_stopping = 20              # patience for early stopping
model_name = "AutoGluon"         # passed to `get_model`
embed_dims_method = "AutoGluon"  # only referenced by the disabled embedding baseline

# Nested results: results[dataset_name][fold_num] -> dict of histories / predictions / times / other_info
results = {}

#######################################

# Train the GMENN model with HMC sampling on every dataset and fold, or load the
# cached result pickle when one already exists for that fold.
for dataset_name in dataset_names:
    print(f"Start training procedure for {dataset_name}")
    data_path = f"{mode}_RS{RS}_hct{hct}"
    if mode == "cv":
        data_path += f"_{folds}folds"
    elif mode == "train_test":
        # NOTE(review): `1-test_ratio*100` looks like a precedence slip (the branch below
        # uses `round(100-...)`). Left unchanged because the directory name must match
        # the one written by `dataset_preprocessing` -- confirm and fix both together.
        data_path += f"_split{1-test_ratio*100}-{test_ratio*100}"
    elif mode == "train_val_test":
        data_path += f"_split{round(100-(test_ratio+val_ratio)*100)}-{round(test_ratio*100)}-{round(val_ratio*100)}"

    # If no data_dict exists, run preprocessing, else load data_dict
    data_dict_file = f"../data/prepared/{dataset_name}/{data_path}/data_dict.pickle"
    if not os.path.exists(data_dict_file):
        dataset_preprocessing.process_dataset(dataset_name, target, mode, RS, hct, test_ratio, val_ratio, folds)
    # NOTE: pickle can execute arbitrary code on load; only load files produced by this project.
    with open(data_dict_file, 'rb') as handle:
        data_dict = pickle.load(handle)

    z_cols = data_dict["z_cols"]

    results[dataset_name] = {}
    for fold_num in range(folds):
        results[dataset_name][fold_num] = {}

        print(f"Fold no. {fold_num}")
        save_path = f"../results/{dataset_name}/{data_path}/fold_{fold_num}/HMC"
        os.makedirs(save_path, exist_ok=True)
        # Single source of truth for the cached result file of this fold.
        results_file = f"{save_path}/results_RS{RS}_{dataset_name}_iter{fold_num}.pickle"

        # Encoded categorical features and encoding times are only needed by the
        # disabled TE / OHE baselines further down.
        z_ohe_encoded_train = data_dict[f"z_ohe_encoded_train_{fold_num}"]
        z_ohe_encoded_val = data_dict[f"z_ohe_encoded_val_{fold_num}"]
        z_ohe_encoded_test = data_dict[f"z_ohe_encoded_test_{fold_num}"]

        z_target_encoded_train = data_dict[f"z_target_encoded_train_{fold_num}"]
        z_target_encoded_val = data_dict[f"z_target_encoded_val_{fold_num}"]
        z_target_encoded_test = data_dict[f"z_target_encoded_test_{fold_num}"]

        target_encoding_time = data_dict[f"target_encoding_time_{fold_num}"]
        ohe_encoding_time = data_dict[f"ohe_encoding_time_{fold_num}"]

        x_cols = data_dict[f"X_train_{fold_num}"].columns
        X_train = data_dict[f"X_train_{fold_num}"]
        Z_train = data_dict[f"Z_train_{fold_num}"]
        y_train = data_dict[f"y_train_{fold_num}"]

        X_val = data_dict[f"X_val_{fold_num}"]
        Z_val = data_dict[f"Z_val_{fold_num}"]
        y_val = data_dict[f"y_val_{fold_num}"]

        X_test = data_dict[f"X_test_{fold_num}"]
        Z_test = data_dict[f"Z_test_{fold_num}"]
        y_test = data_dict[f"y_test_{fold_num}"]

        if not os.path.exists(results_file):

            tf.random.set_seed(RS+fold_num)
            np.random.seed(RS+fold_num)

            # Per Z column: one more than the largest value seen across train / val / test.
            qs = np.max([tf.reduce_max(Z_train, axis=0),tf.reduce_max(Z_val, axis=0),tf.reduce_max(Z_test, axis=0)],axis=0)+1

            X_train = tf.convert_to_tensor(X_train)
            Z_train = tf.convert_to_tensor(Z_train,dtype=tf.int32)
            y_train = tf.convert_to_tensor(y_train)

            X_val = tf.convert_to_tensor(X_val)
            Z_val = tf.convert_to_tensor(Z_val,dtype=tf.int32)
            y_val = tf.convert_to_tensor(y_val)

            X_test = tf.convert_to_tensor(X_test)
            Z_test = tf.convert_to_tensor(Z_test,dtype=tf.int32)
            y_test = tf.convert_to_tensor(y_test)

            if target == "categorical":
                n_classes = np.unique(y_train).shape[0]
            elif target=="binary":
                n_classes = 1
            else:
                # Previously `n_classes` was left undefined (or stale) for any other target.
                raise ValueError(f"Unsupported target type for this notebook: {target!r}")

            y_train = tf.one_hot(tf.cast(y_train,tf.int32),n_classes)
            y_val = tf.one_hot(tf.cast(y_val,tf.int32),n_classes)
            y_test = tf.one_hot(tf.cast(y_test,tf.int32),n_classes)

            ##### GMENN #####
            d = X_train.shape[1] # columns
            n = X_train.shape[0] # rows
            num_outputs = n_classes
            perc_numeric = d/(d+Z_train.shape[1])

#             qs = np.max([tf.reduce_max(Z_train, axis=0),tf.reduce_max(Z_val, axis=0),tf.reduce_max(Z_test, axis=0)],axis=0)+1

            set_seed(RS)

            fe_model, optimizer = get_model(model_name=model_name, input_size=X_train.shape[1],
                                              output_size=num_outputs,
                                              target=target,
                                              perc_numeric=perc_numeric, RS=RS)

            # eucalyptus is trained with a 10x larger learning rate than the default.
            if dataset_name=="eucalyptus":
                optimizer.learning_rate.assign(optimizer.learning_rate*10)


            initial_stds = np.ones([len(qs),num_outputs]).astype(float).tolist()

            me_model = MixedEffectsNetwork(X_train, Z_train, y_train, fe_model,
                                           target=target, qs=qs,
                                           initial_stds=initial_stds,
                                          fe_loss_weight=1.,
                                           mode="intercepts",
                                           early_stopping_fe=early_stopping,
                                          )

            me_model.compile(
                loss_class_me = loss_use()(),
                loss_class_fe = loss_use()(),
            #     metric_class_me = tf.keras.metrics.AUC(multi_label=True, name="auc_me"),
            #     metric_class_fe = tf.keras.metrics.AUC(multi_label=True, name="auc_fe"),
                optimizer=optimizer
            )

            mcmc = MCMCSamplingCallback(num_mcmc_samples=1,
                                        perc_burnin=0.7,
                                        warm_restart=None,
                                        num_burnin_steps=1,
                                        step_size = 0.1#initial_step_size,
                                   )

            print_metric = PrintMetrics(X_train, Z_train, y_train, X_val, Z_val, y_val)

            start = time.time()
            history = me_model.fit([X_train,Z_train], y_train,
                         callbacks=[mcmc,
                                    print_metric,
                                    tf.keras.callbacks.EarlyStopping(monitor="me_auc_val", patience=early_stopping, mode="max")],
                         epochs=epochs,
                         validation_data=[[X_val,Z_val],y_val],
                        batch_size=batch_size)

            end = time.time()
            fit_time_gmenn = round(end-start,2)

            # The model returns a pair: the full prediction and the fixed-effects-only prediction.
            y_train_pred_gmenn, y_train_pred_gmenn_fe = me_model([X_train,Z_train])
            y_val_pred_gmenn, y_val_pred_gmenn_fe = me_model([X_val,Z_val])
            y_test_pred_gmenn, y_test_pred_gmenn_fe = me_model([X_test,Z_test])


            # NOTE(review): the baseline models below (Ignore / TE / OHE / Embedding) are
            # disabled in this notebook. Consider moving them into functions or deleting
            # them once they are confirmed to live elsewhere.
            ###### Prepare NN Training ######
#             metrics_use = []
#             if target =="binary":
#                 metrics_use.append(tf.keras.metrics.AUC(name="auc"))
#                 metrics_use.append(tf.keras.metrics.Accuracy(name="accuracy"))
#                 metrics_use.append(F1Score(num_classes=2, average="micro", name="f1"))
#                 stop_mode = "max"
#                 activation_layer = tf.keras.activations.sigmoid
#             elif target =="categorical":
#                 metrics_use.append(tf.keras.metrics.AUC(multi_label=True, name="auc"))
#                 metrics_use.append(tf.keras.metrics.CategoricalAccuracy(name="accuracy"))
#                 metrics_use.append(F1Score(num_classes=num_outputs, average="weighted", name="f1"))
#                 stop_mode = "max"
#                 activation_layer = tf.keras.activations.softmax
#             elif target == "continuous":
#                 metrics_use.append(RSquare(name="r2"))
#                 metrics_use.append(tf.keras.metrics.MeanSquaredError(name="mse"))
#                 stop_mode = "min"            

#             ##### Ignore #####
#             model_nn, optimizer = get_model(model_name=model_name, 
#                                             input_size=X_train.shape[1], 
#                                             output_size=num_outputs, 
#                                             target=target, 
#                                             perc_numeric=perc_numeric, RS=RS)
#             if dataset_name=="eucalyptus":
#                 optimizer.learning_rate.assign(optimizer.learning_rate*10)

#             model_nn.build((n,d))
#             update_layer_activation(model=model_nn, activation=activation_layer)

#             model_nn.compile(loss=loss_use()(), optimizer=optimizer, metrics = metrics_use)

#             callback = tf.keras.callbacks.EarlyStopping(monitor="val_auc", patience=early_stopping, mode=stop_mode)

#             start = time.time()
#             history_nn = model_nn.fit(X_train, y_train,
#                          validation_data= [X_val, y_val],
#                          epochs=epochs, batch_size=batch_size, callbacks=[callback])
#             end = time.time()
#             fit_time_nn = round(end-start,2)

#             y_train_pred_nn = model_nn.predict(X_train ,batch_size=batch_size)
#             y_val_pred_nn = model_nn.predict(X_val ,batch_size=batch_size)
#             y_test_pred_nn = model_nn.predict(X_test ,batch_size=batch_size)

#             if target == "binary":
#                 eval_res_train_nn = get_metrics(y_train[:,0], y_train_pred_nn, target=target)
#                 eval_res_val_nn = get_metrics(y_val[:,0], y_val_pred_nn, target=target)
#                 eval_res_test_nn = get_metrics(y_test[:,0], y_test_pred_nn, target=target)
#             elif target == "categorical":
#                 eval_res_train_nn = get_metrics(y_train, y_train_pred_nn, target=target)
#                 eval_res_val_nn = get_metrics(y_val, y_val_pred_nn, target=target)
#                 eval_res_test_nn = get_metrics(y_test, y_test_pred_nn, target=target)

#             ##### Target Encoding #####
#             print("\n Train Target Encoding Network")
#             model_nn_te, optimizer = get_model(model_name=model_name, 
#                                             input_size=np.append(X_train ,z_target_encoded_train, axis=1).shape[1], 
#                                             output_size=num_outputs, 
#                                             target=target, 
#                                             perc_numeric=perc_numeric, RS=RS)
#             if dataset_name=="eucalyptus":
#                 optimizer.learning_rate.assign(optimizer.learning_rate*10)
#             model_nn_te.build((n,np.append(X_train ,z_target_encoded_train, axis=1).shape[1]))
#             update_layer_activation(model=model_nn_te, activation=activation_layer)
#             model_nn_te.compile(loss=loss_use()(), optimizer=optimizer, metrics = metrics_use)
#             callback = tf.keras.callbacks.EarlyStopping(monitor="val_auc", patience=early_stopping, mode=stop_mode)

#             start = time.time()
#             history_nn_te = model_nn_te.fit(np.append(X_train ,z_target_encoded_train, axis=1), y_train,
#                          validation_data= [np.append(X_val ,z_target_encoded_val, axis=1), y_val],
#                          epochs=epochs, batch_size=batch_size, callbacks=[callback])
#             end = time.time()
#             fit_time_te = round(end-start,2)+target_encoding_time

#             y_train_pred_nn_te = model_nn_te.predict(np.append(X_train ,z_target_encoded_train, axis=1) ,batch_size=batch_size)
#             y_val_pred_nn_te = model_nn_te.predict(np.append(X_val ,z_target_encoded_val, axis=1) ,batch_size=batch_size)
#             y_test_pred_nn_te = model_nn_te.predict(np.append(X_test ,z_target_encoded_test, axis=1) ,batch_size=batch_size)

#             if target == "binary":
#                 eval_res_train_nn_te = get_metrics(y_train[:,0], y_train_pred_nn_te, target=target)
#                 eval_res_val_nn_te = get_metrics(y_val[:,0], y_val_pred_nn_te, target=target)
#                 eval_res_test_nn_te = get_metrics(y_test[:,0], y_test_pred_nn_te, target=target)
#             elif target == "categorical":
#                 eval_res_train_nn_te = get_metrics(y_train, y_train_pred_nn_te, target=target)
#                 eval_res_val_nn_te = get_metrics(y_val, y_val_pred_nn_te, target=target)
#                 eval_res_test_nn_te = get_metrics(y_test, y_test_pred_nn_te, target=target)

#             ##### OHE #####
#             print("\n Train OHE Network")
#             model_nn_ohe, optimizer = get_model(model_name=model_name, 
#                                             input_size=np.append(X_train ,z_ohe_encoded_train, axis=1).shape[1], 
#                                             output_size=num_outputs, 
#                                             target=target, 
#                                             perc_numeric=perc_numeric, RS=RS)
#             if dataset_name=="eucalyptus":
#                 optimizer.learning_rate.assign(optimizer.learning_rate*10)
#             model_nn_ohe.build((n,np.append(X_train ,z_ohe_encoded_train, axis=1).shape[1]))
#             update_layer_activation(model=model_nn_ohe, activation=activation_layer)
#             model_nn_ohe.compile(loss=loss_use()(), optimizer=optimizer, metrics = metrics_use)
#             callback = tf.keras.callbacks.EarlyStopping(monitor="val_auc", patience=early_stopping, mode=stop_mode)

#             start = time.time()
#             history_nn_ohe = model_nn_ohe.fit(np.append(X_train ,z_ohe_encoded_train, axis=1), y_train,
#                          validation_data= [np.append(X_val ,z_ohe_encoded_val, axis=1), y_val],
#                          epochs=epochs, batch_size=batch_size, callbacks=[callback])
#             end = time.time()
#             fit_time_ohe = round(end-start,2)+ohe_encoding_time

#             y_train_pred_nn_ohe = model_nn_ohe.predict(np.append(X_train ,z_ohe_encoded_train, axis=1), batch_size=batch_size)
#             y_val_pred_nn_ohe = model_nn_ohe.predict(np.append(X_val ,z_ohe_encoded_val, axis=1), batch_size=batch_size)
#             y_test_pred_nn_ohe = model_nn_ohe.predict(np.append(X_test ,z_ohe_encoded_test, axis=1), batch_size=batch_size)

#             if target == "binary":
#                 eval_res_train_nn_ohe = get_metrics(y_train[:,0], y_train_pred_nn_ohe, target=target)
#                 eval_res_val_nn_ohe = get_metrics(y_val[:,0], y_val_pred_nn_ohe, target=target)
#                 eval_res_test_nn_ohe = get_metrics(y_test[:,0], y_test_pred_nn_ohe, target=target)            
#             elif target == "categorical":
#                 eval_res_train_nn_ohe = get_metrics(y_train, y_train_pred_nn_ohe, target=target)
#                 eval_res_val_nn_ohe = get_metrics(y_val, y_val_pred_nn_ohe, target=target)
#                 eval_res_test_nn_ohe = get_metrics(y_test, y_test_pred_nn_ohe, target=target)

#             ##### Embedding #####
#             print("\n Embedding Estimate Network")

#             if embed_dims_method=="sqrt":
#                 embed_dims = [int(np.sqrt(q)) for q in qs]
#             elif embed_dims_method=="AutoGluon":
#                 embed_dims = [int(np.max([100, np.round(1.6*q**0.56)])) for q in qs]
#             else:
#                 embed_dims = [10 for q in qs]

#             input_layer = Input(shape=(d,))

#             # Define embedding layers
#             embed_inputs = []
#             embedding_layers = []
#             for q_num in range(len(qs)):
#                 Z_input_layer = Input(shape=(1,))
#                 embedding_layer = Embedding(qs[q_num], embed_dims[q_num], input_length=1)(Z_input_layer)
#                 embedding_layer = Reshape(target_shape=(embed_dims[q_num],))(embedding_layer)

#                 embed_inputs.append(Z_input_layer)
#                 embedding_layers.append(embedding_layer)

#             ### Get model layer dimensions
#             min_numeric_embed_dim = 32
#             max_numeric_embed_dim = 2056
#             max_layer_width = 2056
#             # Main dense model
#             if target == "continuous":
#                 default_layer_sizes = [256,
#                                        128]  # overall network will have 4 layers. Input layer, 256-unit hidden layer, 128-unit hidden layer, output layer.
#             else:
#                 default_sizes = [256, 128]  # will be scaled adaptively
#                 # base_size = max(1, min(num_net_outputs, 20)/2.0) # scale layer width based on number of classes
#                 base_size = max(1, min(num_outputs,
#                                        100) / 50)  # TODO: Updated because it improved model quality and made training far faster
#                 default_layer_sizes = [defaultsize * base_size for defaultsize in default_sizes]
#             layer_expansion_factor = 1  # TODO: consider scaling based on num_rows, eg: layer_expansion_factor = 2-np.exp(-max(0,train_dataset.num_examples-10000))
#             first_layer_width = int(min(max_layer_width, layer_expansion_factor * default_layer_sizes[0]))

#             # numeric embed dim
#             vector_dim = 0  # total dimensionality of vector features (I think those should be transformed string features, which we don't have)
#             prop_vector_features = perc_numeric  # Fraction of features that are numeric
#             numeric_embedding_size = int(min(max_numeric_embed_dim,
#                                              max(min_numeric_embed_dim,
#                                                  first_layer_width * prop_vector_features * np.log10(vector_dim + 10))))


#             numeric_embedding = Dense(numeric_embedding_size, activation="relu")(input_layer)

#             concat = Concatenate()([numeric_embedding] + embedding_layers)

#             base_model, optimizer = get_model(model_name=model_name, 
#                                               input_size=numeric_embedding_size + sum(embed_dims), 
#                                               output_size=num_outputs, target=target,
#                                               perc_numeric=perc_numeric, RS=RS)

#             if dataset_name=="eucalyptus":
#                 optimizer.learning_rate.assign(optimizer.learning_rate*10)
#             base_model.build((n, numeric_embedding_size + sum(embed_dims)))
#             update_layer_activation(model=base_model, activation=activation_layer)

#             layers = base_model(concat)

#             model_embed = Model(inputs=[input_layer] + embed_inputs, outputs=layers)


#             model_embed.compile(loss=loss_use()(), optimizer=optimizer, metrics = metrics_use)
#             callback = tf.keras.callbacks.EarlyStopping(monitor="val_auc", patience=early_stopping, mode=stop_mode)

#             start = time.time()
#             history_nn_embed = model_embed.fit([X_train] + [Z_train[: ,q_num] for q_num in range(len(qs))], y_train,
#                             validation_data=[[X_val] + [Z_val[: ,q_num] for q_num in range(len(qs))], y_val],
#                             epochs=epochs, batch_size=batch_size, callbacks=[callback])
#             end = time.time()
#             fit_time_embed = round(end-start,2)

#             y_train_pred_embed = model_embed.predict([X_train] + [Z_train[: ,q_num] for q_num in range(len(qs))]
#                                                      ,batch_size=batch_size)
#             y_val_pred_embed = model_embed.predict([X_val] + [Z_val[: ,q_num] for q_num in range(len(qs))]
#                                                     ,batch_size=batch_size)
#             y_test_pred_embed = model_embed.predict([X_test] + [Z_test[: ,q_num] for q_num in range(len(qs))]
#                                                     ,batch_size=batch_size)

#             if target == "binary":
#                 eval_res_train_embed = get_metrics(y_train[:,0], y_train_pred_embed, target=target)
#                 eval_res_val_embed = get_metrics(y_val[:,0], y_val_pred_embed, target=target)
#                 eval_res_test_embed = get_metrics(y_test[:,0], y_test_pred_embed, target=target)
#             elif target == "categorical":
#                 eval_res_train_embed = get_metrics(y_train, y_train_pred_embed, target=target)
#                 eval_res_val_embed = get_metrics(y_val, y_val_pred_embed, target=target)
#                 eval_res_test_embed = get_metrics(y_test, y_test_pred_embed, target=target)

#             eval_res_train_embed, eval_res_test_embed        



            ##### Document Results #####

            results[dataset_name][fold_num]["histories"] = {"GMENN": history.history,
#                                                        "Ignore": history_nn.history,
#                                                        "TE": history_nn_te.history,
#                                                        "OHE": history_nn_ohe.history,
#                                                        "Embedding": history_nn_embed.history,
                                                      }

            results[dataset_name][fold_num]["predictions"] = {"GMENN": [y_train_pred_gmenn, y_val_pred_gmenn, y_test_pred_gmenn],
                                                        "GMENN (FE)": [y_train_pred_gmenn_fe, y_val_pred_gmenn_fe, y_test_pred_gmenn_fe],
#                                                         "Ignore": [y_train_pred_nn, y_val_pred_nn, y_test_pred_nn],
#                                                         "TE": [y_train_pred_nn_te, y_val_pred_nn_te, y_test_pred_nn_te],
#                                                         "OHE": [y_train_pred_nn_ohe, y_val_pred_nn_ohe, y_test_pred_nn_ohe],
#                                                         "Embedding": [y_train_pred_embed, y_val_pred_embed, y_test_pred_embed],
                                                     }

            results[dataset_name][fold_num]["times"] = {"GMENN": fit_time_gmenn,
#                                                    "Ignore": fit_time_nn,
#                                                    "TE": fit_time_te,
#                                                    "OHE": fit_time_ohe,
#                                                    "Embedding": fit_time_embed,
                                                      }

            results[dataset_name][fold_num]["other_info"] = {
                "GMENN": {
#                     "_stddev_z": np.array([i.numpy() for i in me_model.data_model._stddev_z]),
#                     "acceptance_rates": np.array(me_model.acceptance_rates),
#                     "random_effects": me_model.mean_samples,
#                     "all_samples": me_model.all_samples,
#                     "stds": me_model.stds
                },
            }


            # Cache this fold's results so a re-run loads them instead of retraining.
            with open(results_file, 'wb') as handle:
                pickle.dump(results[dataset_name][fold_num], handle, protocol=pickle.HIGHEST_PROTOCOL)


            del X_train, X_val, X_test, y_train, y_val, y_test
#             del z_target_encoded_train, z_target_encoded_val, z_target_encoded_test
#             del z_ohe_encoded_train, z_ohe_encoded_val, z_ohe_encoded_test

            gc.collect()
        else:
            print(f"Load results for dataset {dataset_name}, iteration={fold_num}")
            # NOTE: pickle can execute arbitrary code on load; only load files produced by this notebook.
            with open(results_file, 'rb') as handle:
                results[dataset_name][fold_num] = pickle.load(handle)
        


Start training procedure for eucalyptus
Fold no. 0
Load results for dataset eucalyptus, iteration=0
Fold no. 1
Load results for dataset eucalyptus, iteration=1
Fold no. 2
Load results for dataset eucalyptus, iteration=2
Fold no. 3
Load results for dataset eucalyptus, iteration=3
Fold no. 4
Load results for dataset eucalyptus, iteration=4
Start training procedure for Midwest_survey
Fold no. 0
Load results for dataset Midwest_survey, iteration=0
Fold no. 1
Load results for dataset Midwest_survey, iteration=1
Fold no. 2
Load results for dataset Midwest_survey, iteration=2
Fold no. 3
Load results for dataset Midwest_survey, iteration=3
Fold no. 4
Load results for dataset Midwest_survey, iteration=4
Start training procedure for hpc-job-scheduling
Fold no. 0
Load results for dataset hpc-job-scheduling, iteration=0
Fold no. 1
Load results for dataset hpc-job-scheduling, iteration=1
Fold no. 2
Load results for dataset hpc-job-scheduling, iteration=2
Fold no. 3
Load results for dataset hpc-job-

## Evaluation

### Performance

In [8]:
models = ["GMENN", "TE", "OHE", "Embedding","Ignore"]

# Placeholder stored for every (dataset, fold, model) combination that cannot be scored.
# It is copied on use so the entries never share one mutable dict.
nan_metrics = {"Accuracy": np.nan,
               "AUROC": np.nan,
               "F1": np.nan,
               "Time": np.nan}

# results_perf[dataset_name][fold][model] -> dict of test-set metrics plus the fit time ("Time").
results_perf = {dataset_name: {num: {model: {}  for model in models} for num in range(folds)} for dataset_name in dataset_names}
for dataset_name in dataset_names:
    try:
        with open(f"../data/prepared/{dataset_name}/{data_path}/data_dict.pickle", 'rb') as handle:
            # NOTE: pickle.load executes arbitrary code from the file; only load trusted, locally prepared data.
            data_dict = pickle.load(handle)
    except FileNotFoundError:
        print(f"dataset {dataset_name} not found")
        # Without the prepared data there are no test labels to score against.
        # Previously execution fell through and reused the data_dict of the
        # previous dataset (or raised NameError on the first one), so the metrics
        # would have been computed against the wrong labels.
        for num in range(folds):
            for model in models:
                results_perf[dataset_name][num][model] = dict(nan_metrics)
        continue

    for num in range(folds):
        y_test_labels = data_dict[f"y_test_{num}"]
        # The number of classes is inferred from the labels present in this test fold.
        n_classes = np.unique(y_test_labels).shape[0]
        y_test = tf.one_hot(y_test_labels, n_classes)
        for model in models:
            try:
                # Index 2 selects the test-set predictions of the stored [train, val, test] list.
                y_pred = results[dataset_name][num]["predictions"][model][2]

                # get_metrics and target come from outside this cell — presumably
                # utils.evaluation and an earlier config cell; confirm.
                results_perf[dataset_name][num][model] = get_metrics(y_test,y_pred,target)
                results_perf[dataset_name][num][model]["Time"] = results[dataset_name][num]["times"][model]
            except Exception as err:
                # Best effort: a model without stored results (or whose metrics cannot
                # be computed) gets NaN entries, but the reason is reported instead of
                # being swallowed silently.
                print(f"Set nan for {dataset_name}, {num}, {model} ({type(err).__name__}: {err})")
                results_perf[dataset_name][num][model] = dict(nan_metrics)


Set nan for eucalyptus, 0
Set nan for eucalyptus, 0
Set nan for eucalyptus, 0
Set nan for eucalyptus, 0
Set nan for eucalyptus, 1
Set nan for eucalyptus, 1
Set nan for eucalyptus, 1
Set nan for eucalyptus, 1
Set nan for eucalyptus, 2
Set nan for eucalyptus, 2
Set nan for eucalyptus, 2
Set nan for eucalyptus, 2
Set nan for eucalyptus, 3
Set nan for eucalyptus, 3
Set nan for eucalyptus, 3
Set nan for eucalyptus, 3
Set nan for eucalyptus, 4
Set nan for eucalyptus, 4
Set nan for eucalyptus, 4
Set nan for eucalyptus, 4
Set nan for Midwest_survey, 0
Set nan for Midwest_survey, 0
Set nan for Midwest_survey, 0
Set nan for Midwest_survey, 0
Set nan for Midwest_survey, 1
Set nan for Midwest_survey, 1
Set nan for Midwest_survey, 1
Set nan for Midwest_survey, 1
Set nan for Midwest_survey, 2
Set nan for Midwest_survey, 2
Set nan for Midwest_survey, 2
Set nan for Midwest_survey, 2
Set nan for Midwest_survey, 3
Set nan for Midwest_survey, 3
Set nan for Midwest_survey, 3
Set nan for Midwest_survey, 3


In [9]:
models = ["GMENN", "TE", "OHE", "Embedding", "Ignore"]

metric = "AUROC"

#####
# Per-dataset outputs of the loop below:
#   dataset_res_dict[dataset] -> {model: "mean (std)"} over folds
#   best_models[dataset]      -> model with the highest mean metric
#   t_test_results[dataset]   -> p-values (ordered like `models`) of a paired t-test against the best model
dataset_res_dict = {}
best_models = {}
t_test_results = {}

round_mean_at = 2
round_std_at = 3

for dataset_name in dataset_names:
    dataset_models = list(results_perf[dataset_name][0].keys())
    # One row per fold, one column per model, holding the selected metric.
    use_df = pd.DataFrame([pd.DataFrame(results_perf[dataset_name][fold_num]).loc[metric,models] for fold_num in results_perf[dataset_name].keys()],index=results_perf[dataset_name].keys())

    # Format as "mean (std)" strings.
    df_mean = pd.DataFrame(use_df.mean(axis=0).round(round_mean_at).astype(str) + " (" + use_df.std(axis=0).round(round_std_at).astype(str) + ")").transpose()
    model_dict = {i: df_mean[i].values[0] for i in df_mean.columns}
    dataset_res_dict[dataset_name] = model_dict

    # Higher is better for this metric.
    best_models[dataset_name] = use_df.columns[use_df.mean(axis=0).argmax()]

    # A model without any valid fold result gets p = 0 so it is never highlighted.
    # Previously its NaN p-value was turned into 1 by the NaN patch below, which
    # marked missing models as "not significantly different from the best".
    has_results = {model: model in dataset_models and bool(use_df[model].notna().any()) for model in models}
    t_test_res = np.array([stats.ttest_rel(use_df[best_models[dataset_name]].values, use_df[model].values)[1] if has_results[model] else 0 for model in models]).round(3)
    # Remaining NaNs come from comparing the best model with itself (zero variance
    # of the differences) or from partially missing folds; treat them as "not different".
    t_test_res[np.isnan(t_test_res)] = 1.
    t_test_results[dataset_name] = t_test_res

res_df = pd.DataFrame(dataset_res_dict).transpose()

def negative_bold(val):
    """Return per-row CSS for one model column of ``res_df``.

    Intended for ``res_df.style.apply(negative_bold)``: ``val`` is a column
    whose name is a model and whose index holds the dataset names. A cell is
    bold when the model's p-value against the best model of that dataset
    (read from the global ``t_test_results``) is at least 0.05.
    """
    i = np.where(val.name==np.array(models))[0][0]
    return ["font-weight: bold"  if t_test_results[dataset_name][i]>=0.05 else "" for dataset_name in val.keys()]

# To highlight the best models, display `res_df.style.apply(negative_bold)` instead.
res_df

Unnamed: 0,GMENN,TE,OHE,Embedding,Ignore
eucalyptus,0.89 (0.027),nan (nan),nan (nan),nan (nan),nan (nan)
Midwest_survey,0.85 (0.006),nan (nan),nan (nan),nan (nan),nan (nan)
hpc-job-scheduling,0.91 (0.006),nan (nan),nan (nan),nan (nan),nan (nan)
video-game-sales,0.78 (0.006),nan (nan),nan (nan),nan (nan),nan (nan)
okcupid-stem,0.79 (0.006),nan (nan),nan (nan),nan (nan),nan (nan)
Diabetes130US,0.64 (0.005),nan (nan),nan (nan),nan (nan),nan (nan)


In [10]:
# Mean value per dataset for GMENN as plain floats: keep the part before the
# first space of each "mean (std)" string and drop the "(std)" suffix.
np.array([float(cell.split(" ")[0]) for cell in res_df["GMENN"]])

array([0.89, 0.85, 0.91, 0.78, 0.79, 0.64])

### Time

In [11]:
models = ["GMENN", "TE", "OHE", "Embedding", "Ignore"]
metric = "Time"

#####
# Per-dataset outputs of the loop below:
#   dataset_res_dict[dataset] -> {model: "mean (std)"} over folds
#   best_models[dataset]      -> model with the lowest mean time
#   t_test_results[dataset]   -> p-values (ordered like `models`) of a paired t-test against the best model
dataset_res_dict = {}
best_models = {}
t_test_results = {}

round_mean_at = 2
round_std_at = 3

for dataset_name in dataset_names:
    dataset_models = list(results_perf[dataset_name][0].keys())
    # One row per fold, one column per model. The division by 60 presumably
    # converts seconds to minutes — confirm the unit of the stored fit times.
    use_df = pd.DataFrame([pd.DataFrame(results_perf[dataset_name][fold_num]).loc[metric,models] for fold_num in results_perf[dataset_name].keys()],index=results_perf[dataset_name].keys())/60

    # Format as "mean (std)" strings.
    df_mean = pd.DataFrame(use_df.mean(axis=0).round(round_mean_at).astype(str) + " (" + use_df.std(axis=0).round(round_std_at).astype(str) + ")").transpose()
    model_dict = {i: df_mean[i].values[0] for i in df_mean.columns}
    dataset_res_dict[dataset_name] = model_dict

    # Lower is better for this metric.
    best_models[dataset_name] = use_df.columns[use_df.mean(axis=0).argmin()]

    # A model without any valid fold result gets p = 0 so it is never highlighted.
    # Previously its NaN p-value was turned into 1 by the NaN patch below, which
    # marked missing models as "not significantly different from the best".
    has_results = {model: model in dataset_models and bool(use_df[model].notna().any()) for model in models}
    t_test_res = np.array([stats.ttest_rel(use_df[best_models[dataset_name]].values, use_df[model].values)[1] if has_results[model] else 0 for model in models]).round(3)
    # Remaining NaNs come from comparing the best model with itself (zero variance
    # of the differences) or from partially missing folds; treat them as "not different".
    t_test_res[np.isnan(t_test_res)] = 1.
    t_test_results[dataset_name] = t_test_res

res_df = pd.DataFrame(dataset_res_dict).transpose()

def negative_bold(val):
    """Return per-row CSS for one model column of ``res_df``.

    Intended for ``res_df.style.apply(negative_bold)``: ``val`` is a column
    whose name is a model and whose index holds the dataset names. A cell is
    bold when the model's p-value against the best model of that dataset
    (read from the global ``t_test_results``) is at least 0.05.
    """
    i = np.where(val.name==np.array(models))[0][0]
    return ["font-weight: bold"  if t_test_results[dataset_name][i]>=0.05 else "" for dataset_name in val.keys()]

# To highlight the best models, display `res_df.style.apply(negative_bold)` instead.
res_df

Unnamed: 0,GMENN,TE,OHE,Embedding,Ignore
eucalyptus,1.59 (0.894),nan (nan),nan (nan),nan (nan),nan (nan)
Midwest_survey,1.75 (0.842),nan (nan),nan (nan),nan (nan),nan (nan)
hpc-job-scheduling,1.24 (0.321),nan (nan),nan (nan),nan (nan),nan (nan)
video-game-sales,0.76 (0.187),nan (nan),nan (nan),nan (nan),nan (nan)
okcupid-stem,1.86 (1.276),nan (nan),nan (nan),nan (nan),nan (nan)
Diabetes130US,6.35 (0.866),nan (nan),nan (nan),nan (nan),nan (nan)


In [12]:
# Mean value per dataset for GMENN as plain floats: keep the part before the
# first space of each "mean (std)" string and drop the "(std)" suffix.
np.array([float(cell.split(" ")[0]) for cell in res_df["GMENN"]])

array([1.59, 1.75, 1.24, 0.76, 1.86, 6.35])