In [1]:
import sys
# Append root path 
sys.path.append("../")
sys.path.append("../lmmnn")

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Expose only CUDA device 0 to this process and ask TensorFlow to grow GPU
# memory on demand instead of reserving it up front.
os.environ.update({
    "TF_FORCE_GPU_ALLOW_GROWTH": "true",
    "CUDA_VISIBLE_DEVICES": "0",
})

# Query the device name once and report whether the expected GPU is visible.
gpu_device = tf.test.gpu_device_name()
if gpu_device == '/device:GPU:0':
    print('SUCCESS: Found GPU: {}'.format(gpu_device))
else:
    print('WARNING: GPU device not found.')

from model.mixed_effects import *
from utils.fe_models import get_model
from utils.evaluation import *
from utils.utils import *
from data.preprocessing import dataset_preprocessing

# from vis.utils.utils import apply_modifications
# helper function
def update_layer_activation(model, activation, index=-1):
    """Replace the activation of one layer of ``model`` in place.

    Args:
        model: Object exposing an indexable ``layers`` attribute.
        activation: Value stored on the selected layer's ``activation``
            attribute.
        index: Position of the layer in ``model.layers``; defaults to the
            last layer.

    Returns:
        The same ``model`` object that was passed in (mutated, not copied).
    """
    target_layer = model.layers[index]
    target_layer.activation = activation
    return model

from tensorflow.keras.optimizers import Adam
from keras.models import Sequential, Model
from keras.layers import Dense, Input, Reshape, Embedding, Concatenate
from tensorflow.keras.activations import sigmoid

from sklearn.metrics import accuracy_score as acc
from sklearn.metrics import roc_auc_score as auroc
from sklearn.metrics import f1_score as f1
from sklearn.model_selection import train_test_split
from category_encoders import TargetEncoder
from tensorflow_addons.metrics import F1Score

from scipy import stats
import pickle
import yaml
import time
import gc

RS = 555

SUCCESS: Found GPU: /device:GPU:0


In [2]:
class MCMCSamplingCallback(tf.keras.callbacks.Callback):
    """Keras callback that draws MCMC samples at the start of each epoch.

    The callback builds a No-U-Turn sampler wrapped in dual-averaging step
    size adaptation around ``self.model.target_log_prob_fn`` and, once per
    epoch, advances the chain stored in ``self.model.current_state``. The
    drawn states, their running mean, acceptance rates, sampling times and
    per-group standard deviations are written back onto the attached model.

    The attached model must provide the attributes read and written below
    (``current_state``, ``all_samples``, ``mean_samples``, ``qs``,
    ``fe_pretraining``, ``fe_converged``, ``acceptance_rates``, ``fX``,
    ``fe_model``, ``X``, ``Z``, ``embed_x``, ``embed_z``, ``data_model``,
    ``RS``, ``e_step_times``, ``stds`` and, when embeddings are enabled,
    ``X_embedded``, ``X_embed_model``, ``Z_embedded``, ``Z_embed_models``).

    NOTE(review): ``tfp`` and ``time`` are not imported in this cell; they are
    presumably provided by the notebook's import cell / star imports -- confirm.

    Args:
        num_mcmc_samples: Number of chain results requested per epoch.
        step_size: Initial step size handed to the No-U-Turn sampler.
        perc_burnin: Fraction applied to the epoch index to decide how many
            of the oldest stored samples are skipped when averaging.
        num_burnin_steps: Burn-in steps passed to ``tfp.mcmc.sample_chain``
            on every call.
        warm_restart: If not ``None``, the chain state is re-drawn from
            ``model.data_model`` whenever ``(epoch + 1) % warm_restart == 0``
            (never at epoch 0).
        num_adaptation_steps: Passed to ``DualAveragingStepSizeAdaptation``.
        target_accept_prob: Passed to ``DualAveragingStepSizeAdaptation``.
    """

    def __init__(self,
                 num_mcmc_samples=1,
                 step_size=0.01,
                 perc_burnin=0.1,
                 num_burnin_steps=0,
                 warm_restart=None,
                 num_adaptation_steps=500,
                 target_accept_prob=0.651):
        super().__init__()

        self.num_mcmc_samples = tf.constant(num_mcmc_samples)
        self.perc_burnin = perc_burnin
        self.num_burnin_steps = num_burnin_steps
        self.warm_restart = warm_restart
        self.num_adaptation_steps = num_adaptation_steps
        self.target_accept_prob = target_accept_prob
        self.step_size = tf.Variable(step_size, trainable=False)
        self.step_sizes = []
        # Always defined, so the first sampling call may happen at any epoch
        # (e.g. after fixed-effects pretraining) without an AttributeError.
        self.previous_kernel_results = None

    def on_train_begin(self, logs=None):
        """Build the adaptive NUTS kernel for the attached model."""
        # A fresh kernel starts without carried-over kernel results.
        self.previous_kernel_results = None
        self.mcmc_kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
            inner_kernel=tfp.mcmc.NoUTurnSampler(
                target_log_prob_fn=self.model.target_log_prob_fn,
                step_size=self.step_size),
            num_adaptation_steps=self.num_adaptation_steps,
            target_accept_prob=self.target_accept_prob)

    def on_epoch_begin(self, epoch, logs=None):
        """Record the initial state at epoch 0, then sample unless pretraining."""
        if epoch == 0:
            # Store the first entry of every state part as the initial sample.
            self.model.all_samples.append(
                [state[0] for state in self.model.current_state])
            self._update_mean_samples(epoch)

        if self.model.fe_pretraining and not self.model.fe_converged:
            # Sampling is skipped while the fixed-effects model is still
            # pretraining; -1 marks the epoch as "no sampling performed".
            self.model.acceptance_rates.append(-1)
        else:
            self.run_sampling(epoch)

    def _update_mean_samples(self, epoch):
        """Recompute ``model.mean_samples`` from the stored samples.

        The first ``round(epoch * perc_burnin)`` stored samples are skipped.
        The start index is clamped so that at least the newest sample is kept
        when fewer samples than epochs exist (sampling started late).
        """
        samples = self.model.all_samples
        first_kept = round(epoch * self.perc_burnin)
        first_kept = min(first_kept, max(len(samples) - 1, 0))
        kept = samples[first_kept:]
        self.model.mean_samples = [
            tf.reduce_mean([sample[q] for sample in kept], axis=0)
            for q in range(len(self.model.qs))
        ]

    def run_sampling(self, epoch):
        """Advance the chain for one epoch and write the results to the model."""
        model = self.model

        # Refresh the cached fixed-effects output (and embeddings, if used)
        # with the current network weights before sampling.
        model.fX.assign(model.fe_model(model.X, training=False))

        if model.embed_x:
            model.X_embedded.assign(model.X_embed_model(model.X, training=False))

        if model.embed_z:
            for q_num in range(len(model.qs)):
                model.Z_embedded[q_num].assign(
                    model.Z_embed_models[q_num](model.Z[:, q_num], training=False))

        if self.warm_restart is not None and epoch > 0:
            if (epoch + 1) % self.warm_restart == 0:
                print("\n Warm restart to unstuck the chain")
                # Use the embedded inputs wherever embedding is enabled.
                x_input = model.X_embedded if model.embed_x else model.X
                z_input = model.Z_embedded if model.embed_z else model.Z
                # Draw a new state from the data model; the last element of
                # the sample is dropped.
                model.current_state = model.data_model(
                    model.fX, x_input, z_input).sample(1, seed=model.RS)[:-1]

        print("\n Start sampling for epoch {} of training".format(epoch + 1))
        start = time.time()
        if self.previous_kernel_results is not None:
            print(f"Adapt step size to {float(self.previous_kernel_results.inner_results.step_size)}")
        new_state, self.previous_kernel_results = self.get_mcmc_samples(
            model.current_state,
            tf.constant(self.num_mcmc_samples),
            self.previous_kernel_results)

        # The log acceptance ratio is read from the outer kernel results when
        # present there, otherwise from the wrapped inner kernel's results.
        kernel_results = self.previous_kernel_results
        try:
            log_accept_ratio = kernel_results.log_accept_ratio
        except AttributeError:
            log_accept_ratio = kernel_results.inner_results.log_accept_ratio
        acceptance_rate = tf.math.exp(tf.minimum(log_accept_ratio, 0.))

        # NOTE(review): self.step_size is never reassigned in this class, so
        # this logs the same value every epoch unless the kernel mutates the
        # variable -- confirm whether the adapted step size was intended.
        self.step_sizes.append(float(self.step_size))

        end = time.time()

        model.current_state = [tf.identity(part) for part in new_state]
        model.acceptance_rates.append(acceptance_rate)
        model.all_samples.extend(
            [[state[num] for state in model.current_state]
             for num in range(self.num_mcmc_samples)])

        self._update_mean_samples(epoch)

        model.e_step_times.append(round(end - start, 2))

        # Set each of the first len(qs) trainable variables of the data model
        # to the standard deviation (over axis 0) of the newest drawn sample.
        for q_num in range(len(model.qs)):
            model.data_model.trainable_variables[q_num].assign(
                tf.math.reduce_std(model.current_state[q_num][-1], axis=0))

        model.stds.append([tf.identity(i) for i in model.data_model._stddev_z])

    @tf.function(reduce_retracing=True)  # autograph=False, jit_compile=True, reduce_retracing=True)
    def get_mcmc_samples(self, current_state, num_mcmc_samples=tf.constant(1), previous_kernel_results=None):
        """Run ``tfp.mcmc.sample_chain`` from the last entry of each state part.

        Args:
            current_state: Sequence of state parts; the chain starts from
                ``state[-1]`` of every part.
            num_mcmc_samples: Number of results to draw.
            previous_kernel_results: Kernel results from the preceding call,
                or ``None`` to let ``sample_chain`` initialise them.

        Returns:
            Tuple ``(samples, final_kernel_results)``.
        """
        samples, _, previous_kernel_results = tfp.mcmc.sample_chain(
            kernel=self.mcmc_kernel, num_results=num_mcmc_samples,
            current_state=[state[-1] for state in current_state],
            num_burnin_steps=self.num_burnin_steps,
            trace_fn=None, previous_kernel_results=previous_kernel_results,
            return_final_kernel_results=True, seed=self.model.RS)

        return samples, previous_kernel_results

#### Download and save the data from Pargent et al. by running "data/download_pargent2022_datasets.py" before running this notebook.

In [3]:
# --- Data split configuration ---
mode = "cv"
hct = 10
test_ratio = None
val_ratio = None
folds = 5
dataset_names = ["eucalyptus", "Midwest_survey", "hpc-job-scheduling", "video-game-sales", "okcupid-stem", "Diabetes130US"]

# --- Task / loss configuration ---
target = "categorical"


def loss_use():
    """Return the Keras loss class (not an instance); call the result to instantiate it."""
    return tf.keras.losses.CategoricalCrossentropy


# --- Training configuration ---
batch_size = 512
epochs = 500
early_stopping = 20
model_name = "AutoGluon"
embed_dims_method = "AutoGluon"

# Collected results, keyed by dataset name and then by fold number.
results = {}

#######################################

for dataset_name in dataset_names:
    print(f"Start training procedure for {dataset_name}")
    data_path = f"{mode}_RS{RS}_hct{hct}"
    if mode == "cv":
        data_path += f"_{folds}folds"
    elif mode == "train_test":
        data_path += f"_split{1-test_ratio*100}-{test_ratio*100}"
    elif mode == "train_val_test":
        data_path += f"_split{round(100-(test_ratio+val_ratio)*100)}-{round(test_ratio*100)}-{round(val_ratio*100)}"

    # If no data_dict exists, run preprocessing, else load data_dict
    if not os.path.exists(f"../data/prepared/{dataset_name}/"+data_path+"/data_dict.pickle"):
        dataset_preprocessing.process_dataset(dataset_name, target, mode, RS, hct, test_ratio, val_ratio, folds)
    with open(f"../data/prepared/{dataset_name}/{data_path}/data_dict.pickle", 'rb') as handle:
            data_dict = pickle.load(handle)

    z_cols = data_dict["z_cols"]
    
    results[dataset_name] = {}
    for fold_num in range(folds):
        results[dataset_name][fold_num] = {}

        print(f"Fold no. {fold_num}")
        save_path = f"../results/{dataset_name}/{data_path}/fold_{fold_num}/dual_averaging"
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        
        z_ohe_encoded_train = data_dict[f"z_ohe_encoded_train_{fold_num}"] 
        z_ohe_encoded_val = data_dict[f"z_ohe_encoded_val_{fold_num}"] 
        z_ohe_encoded_test = data_dict[f"z_ohe_encoded_test_{fold_num}"] 

        z_target_encoded_train = data_dict[f"z_target_encoded_train_{fold_num}"] 
        z_target_encoded_val = data_dict[f"z_target_encoded_val_{fold_num}"] 
        z_target_encoded_test = data_dict[f"z_target_encoded_test_{fold_num}"] 
        
        target_encoding_time = data_dict[f"target_encoding_time_{fold_num}"]
        ohe_encoding_time = data_dict[f"ohe_encoding_time_{fold_num}"]
        
        x_cols = data_dict[f"X_train_{fold_num}"].columns
        X_train = data_dict[f"X_train_{fold_num}"]
        Z_train = data_dict[f"Z_train_{fold_num}"]
        y_train = data_dict[f"y_train_{fold_num}"]

        X_val = data_dict[f"X_val_{fold_num}"]
        Z_val = data_dict[f"Z_val_{fold_num}"]
        y_val = data_dict[f"y_val_{fold_num}"]

        X_test = data_dict[f"X_test_{fold_num}"]
        Z_test = data_dict[f"Z_test_{fold_num}"]
        y_test = data_dict[f"y_test_{fold_num}"]
    
        if not os.path.exists(f"{save_path}/results_RS{RS}_{dataset_name}_iter{fold_num}.pickle"):

            tf.random.set_seed(RS+fold_num)
            np.random.seed(RS+fold_num)

            qs = np.max([tf.reduce_max(Z_train, axis=0),tf.reduce_max(Z_val, axis=0),tf.reduce_max(Z_test, axis=0)],axis=0)+1
            
            X_train = tf.convert_to_tensor(X_train)
            Z_train = tf.convert_to_tensor(Z_train,dtype=tf.int32)
            y_train = tf.convert_to_tensor(y_train)

            X_val = tf.convert_to_tensor(X_val)
            Z_val = tf.convert_to_tensor(Z_val,dtype=tf.int32)
            y_val = tf.convert_to_tensor(y_val)

            X_test = tf.convert_to_tensor(X_test)
            Z_test = tf.convert_to_tensor(Z_test,dtype=tf.int32)
            y_test = tf.convert_to_tensor(y_test)

            if target == "categorical":
                n_classes = np.unique(y_train).shape[0]
            elif target=="binary":
                n_classes = 1
            
            y_train = tf.one_hot(tf.cast(y_train,tf.int32),n_classes)
            y_val = tf.one_hot(tf.cast(y_val,tf.int32),n_classes)
            y_test = tf.one_hot(tf.cast(y_test,tf.int32),n_classes)
            
            ##### GMENN #####
            d = X_train.shape[1] # columns
            n = X_train.shape[0] # rows
            num_outputs = n_classes
            perc_numeric = d/(d+Z_train.shape[1])

#             qs = np.max([tf.reduce_max(Z_train, axis=0),tf.reduce_max(Z_val, axis=0),tf.reduce_max(Z_test, axis=0)],axis=0)+1

            set_seed(RS)

            fe_model, optimizer = get_model(model_name=model_name, input_size=X_train.shape[1], 
                                              output_size=num_outputs, 
                                              target=target, 
                                              perc_numeric=perc_numeric, RS=RS)
            
            if dataset_name=="eucalyptus":
                optimizer.learning_rate.assign(optimizer.learning_rate*10)
        
        
            initial_stds = np.ones([len(qs),num_outputs]).astype(float).tolist()

            me_model = MixedEffectsNetwork(X_train, Z_train, y_train, fe_model, 
                                           target=target, qs=qs,
                                           initial_stds=initial_stds,
                                          fe_loss_weight=1.,
                                           mode="intercepts",
                                           early_stopping_fe=early_stopping,
                                          )    

            me_model.compile(
                loss_class_me = loss_use()(),
                loss_class_fe = loss_use()(),
            #     metric_class_me = tf.keras.metrics.AUC(multi_label=True, name="auc_me"),
            #     metric_class_fe = tf.keras.metrics.AUC(multi_label=True, name="auc_fe"),
                optimizer=optimizer
            )

            mcmc = MCMCSamplingCallback(num_mcmc_samples=1,
                                        perc_burnin=0.7,
                                        warm_restart=None,
                                        num_burnin_steps=1,
                                        step_size = 0.1#initial_step_size,
                                   )

            print_metric = PrintMetrics(X_train, Z_train, y_train, X_val, Z_val, y_val)

            start = time.time()
            history = me_model.fit([X_train,Z_train], y_train,
                         callbacks=[mcmc,
                                    print_metric,
                                    tf.keras.callbacks.EarlyStopping(monitor="me_auc_val", patience=early_stopping, mode="max")],
                         epochs=epochs,
                         validation_data=[[X_val,Z_val],y_val],
                        batch_size=batch_size)

            end = time.time()
            fit_time_gmenn = round(end-start,2)

            y_train_pred_gmenn, y_train_pred_gmenn_fe = me_model([X_train,Z_train])
            y_val_pred_gmenn, y_val_pred_gmenn_fe = me_model([X_val,Z_val])
            y_test_pred_gmenn, y_test_pred_gmenn_fe = me_model([X_test,Z_test])    

            
            ###### Prepare NN Training ######
#             metrics_use = []
#             if target =="binary":
#                 metrics_use.append(tf.keras.metrics.AUC(name="auc"))
#                 metrics_use.append(tf.keras.metrics.Accuracy(name="accuracy"))
#                 metrics_use.append(F1Score(num_classes=2, average="micro", name="f1"))
#                 stop_mode = "max"
#                 activation_layer = tf.keras.activations.sigmoid
#             elif target =="categorical":
#                 metrics_use.append(tf.keras.metrics.AUC(multi_label=True, name="auc"))
#                 metrics_use.append(tf.keras.metrics.CategoricalAccuracy(name="accuracy"))
#                 metrics_use.append(F1Score(num_classes=num_outputs, average="weighted", name="f1"))
#                 stop_mode = "max"
#                 activation_layer = tf.keras.activations.softmax
#             elif target == "continuous":
#                 metrics_use.append(RSquare(name="r2"))
#                 metrics_use.append(tf.keras.metrics.MeanSquaredError(name="mse"))
#                 stop_mode = "min"            
            
#             ##### Ignore #####
#             model_nn, optimizer = get_model(model_name=model_name, 
#                                             input_size=X_train.shape[1], 
#                                             output_size=num_outputs, 
#                                             target=target, 
#                                             perc_numeric=perc_numeric, RS=RS)
#             if dataset_name=="eucalyptus":
#                 optimizer.learning_rate.assign(optimizer.learning_rate*10)

#             model_nn.build((n,d))
#             update_layer_activation(model=model_nn, activation=activation_layer)

#             model_nn.compile(loss=loss_use()(), optimizer=optimizer, metrics = metrics_use)

#             callback = tf.keras.callbacks.EarlyStopping(monitor="val_auc", patience=early_stopping, mode=stop_mode)

#             start = time.time()
#             history_nn = model_nn.fit(X_train, y_train,
#                          validation_data= [X_val, y_val],
#                          epochs=epochs, batch_size=batch_size, callbacks=[callback])
#             end = time.time()
#             fit_time_nn = round(end-start,2)

#             y_train_pred_nn = model_nn.predict(X_train ,batch_size=batch_size)
#             y_val_pred_nn = model_nn.predict(X_val ,batch_size=batch_size)
#             y_test_pred_nn = model_nn.predict(X_test ,batch_size=batch_size)

#             if target == "binary":
#                 eval_res_train_nn = get_metrics(y_train[:,0], y_train_pred_nn, target=target)
#                 eval_res_val_nn = get_metrics(y_val[:,0], y_val_pred_nn, target=target)
#                 eval_res_test_nn = get_metrics(y_test[:,0], y_test_pred_nn, target=target)
#             elif target == "categorical":
#                 eval_res_train_nn = get_metrics(y_train, y_train_pred_nn, target=target)
#                 eval_res_val_nn = get_metrics(y_val, y_val_pred_nn, target=target)
#                 eval_res_test_nn = get_metrics(y_test, y_test_pred_nn, target=target)

#             ##### Target Encoding #####
#             print("\n Train Target Encoding Network")
#             model_nn_te, optimizer = get_model(model_name=model_name, 
#                                             input_size=np.append(X_train ,z_target_encoded_train, axis=1).shape[1], 
#                                             output_size=num_outputs, 
#                                             target=target, 
#                                             perc_numeric=perc_numeric, RS=RS)
#             if dataset_name=="eucalyptus":
#                 optimizer.learning_rate.assign(optimizer.learning_rate*10)
#             model_nn_te.build((n,np.append(X_train ,z_target_encoded_train, axis=1).shape[1]))
#             update_layer_activation(model=model_nn_te, activation=activation_layer)
#             model_nn_te.compile(loss=loss_use()(), optimizer=optimizer, metrics = metrics_use)
#             callback = tf.keras.callbacks.EarlyStopping(monitor="val_auc", patience=early_stopping, mode=stop_mode)

#             start = time.time()
#             history_nn_te = model_nn_te.fit(np.append(X_train ,z_target_encoded_train, axis=1), y_train,
#                          validation_data= [np.append(X_val ,z_target_encoded_val, axis=1), y_val],
#                          epochs=epochs, batch_size=batch_size, callbacks=[callback])
#             end = time.time()
#             fit_time_te = round(end-start,2)+target_encoding_time

#             y_train_pred_nn_te = model_nn_te.predict(np.append(X_train ,z_target_encoded_train, axis=1) ,batch_size=batch_size)
#             y_val_pred_nn_te = model_nn_te.predict(np.append(X_val ,z_target_encoded_val, axis=1) ,batch_size=batch_size)
#             y_test_pred_nn_te = model_nn_te.predict(np.append(X_test ,z_target_encoded_test, axis=1) ,batch_size=batch_size)

#             if target == "binary":
#                 eval_res_train_nn_te = get_metrics(y_train[:,0], y_train_pred_nn_te, target=target)
#                 eval_res_val_nn_te = get_metrics(y_val[:,0], y_val_pred_nn_te, target=target)
#                 eval_res_test_nn_te = get_metrics(y_test[:,0], y_test_pred_nn_te, target=target)
#             elif target == "categorical":
#                 eval_res_train_nn_te = get_metrics(y_train, y_train_pred_nn_te, target=target)
#                 eval_res_val_nn_te = get_metrics(y_val, y_val_pred_nn_te, target=target)
#                 eval_res_test_nn_te = get_metrics(y_test, y_test_pred_nn_te, target=target)

#             ##### OHE #####
#             print("\n Train OHE Network")
#             model_nn_ohe, optimizer = get_model(model_name=model_name, 
#                                             input_size=np.append(X_train ,z_ohe_encoded_train, axis=1).shape[1], 
#                                             output_size=num_outputs, 
#                                             target=target, 
#                                             perc_numeric=perc_numeric, RS=RS)
#             if dataset_name=="eucalyptus":
#                 optimizer.learning_rate.assign(optimizer.learning_rate*10)
#             model_nn_ohe.build((n,np.append(X_train ,z_ohe_encoded_train, axis=1).shape[1]))
#             update_layer_activation(model=model_nn_ohe, activation=activation_layer)
#             model_nn_ohe.compile(loss=loss_use()(), optimizer=optimizer, metrics = metrics_use)
#             callback = tf.keras.callbacks.EarlyStopping(monitor="val_auc", patience=early_stopping, mode=stop_mode)

#             start = time.time()
#             history_nn_ohe = model_nn_ohe.fit(np.append(X_train ,z_ohe_encoded_train, axis=1), y_train,
#                          validation_data= [np.append(X_val ,z_ohe_encoded_val, axis=1), y_val],
#                          epochs=epochs, batch_size=batch_size, callbacks=[callback])
#             end = time.time()
#             fit_time_ohe = round(end-start,2)+ohe_encoding_time

#             y_train_pred_nn_ohe = model_nn_ohe.predict(np.append(X_train ,z_ohe_encoded_train, axis=1), batch_size=batch_size)
#             y_val_pred_nn_ohe = model_nn_ohe.predict(np.append(X_val ,z_ohe_encoded_val, axis=1), batch_size=batch_size)
#             y_test_pred_nn_ohe = model_nn_ohe.predict(np.append(X_test ,z_ohe_encoded_test, axis=1), batch_size=batch_size)
            
#             if target == "binary":
#                 eval_res_train_nn_ohe = get_metrics(y_train[:,0], y_train_pred_nn_ohe, target=target)
#                 eval_res_val_nn_ohe = get_metrics(y_val[:,0], y_val_pred_nn_ohe, target=target)
#                 eval_res_test_nn_ohe = get_metrics(y_test[:,0], y_test_pred_nn_ohe, target=target)            
#             elif target == "categorical":
#                 eval_res_train_nn_ohe = get_metrics(y_train, y_train_pred_nn_ohe, target=target)
#                 eval_res_val_nn_ohe = get_metrics(y_val, y_val_pred_nn_ohe, target=target)
#                 eval_res_test_nn_ohe = get_metrics(y_test, y_test_pred_nn_ohe, target=target)
                
#             ##### Embedding #####
#             print("\n Embedding Estimate Network")

#             if embed_dims_method=="sqrt":
#                 embed_dims = [int(np.sqrt(q)) for q in qs]
#             elif embed_dims_method=="AutoGluon":
#                 embed_dims = [int(np.max([100, np.round(1.6*q**0.56)])) for q in qs]
#             else:
#                 embed_dims = [10 for q in qs]

#             input_layer = Input(shape=(d,))

#             # Define embedding layers
#             embed_inputs = []
#             embedding_layers = []
#             for q_num in range(len(qs)):
#                 Z_input_layer = Input(shape=(1,))
#                 embedding_layer = Embedding(qs[q_num], embed_dims[q_num], input_length=1)(Z_input_layer)
#                 embedding_layer = Reshape(target_shape=(embed_dims[q_num],))(embedding_layer)

#                 embed_inputs.append(Z_input_layer)
#                 embedding_layers.append(embedding_layer)

#             ### Get model layer dimensions
#             min_numeric_embed_dim = 32
#             max_numeric_embed_dim = 2056
#             max_layer_width = 2056
#             # Main dense model
#             if target == "continuous":
#                 default_layer_sizes = [256,
#                                        128]  # overall network will have 4 layers. Input layer, 256-unit hidden layer, 128-unit hidden layer, output layer.
#             else:
#                 default_sizes = [256, 128]  # will be scaled adaptively
#                 # base_size = max(1, min(num_net_outputs, 20)/2.0) # scale layer width based on number of classes
#                 base_size = max(1, min(num_outputs,
#                                        100) / 50)  # TODO: Updated because it improved model quality and made training far faster
#                 default_layer_sizes = [defaultsize * base_size for defaultsize in default_sizes]
#             layer_expansion_factor = 1  # TODO: consider scaling based on num_rows, eg: layer_expansion_factor = 2-np.exp(-max(0,train_dataset.num_examples-10000))
#             first_layer_width = int(min(max_layer_width, layer_expansion_factor * default_layer_sizes[0]))

#             # numeric embed dim
#             vector_dim = 0  # total dimensionality of vector features (I think those should be transformed string features, which we don't have)
#             prop_vector_features = perc_numeric  # Fraction of features that are numeric
#             numeric_embedding_size = int(min(max_numeric_embed_dim,
#                                              max(min_numeric_embed_dim,
#                                                  first_layer_width * prop_vector_features * np.log10(vector_dim + 10))))


#             numeric_embedding = Dense(numeric_embedding_size, activation="relu")(input_layer)

#             concat = Concatenate()([numeric_embedding] + embedding_layers)

#             base_model, optimizer = get_model(model_name=model_name, 
#                                               input_size=numeric_embedding_size + sum(embed_dims), 
#                                               output_size=num_outputs, target=target,
#                                               perc_numeric=perc_numeric, RS=RS)

#             if dataset_name=="eucalyptus":
#                 optimizer.learning_rate.assign(optimizer.learning_rate*10)
#             base_model.build((n, numeric_embedding_size + sum(embed_dims)))
#             update_layer_activation(model=base_model, activation=activation_layer)

#             layers = base_model(concat)

#             model_embed = Model(inputs=[input_layer] + embed_inputs, outputs=layers)


#             model_embed.compile(loss=loss_use()(), optimizer=optimizer, metrics = metrics_use)
#             callback = tf.keras.callbacks.EarlyStopping(monitor="val_auc", patience=early_stopping, mode=stop_mode)

#             start = time.time()
#             history_nn_embed = model_embed.fit([X_train] + [Z_train[: ,q_num] for q_num in range(len(qs))], y_train,
#                             validation_data=[[X_val] + [Z_val[: ,q_num] for q_num in range(len(qs))], y_val],
#                             epochs=epochs, batch_size=batch_size, callbacks=[callback])
#             end = time.time()
#             fit_time_embed = round(end-start,2)

#             y_train_pred_embed = model_embed.predict([X_train] + [Z_train[: ,q_num] for q_num in range(len(qs))]
#                                                      ,batch_size=batch_size)
#             y_val_pred_embed = model_embed.predict([X_val] + [Z_val[: ,q_num] for q_num in range(len(qs))]
#                                                     ,batch_size=batch_size)
#             y_test_pred_embed = model_embed.predict([X_test] + [Z_test[: ,q_num] for q_num in range(len(qs))]
#                                                     ,batch_size=batch_size)

#             if target == "binary":
#                 eval_res_train_embed = get_metrics(y_train[:,0], y_train_pred_embed, target=target)
#                 eval_res_val_embed = get_metrics(y_val[:,0], y_val_pred_embed, target=target)
#                 eval_res_test_embed = get_metrics(y_test[:,0], y_test_pred_embed, target=target)
#             elif target == "categorical":
#                 eval_res_train_embed = get_metrics(y_train, y_train_pred_embed, target=target)
#                 eval_res_val_embed = get_metrics(y_val, y_val_pred_embed, target=target)
#                 eval_res_test_embed = get_metrics(y_test, y_test_pred_embed, target=target)

#             eval_res_train_embed, eval_res_test_embed        



            ##### Document Results #####
            # Store everything recorded for this dataset/fold under
            # results[dataset_name][fold_num]. Only the GMENN entries are active here;
            # the entries for the other models (Ignore / TE / OHE / Embedding) are
            # commented out, so those keys are absent from the saved results.
            
            # Training history: the `.history` attribute of the `history` object
            # created earlier in this cell.
            results[dataset_name][fold_num]["histories"] = {"GMENN": history.history,
#                                                        "Ignore": history_nn.history,
#                                                        "TE": history_nn_te.history,
#                                                        "OHE": history_nn_ohe.history,
#                                                        "Embedding": history_nn_embed.history,
                                                      }
            
            # Predictions per model, always ordered [train, val, test].
            # NOTE(review): "GMENN (FE)" presumably holds the fixed-effects-only
            # predictions of the same model -- confirm where *_gmenn_fe is computed.
            results[dataset_name][fold_num]["predictions"] = {"GMENN": [y_train_pred_gmenn, y_val_pred_gmenn, y_test_pred_gmenn],
                                                        "GMENN (FE)": [y_train_pred_gmenn_fe, y_val_pred_gmenn_fe, y_test_pred_gmenn_fe],
#                                                         "Ignore": [y_train_pred_nn, y_val_pred_nn, y_test_pred_nn],
#                                                         "TE": [y_train_pred_nn_te, y_val_pred_nn_te, y_test_pred_nn_te],
#                                                         "OHE": [y_train_pred_nn_ohe, y_val_pred_nn_ohe, y_test_pred_nn_ohe],
#                                                         "Embedding": [y_train_pred_embed, y_val_pred_embed, y_test_pred_embed],
                                                     }
            
            # Fit time per model, taken from fit_time_gmenn (computed earlier in this
            # cell; its unit is not visible from here).
            results[dataset_name][fold_num]["times"] = {"GMENN": fit_time_gmenn,
#                                                    "Ignore": fit_time_nn,
#                                                    "TE": fit_time_te,
#                                                    "OHE": fit_time_ohe,
#                                                    "Embedding": fit_time_embed,
                                                      }
            
            # Extra per-model diagnostics. Every GMENN field below is commented out,
            # so "GMENN" currently maps to an empty dict.
            results[dataset_name][fold_num]["other_info"] = {
                "GMENN": {
#                     "_stddev_z": np.array([i.numpy() for i in me_model.data_model._stddev_z]),
#                     "acceptance_rates": np.array(me_model.acceptance_rates),
#                     "random_effects": me_model.mean_samples,
#                     "all_samples": me_model.all_samples,
#                     "stds": me_model.stds
                },
            }
            
            
            # Persist this fold's results as a pickle file. The path uses a single
            # separator so it is the exact same string the load branch of this cell
            # opens for reading (the original had a doubled "//" here only).
            with open(f"{save_path}/results_RS{RS}_{dataset_name}_iter{fold_num}.pickle", 'wb') as handle:
                pickle.dump(results[dataset_name][fold_num], handle, protocol=pickle.HIGHEST_PROTOCOL)
            
            
            # Unbind this fold's train/val/test arrays and trigger a garbage-collection
            # pass right away. The encoded-feature deletions below are commented out,
            # matching the disabled TE / OHE entries in the results dicts.
            del X_train, X_val, X_test, y_train, y_val, y_test
#             del z_target_encoded_train, z_target_encoded_val, z_target_encoded_test
#             del z_ohe_encoded_train, z_ohe_encoded_val, z_ohe_encoded_test
            
            gc.collect()
        else:
            # Reload the stored results for this dataset/fold from the pickle file
            # instead of recomputing them.
            print(f"Load results for dataset {dataset_name}, iteration={fold_num}")
            # NOTE(review): pickle.load can execute arbitrary code contained in the
            # file. This is presumably a file written by the save branch of this same
            # cell -- confirm that save_path only ever holds trusted files.
            with open(f"{save_path}/results_RS{RS}_{dataset_name}_iter{fold_num}.pickle", 'rb') as handle:
                results[dataset_name][fold_num] = pickle.load(handle)
        


Start training procedure for eucalyptus
Fold no. 0
Random seed set as 555


2024-01-18 16:26:35.360477: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory



 Start sampling for epoch 1 of training
Epoch 1/500

 Start sampling for epoch 2 of training
Adapt step size to 1.8861645460128784
Epoch 2/500

 Start sampling for epoch 3 of training
Adapt step size to 0.07890954613685608
Epoch 3/500

 Start sampling for epoch 4 of training
Adapt step size to 0.46754327416419983
Epoch 4/500

 Start sampling for epoch 5 of training
Adapt step size to 0.17662160098552704
Epoch 5/500

 Start sampling for epoch 6 of training
Adapt step size to 0.5712532997131348
Epoch 6/500

 Start sampling for epoch 7 of training
Adapt step size to 0.22003073990345
Epoch 7/500

 Start sampling for epoch 8 of training
Adapt step size to 0.19742809236049652
Epoch 8/500

 Start sampling for epoch 9 of training
Adapt step size to 0.3579099476337433
Epoch 9/500

 Start sampling for epoch 10 of training
Adapt step size to 0.14440128207206726
Epoch 10/500

 Start sampling for epoch 11 of training
Adapt step size to 0.04182971641421318
Epoch 11/500

 Start sampling for epoch 12

Epoch 21/500

 Start sampling for epoch 22 of training
Adapt step size to 0.08545905351638794
Epoch 22/500

 Start sampling for epoch 23 of training
Adapt step size to 0.04195152223110199
Epoch 23/500

 Start sampling for epoch 24 of training
Adapt step size to 0.018647121265530586
Epoch 24/500

 Start sampling for epoch 25 of training
Adapt step size to 0.10563501715660095
Epoch 25/500

 Start sampling for epoch 26 of training
Adapt step size to 0.053189050406217575
Epoch 26/500

 Start sampling for epoch 27 of training
Adapt step size to 0.027264736592769623
Epoch 27/500

 Start sampling for epoch 28 of training
Adapt step size to 0.043910346925258636
Epoch 28/500

 Start sampling for epoch 29 of training
Adapt step size to 0.02293252758681774
Epoch 29/500

 Start sampling for epoch 30 of training
Adapt step size to 0.023738566786050797
Epoch 30/500

 Start sampling for epoch 31 of training
Adapt step size to 0.06711024045944214
Epoch 31/500

 Start sampling for epoch 32 of training


Epoch 45/500

 Start sampling for epoch 46 of training
Adapt step size to 0.008283151313662529
Epoch 46/500

 Start sampling for epoch 47 of training
Adapt step size to 0.00527170579880476
Epoch 47/500

 Start sampling for epoch 48 of training
Adapt step size to 0.004905475303530693
Epoch 48/500

 Start sampling for epoch 49 of training
Adapt step size to 0.002641610335558653
Epoch 49/500

 Start sampling for epoch 50 of training
Adapt step size to 0.0036938036791980267
Epoch 50/500

 Start sampling for epoch 51 of training
Adapt step size to 0.004931230563670397
Epoch 51/500

 Start sampling for epoch 52 of training
Adapt step size to 0.0035237816628068686
Epoch 52/500

 Start sampling for epoch 53 of training
Adapt step size to 0.00445981603115797
Epoch 53/500

 Start sampling for epoch 54 of training
Adapt step size to 0.003256453201174736
Epoch 54/500

 Start sampling for epoch 55 of training
Adapt step size to 0.0033133532851934433
Epoch 55/500

 Start sampling for epoch 56 of tra

Epoch 69/500

 Start sampling for epoch 70 of training
Adapt step size to 0.0017747055971994996
Epoch 70/500

 Start sampling for epoch 71 of training
Adapt step size to 0.0011448622681200504
Epoch 71/500

 Start sampling for epoch 72 of training
Adapt step size to 0.0025003373157233
Epoch 72/500

 Start sampling for epoch 73 of training
Adapt step size to 0.0016174936899915338
Epoch 73/500

 Start sampling for epoch 74 of training
Adapt step size to 0.0011011356255039573
Epoch 74/500

 Start sampling for epoch 75 of training
Adapt step size to 0.0014099797699600458
Epoch 75/500

 Start sampling for epoch 76 of training
Adapt step size to 0.0010750530054792762
Epoch 76/500

 Start sampling for epoch 77 of training
Adapt step size to 0.0012840755516663194
Epoch 77/500

 Start sampling for epoch 78 of training
Adapt step size to 0.0011729109100997448
Epoch 78/500

 Start sampling for epoch 79 of training
Adapt step size to 0.0008755294256843626
Epoch 79/500

 Start sampling for epoch 80 

Epoch 93/500

 Start sampling for epoch 94 of training
Adapt step size to 0.0003420035354793072
Epoch 94/500

 Start sampling for epoch 95 of training
Adapt step size to 0.00036240729968994856
Epoch 95/500

 Start sampling for epoch 96 of training
Adapt step size to 0.00020947837037965655
Epoch 96/500

 Start sampling for epoch 97 of training
Adapt step size to 0.0003412789083085954
Epoch 97/500

 Start sampling for epoch 98 of training
Adapt step size to 0.0006651732255704701
Epoch 98/500

 Start sampling for epoch 99 of training
Adapt step size to 0.00045602404861710966
Epoch 99/500

 Start sampling for epoch 100 of training
Adapt step size to 0.00035414635203778744
Epoch 100/500

 Start sampling for epoch 101 of training
Adapt step size to 0.00038326563662849367
Epoch 101/500
Fold no. 1
Random seed set as 555

 Start sampling for epoch 1 of training
Epoch 1/500

 Start sampling for epoch 2 of training
Adapt step size to 1.8861645460128784
Epoch 2/500

 Start sampling for epoch 3 of 

Epoch 16/500

 Start sampling for epoch 17 of training
Adapt step size to 0.30828943848609924
Epoch 17/500

 Start sampling for epoch 18 of training
Adapt step size to 0.1402227133512497
Epoch 18/500

 Start sampling for epoch 19 of training
Adapt step size to 0.3449696898460388
Epoch 19/500

 Start sampling for epoch 20 of training
Adapt step size to 0.16050678491592407
Epoch 20/500

 Start sampling for epoch 21 of training
Adapt step size to 0.08537160605192184
Epoch 21/500

 Start sampling for epoch 22 of training
Adapt step size to 0.2590813934803009
Epoch 22/500

 Start sampling for epoch 23 of training
Adapt step size to 0.12514673173427582
Epoch 23/500

 Start sampling for epoch 24 of training
Adapt step size to 0.061702996492385864
Epoch 24/500

 Start sampling for epoch 25 of training
Adapt step size to 0.14183638989925385
Epoch 25/500

 Start sampling for epoch 26 of training
Adapt step size to 0.07113233953714371
Epoch 26/500

 Start sampling for epoch 27 of training
Adapt s

Epoch 40/500

 Start sampling for epoch 41 of training
Adapt step size to 0.013947810046374798
Epoch 41/500

 Start sampling for epoch 42 of training
Adapt step size to 0.016128702089190483
Epoch 42/500

 Start sampling for epoch 43 of training
Adapt step size to 0.01589314080774784
Epoch 43/500
Fold no. 2
Random seed set as 555

 Start sampling for epoch 1 of training
Epoch 1/500

 Start sampling for epoch 2 of training
Adapt step size to 1.8861645460128784
Epoch 2/500

 Start sampling for epoch 3 of training
Adapt step size to 0.07890954613685608
Epoch 3/500

 Start sampling for epoch 4 of training
Adapt step size to 0.46754327416419983
Epoch 4/500

 Start sampling for epoch 5 of training
Adapt step size to 0.17662160098552704
Epoch 5/500

 Start sampling for epoch 6 of training
Adapt step size to 0.054794758558273315
Epoch 6/500

 Start sampling for epoch 7 of training
Adapt step size to 0.49652257561683655
Epoch 7/500

 Start sampling for epoch 8 of training
Adapt step size to 0.19

Epoch 21/500

 Start sampling for epoch 22 of training
Adapt step size to 0.048983730375766754
Epoch 22/500

 Start sampling for epoch 23 of training
Adapt step size to 0.1784074902534485
Epoch 23/500

 Start sampling for epoch 24 of training
Adapt step size to 0.08752088993787766
Epoch 24/500

 Start sampling for epoch 25 of training
Adapt step size to 0.06541475653648376
Epoch 25/500

 Start sampling for epoch 26 of training
Adapt step size to 0.07818298786878586
Epoch 26/500

 Start sampling for epoch 27 of training
Adapt step size to 0.04444015398621559
Epoch 27/500

 Start sampling for epoch 28 of training
Adapt step size to 0.08995373547077179
Epoch 28/500

 Start sampling for epoch 29 of training
Adapt step size to 0.04655475914478302
Epoch 29/500

 Start sampling for epoch 30 of training
Adapt step size to 0.03612005338072777
Epoch 30/500

 Start sampling for epoch 31 of training
Adapt step size to 0.14624187350273132
Epoch 31/500

 Start sampling for epoch 32 of training
Adapt

Epoch 45/500

 Start sampling for epoch 46 of training
Adapt step size to 0.01802825927734375
Epoch 46/500

 Start sampling for epoch 47 of training
Adapt step size to 0.03396874666213989
Epoch 47/500

 Start sampling for epoch 48 of training
Adapt step size to 0.01987290196120739
Epoch 48/500

 Start sampling for epoch 49 of training
Adapt step size to 0.02376534789800644
Epoch 49/500
Fold no. 3
Random seed set as 555

 Start sampling for epoch 1 of training
Epoch 1/500

 Start sampling for epoch 2 of training
Adapt step size to 1.8861645460128784
Epoch 2/500

 Start sampling for epoch 3 of training
Adapt step size to 0.07890954613685608
Epoch 3/500

 Start sampling for epoch 4 of training
Adapt step size to 0.46754327416419983
Epoch 4/500

 Start sampling for epoch 5 of training
Adapt step size to 0.17662160098552704
Epoch 5/500

 Start sampling for epoch 6 of training
Adapt step size to 0.049833040684461594
Epoch 6/500

 Start sampling for epoch 7 of training
Adapt step size to 0.45


 Start sampling for epoch 21 of training
Adapt step size to 0.17803996801376343
Epoch 21/500

 Start sampling for epoch 22 of training
Adapt step size to 0.08557306230068207
Epoch 22/500

 Start sampling for epoch 23 of training
Adapt step size to 0.032841261476278305
Epoch 23/500

 Start sampling for epoch 24 of training
Adapt step size to 0.18290074169635773
Epoch 24/500

 Start sampling for epoch 25 of training
Adapt step size to 0.09056444466114044
Epoch 25/500

 Start sampling for epoch 26 of training
Adapt step size to 0.045732080936431885
Epoch 26/500

 Start sampling for epoch 27 of training
Adapt step size to 0.02319280058145523
Epoch 27/500

 Start sampling for epoch 28 of training
Adapt step size to 0.12220977246761322
Epoch 28/500

 Start sampling for epoch 29 of training
Adapt step size to 0.06300386786460876
Epoch 29/500

 Start sampling for epoch 30 of training
Adapt step size to 0.08357294648885727
Epoch 30/500

 Start sampling for epoch 31 of training
Adapt step size 

Epoch 44/500

 Start sampling for epoch 45 of training
Adapt step size to 0.012446500360965729
Epoch 45/500

 Start sampling for epoch 46 of training
Adapt step size to 0.013328964821994305
Epoch 46/500

 Start sampling for epoch 47 of training
Adapt step size to 0.013615402393043041
Epoch 47/500

 Start sampling for epoch 48 of training
Adapt step size to 0.01850097067654133
Epoch 48/500

 Start sampling for epoch 49 of training
Adapt step size to 0.010927551425993443
Epoch 49/500

 Start sampling for epoch 50 of training
Adapt step size to 0.005838761106133461
Epoch 50/500

 Start sampling for epoch 51 of training
Adapt step size to 0.013878481462597847
Epoch 51/500

 Start sampling for epoch 52 of training
Adapt step size to 0.008317290805280209
Epoch 52/500

 Start sampling for epoch 53 of training
Adapt step size to 0.007827863097190857
Epoch 53/500

 Start sampling for epoch 54 of training
Adapt step size to 0.007648240774869919
Epoch 54/500

 Start sampling for epoch 55 of train

Epoch 68/500

 Start sampling for epoch 69 of training
Adapt step size to 0.0016142266103997827
Epoch 69/500

 Start sampling for epoch 70 of training
Adapt step size to 0.0016647666925564408
Epoch 70/500

 Start sampling for epoch 71 of training
Adapt step size to 0.0017466058488935232
Epoch 71/500

 Start sampling for epoch 72 of training
Adapt step size to 0.001571354572661221
Epoch 72/500

 Start sampling for epoch 73 of training
Adapt step size to 0.001931605045683682
Epoch 73/500

 Start sampling for epoch 74 of training
Adapt step size to 0.0013050757115706801
Epoch 74/500

 Start sampling for epoch 75 of training
Adapt step size to 0.0016728354385122657
Epoch 75/500

 Start sampling for epoch 76 of training
Adapt step size to 0.0010924721136689186
Epoch 76/500

 Start sampling for epoch 77 of training
Adapt step size to 0.0008877972140908241
Epoch 77/500

 Start sampling for epoch 78 of training
Adapt step size to 0.0008468859596177936
Epoch 78/500

 Start sampling for epoch 79

Epoch 92/500

 Start sampling for epoch 93 of training
Adapt step size to 0.0002858297375496477
Epoch 93/500

 Start sampling for epoch 94 of training
Adapt step size to 0.00047152218758128583
Epoch 94/500

 Start sampling for epoch 95 of training
Adapt step size to 0.0003571541456039995
Epoch 95/500

 Start sampling for epoch 96 of training
Adapt step size to 0.0002719295443966985
Epoch 96/500

 Start sampling for epoch 97 of training
Adapt step size to 0.0003442752349656075
Epoch 97/500

 Start sampling for epoch 98 of training
Adapt step size to 0.00017222974565811455
Epoch 98/500

 Start sampling for epoch 99 of training
Adapt step size to 0.00020356327877379954
Epoch 99/500

 Start sampling for epoch 100 of training
Adapt step size to 0.00022021314362064004
Epoch 100/500

 Start sampling for epoch 101 of training
Adapt step size to 0.00024067853519227356
Epoch 101/500

 Start sampling for epoch 102 of training
Adapt step size to 0.0001542804966447875
Epoch 102/500

 Start sampling

Epoch 2/500

 Start sampling for epoch 3 of training
Adapt step size to 0.07890954613685608
Epoch 3/500

 Start sampling for epoch 4 of training
Adapt step size to 0.46754327416419983
Epoch 4/500

 Start sampling for epoch 5 of training
Adapt step size to 0.17662160098552704
Epoch 5/500

 Start sampling for epoch 6 of training
Adapt step size to 0.059767577797174454
Epoch 6/500

 Start sampling for epoch 7 of training
Adapt step size to 0.5415955185890198
Epoch 7/500

 Start sampling for epoch 8 of training
Adapt step size to 0.2110760509967804
Epoch 8/500

 Start sampling for epoch 9 of training
Adapt step size to 0.44287195801734924
Epoch 9/500

 Start sampling for epoch 10 of training
Adapt step size to 0.17813731729984283
Epoch 10/500

 Start sampling for epoch 11 of training
Adapt step size to 0.20487189292907715
Epoch 11/500

 Start sampling for epoch 12 of training
Adapt step size to 0.2916814386844635
Epoch 12/500

 Start sampling for epoch 13 of training
Adapt step size to 0.1

Epoch 26/500

 Start sampling for epoch 27 of training
Adapt step size to 0.04694908857345581
Epoch 27/500

 Start sampling for epoch 28 of training
Adapt step size to 0.05396472290158272
Epoch 28/500

 Start sampling for epoch 29 of training
Adapt step size to 0.05449458956718445
Epoch 29/500

 Start sampling for epoch 30 of training
Adapt step size to 0.047560010105371475
Epoch 30/500

 Start sampling for epoch 31 of training
Adapt step size to 0.09615042060613632
Epoch 31/500

 Start sampling for epoch 32 of training
Adapt step size to 0.05086662247776985
Epoch 32/500

 Start sampling for epoch 33 of training
Adapt step size to 0.10582765936851501
Epoch 33/500

 Start sampling for epoch 34 of training
Adapt step size to 0.056712549179792404
Epoch 34/500

 Start sampling for epoch 35 of training
Adapt step size to 0.0308083426207304
Epoch 35/500

 Start sampling for epoch 36 of training
Adapt step size to 0.02656812220811844
Epoch 36/500

 Start sampling for epoch 37 of training
Adap


 Start sampling for epoch 6 of training
Adapt step size to 0.052996546030044556
Epoch 6/500

 Start sampling for epoch 7 of training
Adapt step size to 0.480224072933197
Epoch 7/500

 Start sampling for epoch 8 of training
Adapt step size to 0.18732477724552155
Epoch 8/500

 Start sampling for epoch 9 of training
Adapt step size to 0.09711362421512604
Epoch 9/500

 Start sampling for epoch 10 of training
Adapt step size to 0.10066162049770355
Epoch 10/500

 Start sampling for epoch 11 of training
Adapt step size to 0.0536351203918457
Epoch 11/500

 Start sampling for epoch 12 of training
Adapt step size to 0.40671294927597046
Epoch 12/500

 Start sampling for epoch 13 of training
Adapt step size to 0.17166075110435486
Epoch 13/500

 Start sampling for epoch 14 of training
Adapt step size to 0.1944212019443512
Epoch 14/500

 Start sampling for epoch 15 of training
Adapt step size to 0.21246737241744995
Epoch 15/500

 Start sampling for epoch 16 of training
Adapt step size to 0.10190108


 Start sampling for epoch 30 of training
Adapt step size to 0.18632592260837555
Epoch 30/500

 Start sampling for epoch 31 of training
Adapt step size to 0.08802402764558792
Epoch 31/500

 Start sampling for epoch 32 of training
Adapt step size to 0.39191681146621704
Epoch 32/500

 Start sampling for epoch 33 of training
Adapt step size to 0.20544365048408508
Epoch 33/500

 Start sampling for epoch 34 of training
Adapt step size to 0.10927346348762512
Epoch 34/500

 Start sampling for epoch 35 of training
Adapt step size to 0.3700273633003235
Epoch 35/500

 Start sampling for epoch 36 of training
Adapt step size to 0.19820477068424225
Epoch 36/500

 Start sampling for epoch 37 of training
Adapt step size to 0.1719476580619812
Epoch 37/500

 Start sampling for epoch 38 of training
Adapt step size to 0.10861053317785263
Epoch 38/500

 Start sampling for epoch 39 of training
Adapt step size to 0.3356013000011444
Epoch 39/500

 Start sampling for epoch 40 of training
Adapt step size to 0.


 Start sampling for epoch 54 of training
Adapt step size to 0.14457671344280243
Epoch 54/500

 Start sampling for epoch 55 of training
Adapt step size to 0.12050612270832062
Epoch 55/500

 Start sampling for epoch 56 of training
Adapt step size to 0.10983414202928543
Epoch 56/500

 Start sampling for epoch 57 of training
Adapt step size to 0.37002861499786377
Epoch 57/500

 Start sampling for epoch 58 of training
Adapt step size to 0.22117926180362701
Epoch 58/500

 Start sampling for epoch 59 of training
Adapt step size to 0.1649317890405655
Epoch 59/500

 Start sampling for epoch 60 of training
Adapt step size to 0.11121226847171783
Epoch 60/500

 Start sampling for epoch 61 of training
Adapt step size to 0.33235976099967957
Epoch 61/500

 Start sampling for epoch 62 of training
Adapt step size to 0.20170016586780548
Epoch 62/500

 Start sampling for epoch 63 of training
Adapt step size to 0.12325052917003632
Epoch 63/500

 Start sampling for epoch 64 of training
Adapt step size to 


 Start sampling for epoch 78 of training
Adapt step size to 0.12210123986005783
Epoch 78/500

 Start sampling for epoch 79 of training
Adapt step size to 0.2869376838207245
Epoch 79/500

 Start sampling for epoch 80 of training
Adapt step size to 0.24794992804527283
Epoch 80/500

 Start sampling for epoch 81 of training
Adapt step size to 0.1824868768453598
Epoch 81/500

 Start sampling for epoch 82 of training
Adapt step size to 0.2614139914512634
Epoch 82/500

 Start sampling for epoch 83 of training
Adapt step size to 0.19466346502304077
Epoch 83/500

 Start sampling for epoch 84 of training
Adapt step size to 0.25274771451950073
Epoch 84/500

 Start sampling for epoch 85 of training
Adapt step size to 0.17770668864250183
Epoch 85/500

 Start sampling for epoch 86 of training
Adapt step size to 0.17399044334888458
Epoch 86/500

 Start sampling for epoch 87 of training
Adapt step size to 0.1320309340953827
Epoch 87/500

 Start sampling for epoch 88 of training
Adapt step size to 0.2

Epoch 6/500

 Start sampling for epoch 7 of training
Adapt step size to 0.3581821620464325
Epoch 7/500

 Start sampling for epoch 8 of training
Adapt step size to 0.30662423372268677
Epoch 8/500

 Start sampling for epoch 9 of training
Adapt step size to 0.12197472155094147
Epoch 9/500

 Start sampling for epoch 10 of training
Adapt step size to 0.30566245317459106
Epoch 10/500

 Start sampling for epoch 11 of training
Adapt step size to 0.16094206273555756
Epoch 11/500

 Start sampling for epoch 12 of training
Adapt step size to 0.47108006477355957
Epoch 12/500

 Start sampling for epoch 13 of training
Adapt step size to 0.19833533465862274
Epoch 13/500

 Start sampling for epoch 14 of training
Adapt step size to 0.10307390242815018
Epoch 14/500

 Start sampling for epoch 15 of training
Adapt step size to 0.07220542430877686
Epoch 15/500

 Start sampling for epoch 16 of training
Adapt step size to 0.03906643018126488
Epoch 16/500

 Start sampling for epoch 17 of training
Adapt step si

Epoch 30/500

 Start sampling for epoch 31 of training
Adapt step size to 0.19364604353904724
Epoch 31/500

 Start sampling for epoch 32 of training
Adapt step size to 0.11001051217317581
Epoch 32/500

 Start sampling for epoch 33 of training
Adapt step size to 0.07585612684488297
Epoch 33/500

 Start sampling for epoch 34 of training
Adapt step size to 0.2938801646232605
Epoch 34/500

 Start sampling for epoch 35 of training
Adapt step size to 0.24815434217453003
Epoch 35/500

 Start sampling for epoch 36 of training
Adapt step size to 0.14837764203548431
Epoch 36/500

 Start sampling for epoch 37 of training
Adapt step size to 0.17202326655387878
Epoch 37/500

 Start sampling for epoch 38 of training
Adapt step size to 0.14895939826965332
Epoch 38/500

 Start sampling for epoch 39 of training
Adapt step size to 0.27769771218299866
Epoch 39/500

 Start sampling for epoch 40 of training
Adapt step size to 0.15296539664268494
Epoch 40/500

 Start sampling for epoch 41 of training
Adapt 

Epoch 54/500

 Start sampling for epoch 55 of training
Adapt step size to 0.12230223417282104
Epoch 55/500

 Start sampling for epoch 56 of training
Adapt step size to 0.2714156210422516
Epoch 56/500

 Start sampling for epoch 57 of training
Adapt step size to 0.16280624270439148
Epoch 57/500

 Start sampling for epoch 58 of training
Adapt step size to 0.15309999883174896
Epoch 58/500

 Start sampling for epoch 59 of training
Adapt step size to 0.13447310030460358
Epoch 59/500

 Start sampling for epoch 60 of training
Adapt step size to 0.17057010531425476
Epoch 60/500

 Start sampling for epoch 61 of training
Adapt step size to 0.13647256791591644
Epoch 61/500

 Start sampling for epoch 62 of training
Adapt step size to 0.26202306151390076
Epoch 62/500

 Start sampling for epoch 63 of training
Adapt step size to 0.24093522131443024
Epoch 63/500

 Start sampling for epoch 64 of training
Adapt step size to 0.18693102896213531
Epoch 64/500

 Start sampling for epoch 65 of training
Adapt 

Epoch 78/500

 Start sampling for epoch 79 of training
Adapt step size to 0.2878454625606537
Epoch 79/500

 Start sampling for epoch 80 of training
Adapt step size to 0.18424145877361298
Epoch 80/500

 Start sampling for epoch 81 of training
Adapt step size to 0.22942499816417694
Epoch 81/500

 Start sampling for epoch 82 of training
Adapt step size to 0.1480521261692047
Epoch 82/500

 Start sampling for epoch 83 of training
Adapt step size to 0.2501224875450134
Epoch 83/500

 Start sampling for epoch 84 of training
Adapt step size to 0.18846547603607178
Epoch 84/500

 Start sampling for epoch 85 of training
Adapt step size to 0.12271348387002945
Epoch 85/500

 Start sampling for epoch 86 of training
Adapt step size to 0.34116125106811523
Epoch 86/500

 Start sampling for epoch 87 of training
Adapt step size to 0.287265807390213
Epoch 87/500

 Start sampling for epoch 88 of training
Adapt step size to 0.24135930836200714
Epoch 88/500

 Start sampling for epoch 89 of training
Adapt step

Epoch 102/500

 Start sampling for epoch 103 of training
Adapt step size to 0.34078249335289
Epoch 103/500

 Start sampling for epoch 104 of training
Adapt step size to 0.2290147989988327
Epoch 104/500

 Start sampling for epoch 105 of training
Adapt step size to 0.20452822744846344
Epoch 105/500
Fold no. 2
Random seed set as 555

 Start sampling for epoch 1 of training
Epoch 1/500

 Start sampling for epoch 2 of training
Adapt step size to 1.5818501710891724
Epoch 2/500

 Start sampling for epoch 3 of training
Adapt step size to 0.06097309663891792
Epoch 3/500

 Start sampling for epoch 4 of training
Adapt step size to 0.35036367177963257
Epoch 4/500

 Start sampling for epoch 5 of training
Adapt step size to 0.13068504631519318
Epoch 5/500

 Start sampling for epoch 6 of training
Adapt step size to 0.507653534412384
Epoch 6/500

 Start sampling for epoch 7 of training
Adapt step size to 0.19552861154079437
Epoch 7/500

 Start sampling for epoch 8 of training
Adapt step size to 0.1380

Epoch 21/500

 Start sampling for epoch 22 of training
Adapt step size to 0.21169641613960266
Epoch 22/500

 Start sampling for epoch 23 of training
Adapt step size to 0.10659072548151016
Epoch 23/500

 Start sampling for epoch 24 of training
Adapt step size to 0.43102359771728516
Epoch 24/500

 Start sampling for epoch 25 of training
Adapt step size to 0.2108999639749527
Epoch 25/500

 Start sampling for epoch 26 of training
Adapt step size to 0.11147943884134293
Epoch 26/500

 Start sampling for epoch 27 of training
Adapt step size to 0.3491327464580536
Epoch 27/500

 Start sampling for epoch 28 of training
Adapt step size to 0.17610955238342285
Epoch 28/500

 Start sampling for epoch 29 of training
Adapt step size to 0.11239833384752274
Epoch 29/500

 Start sampling for epoch 30 of training
Adapt step size to 0.07762449979782104
Epoch 30/500

 Start sampling for epoch 31 of training
Adapt step size to 0.3312687873840332
Epoch 31/500

 Start sampling for epoch 32 of training
Adapt st

Epoch 45/500

 Start sampling for epoch 46 of training
Adapt step size to 0.2354341447353363
Epoch 46/500

 Start sampling for epoch 47 of training
Adapt step size to 0.16795861721038818
Epoch 47/500

 Start sampling for epoch 48 of training
Adapt step size to 0.0886765792965889
Epoch 48/500

 Start sampling for epoch 49 of training
Adapt step size to 0.31709131598472595
Epoch 49/500

 Start sampling for epoch 50 of training
Adapt step size to 0.18362608551979065
Epoch 50/500

 Start sampling for epoch 51 of training
Adapt step size to 0.11418569833040237
Epoch 51/500

 Start sampling for epoch 52 of training
Adapt step size to 0.1443161815404892
Epoch 52/500

 Start sampling for epoch 53 of training
Adapt step size to 0.22543811798095703
Epoch 53/500

 Start sampling for epoch 54 of training
Adapt step size to 0.15593960881233215
Epoch 54/500

 Start sampling for epoch 55 of training
Adapt step size to 0.2035953551530838
Epoch 55/500

 Start sampling for epoch 56 of training
Adapt ste

Epoch 69/500

 Start sampling for epoch 70 of training
Adapt step size to 0.12418675422668457
Epoch 70/500

 Start sampling for epoch 71 of training
Adapt step size to 0.32340726256370544
Epoch 71/500

 Start sampling for epoch 72 of training
Adapt step size to 0.20254100859165192
Epoch 72/500

 Start sampling for epoch 73 of training
Adapt step size to 0.23192210495471954
Epoch 73/500

 Start sampling for epoch 74 of training
Adapt step size to 0.1768295168876648
Epoch 74/500

 Start sampling for epoch 75 of training
Adapt step size to 0.18680419027805328
Epoch 75/500

 Start sampling for epoch 76 of training
Adapt step size to 0.1942167431116104
Epoch 76/500

 Start sampling for epoch 77 of training
Adapt step size to 0.1463235765695572
Epoch 77/500

 Start sampling for epoch 78 of training
Adapt step size to 0.12378080934286118
Epoch 78/500

 Start sampling for epoch 79 of training
Adapt step size to 0.29860809445381165
Epoch 79/500

 Start sampling for epoch 80 of training
Adapt st

Epoch 93/500

 Start sampling for epoch 94 of training
Adapt step size to 0.26667553186416626
Epoch 94/500

 Start sampling for epoch 95 of training
Adapt step size to 0.1764666587114334
Epoch 95/500

 Start sampling for epoch 96 of training
Adapt step size to 0.2335784137248993
Epoch 96/500

 Start sampling for epoch 97 of training
Adapt step size to 0.29101845622062683
Epoch 97/500

 Start sampling for epoch 98 of training
Adapt step size to 0.19360539317131042
Epoch 98/500
Fold no. 3
Random seed set as 555

 Start sampling for epoch 1 of training
Epoch 1/500

 Start sampling for epoch 2 of training
Adapt step size to 1.8861645460128784
Epoch 2/500

 Start sampling for epoch 3 of training
Adapt step size to 0.07890954613685608
Epoch 3/500

 Start sampling for epoch 4 of training
Adapt step size to 0.46754327416419983
Epoch 4/500

 Start sampling for epoch 5 of training
Adapt step size to 0.17662160098552704
Epoch 5/500

 Start sampling for epoch 6 of training
Adapt step size to 0.164

Epoch 19/500

 Start sampling for epoch 20 of training
Adapt step size to 0.325631320476532
Epoch 20/500

 Start sampling for epoch 21 of training
Adapt step size to 0.15371790528297424
Epoch 21/500

 Start sampling for epoch 22 of training
Adapt step size to 0.40225914120674133
Epoch 22/500

 Start sampling for epoch 23 of training
Adapt step size to 0.19306784868240356
Epoch 23/500

 Start sampling for epoch 24 of training
Adapt step size to 0.05649135261774063
Epoch 24/500

 Start sampling for epoch 25 of training
Adapt step size to 0.315135657787323
Epoch 25/500

 Start sampling for epoch 26 of training
Adapt step size to 0.2512941360473633
Epoch 26/500

 Start sampling for epoch 27 of training
Adapt step size to 0.24635784327983856
Epoch 27/500

 Start sampling for epoch 28 of training
Adapt step size to 0.16139507293701172
Epoch 28/500

 Start sampling for epoch 29 of training
Adapt step size to 0.24697738885879517
Epoch 29/500

 Start sampling for epoch 30 of training
Adapt step

Epoch 43/500

 Start sampling for epoch 44 of training
Adapt step size to 0.1215316653251648
Epoch 44/500

 Start sampling for epoch 45 of training
Adapt step size to 0.3983447849750519
Epoch 45/500

 Start sampling for epoch 46 of training
Adapt step size to 0.22588756680488586
Epoch 46/500

 Start sampling for epoch 47 of training
Adapt step size to 0.16896483302116394
Epoch 47/500

 Start sampling for epoch 48 of training
Adapt step size to 0.15143704414367676
Epoch 48/500

 Start sampling for epoch 49 of training
Adapt step size to 0.10281621664762497
Epoch 49/500

 Start sampling for epoch 50 of training
Adapt step size to 0.30180445313453674
Epoch 50/500

 Start sampling for epoch 51 of training
Adapt step size to 0.17606709897518158
Epoch 51/500

 Start sampling for epoch 52 of training
Adapt step size to 0.3953717350959778
Epoch 52/500

 Start sampling for epoch 53 of training
Adapt step size to 0.23155038058757782
Epoch 53/500

 Start sampling for epoch 54 of training
Adapt st

Epoch 67/500

 Start sampling for epoch 68 of training
Adapt step size to 0.31060752272605896
Epoch 68/500

 Start sampling for epoch 69 of training
Adapt step size to 0.19295348227024078
Epoch 69/500

 Start sampling for epoch 70 of training
Adapt step size to 0.23713882267475128
Epoch 70/500

 Start sampling for epoch 71 of training
Adapt step size to 0.1521609127521515
Epoch 71/500

 Start sampling for epoch 72 of training
Adapt step size to 0.36262834072113037
Epoch 72/500

 Start sampling for epoch 73 of training
Adapt step size to 0.22758176922798157
Epoch 73/500

 Start sampling for epoch 74 of training
Adapt step size to 0.14532838761806488
Epoch 74/500

 Start sampling for epoch 75 of training
Adapt step size to 0.2489294856786728
Epoch 75/500

 Start sampling for epoch 76 of training
Adapt step size to 0.20221467316150665
Epoch 76/500

 Start sampling for epoch 77 of training
Adapt step size to 0.17172585427761078
Epoch 77/500

 Start sampling for epoch 78 of training
Adapt s

Epoch 91/500
 Early stopping of FE by fe_auc_val at 91 epochs

 Start sampling for epoch 92 of training
Adapt step size to 0.21970796585083008
Epoch 92/500

 Start sampling for epoch 93 of training
Adapt step size to 0.2226228415966034
Epoch 93/500
Fold no. 4
Random seed set as 555

 Start sampling for epoch 1 of training
Epoch 1/500

 Start sampling for epoch 2 of training
Adapt step size to 1.8861645460128784
Epoch 2/500

 Start sampling for epoch 3 of training
Adapt step size to 0.07890954613685608
Epoch 3/500

 Start sampling for epoch 4 of training
Adapt step size to 0.2166086584329605
Epoch 4/500

 Start sampling for epoch 5 of training
Adapt step size to 0.08882312476634979
Epoch 5/500

 Start sampling for epoch 6 of training
Adapt step size to 0.3572399318218231
Epoch 6/500

 Start sampling for epoch 7 of training
Adapt step size to 0.13758249580860138
Epoch 7/500

 Start sampling for epoch 8 of training
Adapt step size to 0.05105192959308624
Epoch 8/500

 Start sampling for ep


 Start sampling for epoch 22 of training
Adapt step size to 0.35161980986595154
Epoch 22/500

 Start sampling for epoch 23 of training
Adapt step size to 0.1690937876701355
Epoch 23/500

 Start sampling for epoch 24 of training
Adapt step size to 0.1189948320388794
Epoch 24/500

 Start sampling for epoch 25 of training
Adapt step size to 0.44457122683525085
Epoch 25/500

 Start sampling for epoch 26 of training
Adapt step size to 0.21953020989894867
Epoch 26/500

 Start sampling for epoch 27 of training
Adapt step size to 0.1414223611354828
Epoch 27/500

 Start sampling for epoch 28 of training
Adapt step size to 0.15770068764686584
Epoch 28/500

 Start sampling for epoch 29 of training
Adapt step size to 0.16074654459953308
Epoch 29/500

 Start sampling for epoch 30 of training
Adapt step size to 0.3517265319824219
Epoch 30/500

 Start sampling for epoch 31 of training
Adapt step size to 0.18183787167072296
Epoch 31/500

 Start sampling for epoch 32 of training
Adapt step size to 0.1


 Start sampling for epoch 46 of training
Adapt step size to 0.30501800775527954
Epoch 46/500

 Start sampling for epoch 47 of training
Adapt step size to 0.17425686120986938
Epoch 47/500

 Start sampling for epoch 48 of training
Adapt step size to 0.1568659096956253
Epoch 48/500

 Start sampling for epoch 49 of training
Adapt step size to 0.14919015765190125
Epoch 49/500

 Start sampling for epoch 50 of training
Adapt step size to 0.0863923504948616
Epoch 50/500

 Start sampling for epoch 51 of training
Adapt step size to 0.31148093938827515


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 36/500

 Start sampling for epoch 37 of training
Adapt step size to 2.9166282032712353e-35
Epoch 37/500

 Start sampling for epoch 38 of training
Adapt step size to 2.816808184694158e-34
Epoch 38/500

 Start sampling for epoch 39 of training
Adapt step size to 2.5771012848389808e-33
Epoch 39/500

 Start sampling for epoch 40 of training
Adapt step size to 2.239034031925085e-32
Epoch 40/500

 Start sampling for epoch 41 of training
Adapt step size to 1.8515789863022455e-31
Epoch 41/500

 Start sampling for epoch 42 of training
Adapt step size to 1.4604854330153059e-30
Epoch 42/500

 Start sampling for epoch 43 of training
Adapt step size to 1.1010753645282264e-29
Epoch 43/500

 Start sampling for epoch 44 of training
Adapt step size to 7.949137419243563e-29
Epoch 44/500

 Start sampling for epoch 45 of training
Adapt step size to 5.505495116869435e-28
Epoch 45/500

 Start sampling for epoch 46 of training
Adapt step size to 3.664124089655493e-27
Epoch 46/500

 Start sampling for e


 Start sampling for epoch 8 of training
Adapt step size to 1.564861018543695e-09
Epoch 8/500

 Start sampling for epoch 9 of training
Adapt step size to 3.516041277573656e-11
Epoch 9/500

 Start sampling for epoch 10 of training
Adapt step size to 9.296643316281106e-13
Epoch 10/500

 Start sampling for epoch 11 of training
Adapt step size to 2.8685068914364314e-14
Epoch 11/500

 Start sampling for epoch 12 of training
Adapt step size to 1.0221340333304327e-15
Epoch 12/500

 Start sampling for epoch 13 of training
Adapt step size to 4.1608018609474745e-17
Epoch 13/500

 Start sampling for epoch 14 of training
Adapt step size to 1.9148395728183912e-18
Epoch 14/500

 Start sampling for epoch 15 of training
Adapt step size to 9.866148934847074e-20
Epoch 15/500

 Start sampling for epoch 16 of training
Adapt step size to 5.640891948232883e-21
Epoch 16/500

 Start sampling for epoch 17 of training
Adapt step size to 3.5500048350782026e-22
Epoch 17/500

 Start sampling for epoch 18 of traini


 Start sampling for epoch 3 of training
Adapt step size to 0.0054937382228672504
Epoch 3/500

 Start sampling for epoch 4 of training
Adapt step size to 0.023714061826467514
Epoch 4/500

 Start sampling for epoch 5 of training
Adapt step size to 0.0027837869711220264
Epoch 5/500

 Start sampling for epoch 6 of training
Adapt step size to 4.18652780354023e-05
Epoch 6/500

 Start sampling for epoch 7 of training
Adapt step size to 6.833865882072132e-07
Epoch 7/500

 Start sampling for epoch 8 of training
Adapt step size to 1.2810211558189621e-08
Epoch 8/500

 Start sampling for epoch 9 of training
Adapt step size to 2.8079916170042907e-10
Epoch 9/500

 Start sampling for epoch 10 of training
Adapt step size to 7.207526676183962e-12
Epoch 10/500

 Start sampling for epoch 11 of training
Adapt step size to 2.153434901100773e-13
Epoch 11/500

 Start sampling for epoch 12 of training
Adapt step size to 7.422108412623914e-15
Epoch 12/500

 Start sampling for epoch 13 of training
Adapt step s


 Start sampling for epoch 50 of training
Adapt step size to 0.0
Epoch 50/500
Fold no. 3
Random seed set as 555

 Start sampling for epoch 1 of training
Epoch 1/500

 Start sampling for epoch 2 of training
Adapt step size to 0.30616337060928345
Epoch 2/500

 Start sampling for epoch 3 of training
Adapt step size to 0.0054937382228672504
Epoch 3/500

 Start sampling for epoch 4 of training
Adapt step size to 0.02357611060142517
Epoch 4/500

 Start sampling for epoch 5 of training
Adapt step size to 0.037025026977062225
Epoch 5/500

 Start sampling for epoch 6 of training
Adapt step size to 0.013598993420600891
Epoch 6/500

 Start sampling for epoch 7 of training
Adapt step size to 0.005233023315668106
Epoch 7/500

 Start sampling for epoch 8 of training
Adapt step size to 0.007397960405796766
Epoch 8/500

 Start sampling for epoch 9 of training
Adapt step size to 0.008634059689939022
Epoch 9/500

 Start sampling for epoch 10 of training
Adapt step size to 0.0036737159825861454
Epoch 10/


 Start sampling for epoch 23 of training
Adapt step size to 1.929508358373532e-14
Epoch 23/500

 Start sampling for epoch 24 of training
Adapt step size to 1.44666281720867e-14
Epoch 24/500

 Start sampling for epoch 25 of training
Adapt step size to 9.823131086558767e-16
Epoch 25/500

 Start sampling for epoch 26 of training
Adapt step size to 7.664633850697081e-16
Epoch 26/500

 Start sampling for epoch 27 of training
Adapt step size to 5.992403406777512e-16
Epoch 27/500

 Start sampling for epoch 28 of training
Adapt step size to 4.6535880934310404e-17
Epoch 28/500
Fold no. 4
Random seed set as 555

 Start sampling for epoch 1 of training
Epoch 1/500

 Start sampling for epoch 2 of training
Adapt step size to 0.30616337060928345
Epoch 2/500

 Start sampling for epoch 3 of training
Adapt step size to 0.0054937382228672504
Epoch 3/500

 Start sampling for epoch 4 of training
Adapt step size to 0.023714061826467514
Epoch 4/500

 Start sampling for epoch 5 of training
Adapt step size t


 Start sampling for epoch 19 of training
Adapt step size to 4.77326579412414e-14
Epoch 19/500

 Start sampling for epoch 20 of training
Adapt step size to 2.6467270266590645e-15
Epoch 20/500

 Start sampling for epoch 21 of training
Adapt step size to 1.5975254466941898e-16
Epoch 21/500

 Start sampling for epoch 22 of training
Adapt step size to 1.0441551330274234e-17
Epoch 22/500

 Start sampling for epoch 23 of training
Adapt step size to 7.354480403796986e-19
Epoch 23/500

 Start sampling for epoch 24 of training
Adapt step size to 6.371695744289311e-19
Epoch 24/500

 Start sampling for epoch 25 of training
Adapt step size to 6.109094437534858e-18
Epoch 25/500

 Start sampling for epoch 26 of training
Adapt step size to 5.106520929437924e-18
Epoch 26/500

 Start sampling for epoch 27 of training
Adapt step size to 4.103750552355202e-19
Epoch 27/500
Start training procedure for Diabetes130US
Fold no. 0
Random seed set as 555

 Start sampling for epoch 1 of training
Epoch 1/500

 St

Epoch 15/500

 Start sampling for epoch 16 of training
Adapt step size to 5.653212408829669e-14
Epoch 16/500

 Start sampling for epoch 17 of training
Adapt step size to 4.12111174992371e-14
Epoch 17/500

 Start sampling for epoch 18 of training
Adapt step size to 3.0330423638142215e-14
Epoch 18/500

 Start sampling for epoch 19 of training
Adapt step size to 3.1193770439960633e-13
Epoch 19/500

 Start sampling for epoch 20 of training
Adapt step size to 2.9752606628408174e-12
Epoch 20/500

 Start sampling for epoch 21 of training
Adapt step size to 2.06441591252271e-12
Epoch 21/500

 Start sampling for epoch 22 of training
Adapt step size to 1.4435441774221514e-12
Epoch 22/500

 Start sampling for epoch 23 of training
Adapt step size to 1.0165031793726853e-12
Epoch 23/500

 Start sampling for epoch 24 of training
Adapt step size to 7.203895141007222e-13
Epoch 24/500

 Start sampling for epoch 25 of training
Adapt step size to 5.692202351831321e-12
Epoch 25/500
 Early stopping of FE by


 Start sampling for epoch 5 of training
Adapt step size to 0.022173795849084854
Epoch 5/500

 Start sampling for epoch 6 of training
Adapt step size to 0.008083785884082317
Epoch 6/500

 Start sampling for epoch 7 of training
Adapt step size to 0.003110310761258006
Epoch 7/500

 Start sampling for epoch 8 of training
Adapt step size to 0.0012594710569828749
Epoch 8/500

 Start sampling for epoch 9 of training
Adapt step size to 0.0005344653618521988
Epoch 9/500

 Start sampling for epoch 10 of training
Adapt step size to 0.00023662469175178558
Epoch 10/500

 Start sampling for epoch 11 of training
Adapt step size to 0.0001088401404558681
Epoch 11/500

 Start sampling for epoch 12 of training
Adapt step size to 5.159240390639752e-05
Epoch 12/500

 Start sampling for epoch 13 of training
Adapt step size to 1.3850943787474534e-06
Epoch 13/500

 Start sampling for epoch 14 of training
Adapt step size to 4.222001592779634e-08
Epoch 14/500

 Start sampling for epoch 15 of training
Adapt ste


 Start sampling for epoch 18 of training
Adapt step size to 2.0350161555882096e-09
Epoch 18/500

 Start sampling for epoch 19 of training
Adapt step size to 1.264809701417846e-09
Epoch 19/500

 Start sampling for epoch 20 of training
Adapt step size to 7.964299220120097e-10
Epoch 20/500

 Start sampling for epoch 21 of training
Adapt step size to 5.075023179479388e-10
Epoch 21/500

 Start sampling for epoch 22 of training
Adapt step size to 4.027215450008725e-09
Epoch 22/500

 Start sampling for epoch 23 of training
Adapt step size to 2.931386156035387e-08
Epoch 23/500

 Start sampling for epoch 24 of training
Adapt step size to 2.0438756109797396e-07
Epoch 24/500

 Start sampling for epoch 25 of training
Adapt step size to 1.3360180446397862e-06
Epoch 25/500
 Early stopping of FE by fe_auc_val at 25 epochs
Fold no. 4
Random seed set as 555

 Start sampling for epoch 1 of training
Epoch 1/500

 Start sampling for epoch 2 of training
Adapt step size to 0.30616337060928345
Epoch 2/500



Epoch 16/500

 Start sampling for epoch 17 of training
Adapt step size to 9.798053210788304e-13
Epoch 17/500

 Start sampling for epoch 18 of training
Adapt step size to 4.7345394477756736e-14
Epoch 18/500

 Start sampling for epoch 19 of training
Adapt step size to 3.4871306282000317e-14
Epoch 19/500

 Start sampling for epoch 20 of training
Adapt step size to 3.4415070619686627e-13
Epoch 20/500

 Start sampling for epoch 21 of training
Adapt step size to 3.1571787285217257e-12
Epoch 21/500

 Start sampling for epoch 22 of training
Adapt step size to 2.702159156153794e-11
Epoch 22/500

 Start sampling for epoch 23 of training
Adapt step size to 2.16544435160948e-10
Epoch 23/500

 Start sampling for epoch 24 of training
Adapt step size to 1.422079259905118e-10
Epoch 24/500

 Start sampling for epoch 25 of training
Adapt step size to 9.420800944903718e-11
Epoch 25/500

 Start sampling for epoch 26 of training
Adapt step size to 6.292014131936696e-11
Epoch 26/500

 Start sampling for epo

## Evaluation

### Performance

In [6]:
models = ["GMENN", "TE", "OHE", "Embedding","Ignore"]

# Placeholder metrics stored whenever a (dataset, fold, model) result cannot be computed.
nan_metrics = {"Accuracy": np.nan,
               "AUROC": np.nan,
               "F1": np.nan,
               "Time": np.nan}

# results_perf[dataset][fold][model] -> dict of metric values (plus "Time").
results_perf = {dataset_name: {num: {model: {}  for model in models} for num in range(folds)} for dataset_name in dataset_names}
for dataset_name in dataset_names:
    try:
        # NOTE(review): pickle.load executes arbitrary code; only load files prepared by this project.
        with open(f"../data/prepared/{dataset_name}/{data_path}/data_dict.pickle", 'rb') as handle:
            data_dict = pickle.load(handle)
    except Exception as err:
        # Without the data there are no test labels to score against. Record NaNs and move on
        # instead of silently scoring against the previous dataset's `data_dict`.
        print(f"dataset {dataset_name} not found ({err!r})")
        for num in range(folds):
            for model in models:
                results_perf[dataset_name][num][model] = dict(nan_metrics)
        continue
    for num in range(folds):
        labels = data_dict[f"y_test_{num}"]
        # NOTE(review): the class count is taken from the labels present in this test fold;
        # presumably every class occurs in every fold - TODO confirm.
        n_classes = np.unique(labels).shape[0]
        y_test = tf.one_hot(labels,n_classes)
        for model in models:
            try:
                y_pred = results[dataset_name][num]["predictions"][model][2]

                # NOTE(review): `target` is not assigned in this cell; it is whatever an earlier
                # cell left behind - verify it is correct for every dataset.
                fold_metrics = get_metrics(y_test,y_pred,target)
                fold_metrics["Time"] = results[dataset_name][num]["times"][model]
                results_perf[dataset_name][num][model] = fold_metrics
            except Exception as err:
                # Best effort: a missing or failed model run becomes NaN, but say which one and why.
                print(f"Set nan for {dataset_name}, {num} ({model}): {type(err).__name__}: {err}")
                results_perf[dataset_name][num][model] = dict(nan_metrics)


Set nan for eucalyptus, 0
Set nan for eucalyptus, 0
Set nan for eucalyptus, 0
Set nan for eucalyptus, 0
Set nan for eucalyptus, 1
Set nan for eucalyptus, 1
Set nan for eucalyptus, 1
Set nan for eucalyptus, 1
Set nan for eucalyptus, 2
Set nan for eucalyptus, 2
Set nan for eucalyptus, 2
Set nan for eucalyptus, 2
Set nan for eucalyptus, 3
Set nan for eucalyptus, 3
Set nan for eucalyptus, 3
Set nan for eucalyptus, 3
Set nan for eucalyptus, 4
Set nan for eucalyptus, 4
Set nan for eucalyptus, 4
Set nan for eucalyptus, 4
Set nan for Midwest_survey, 0
Set nan for Midwest_survey, 0
Set nan for Midwest_survey, 0
Set nan for Midwest_survey, 0
Set nan for Midwest_survey, 1
Set nan for Midwest_survey, 1
Set nan for Midwest_survey, 1
Set nan for Midwest_survey, 1
Set nan for Midwest_survey, 2
Set nan for Midwest_survey, 2
Set nan for Midwest_survey, 2
Set nan for Midwest_survey, 2
Set nan for Midwest_survey, 3
Set nan for Midwest_survey, 3
Set nan for Midwest_survey, 3
Set nan for Midwest_survey, 3


In [7]:
models = ["GMENN", "TE", "OHE", "Embedding", "Ignore"]

metric = "AUROC"

#####
dataset_res_dict = {}  # dataset -> {model: "mean (std)"} strings for the display table
best_models = {}       # dataset -> model with the highest mean `metric` over folds
t_test_results = {}    # dataset -> p-values (ordered like `models`) of paired t-tests vs. the best model

round_mean_at = 2
round_std_at = 3

for dataset_name in dataset_names:
    dataset_models = list(results_perf[dataset_name][0].keys())
    # One row per fold, one column per model, holding the selected metric.
    use_df = pd.DataFrame([pd.DataFrame(results_perf[dataset_name][fold_num]).loc[metric,models] for fold_num in results_perf[dataset_name].keys()],index=results_perf[dataset_name].keys())

    # Format every model as "mean (std)" across folds.
    df_mean = pd.DataFrame(use_df.mean(axis=0).round(round_mean_at).astype(str) + " (" + use_df.std(axis=0).round(round_std_at).astype(str) + ")").transpose()
    model_dict = {i: df_mean[i].values[0] for i in df_mean.columns}
    dataset_res_dict[dataset_name] = model_dict

    best_models[dataset_name] = use_df.columns[use_df.mean(axis=0).argmax()]
    best_scores = use_df[best_models[dataset_name]].values

    p_values = []
    for model in models:
        if model == best_models[dataset_name]:
            # The best model is never significantly different from itself.
            p_values.append(1.)
        elif model not in dataset_models or use_df[model].isna().any():
            # results_perf holds NaN placeholders for models without results, so the membership
            # test alone never fires; treat incomplete fold results as unavailable as well
            # instead of letting their NaN p-value be turned into 1 below.
            p_values.append(0.)
        else:
            p_values.append(stats.ttest_rel(best_scores, use_df[model].values)[1])
    t_test_res = np.array(p_values, dtype=float).round(3)
    # A remaining NaN p-value (e.g. per-fold scores identical to the best model's) counts as
    # "no significant difference".
    t_test_res[np.isnan(t_test_res)] = 1.
    t_test_results[dataset_name] = t_test_res

# Rows: datasets, columns: models.
res_df = pd.DataFrame(dataset_res_dict).transpose()
    
def negative_bold(val):
    """Return one CSS string per entry of `val`, bolding p-values of at least 0.05.

    `val.name` is looked up in the module-level `models` list to find the
    model's position, and every key of `val` is used as a dataset key into the
    module-level `t_test_results`. An entry is bold when the stored p-value at
    that position is >= 0.05, otherwise its style is the empty string.
    """
    # Without the transpose of res_df the lookup would instead be
    # t_test_results[val.name][i] for i in range(len(val)).
    model_pos = np.flatnonzero(np.array(models) == val.name)[0]
    css = []
    for row_key in val.keys():
        if t_test_results[row_key][model_pos] >= 0.05:
            css.append("font-weight: bold")
        else:
            css.append("")
    return css

# Styling is disabled: the line below would hand res_df to negative_bold through the pandas Styler.
# res_df.style.apply(negative_bold)
# Display the plain "mean (std)" table (rows: datasets, columns: models).
res_df

Unnamed: 0,GMENN,TE,OHE,Embedding,Ignore
eucalyptus,0.89 (0.03),nan (nan),nan (nan),nan (nan),nan (nan)
Midwest_survey,0.88 (0.008),nan (nan),nan (nan),nan (nan),nan (nan)
hpc-job-scheduling,0.91 (0.007),nan (nan),nan (nan),nan (nan),nan (nan)
video-game-sales,0.78 (0.008),nan (nan),nan (nan),nan (nan),nan (nan)
okcupid-stem,0.75 (0.037),nan (nan),nan (nan),nan (nan),nan (nan)
Diabetes130US,0.62 (0.015),nan (nan),nan (nan),nan (nan),nan (nan)


In [8]:
# Numeric GMENN means, parsed back out of the "mean (std)" display strings.
np.array([float(entry.split(" ")[0]) for entry in res_df["GMENN"]])

array([0.89, 0.88, 0.91, 0.78, 0.75, 0.62])

### Time

In [9]:
models = ["GMENN", "TE", "OHE", "Embedding", "Ignore"]
metric = "Time"

#####
dataset_res_dict = {}  # dataset -> {model: "mean (std)"} strings for the display table
best_models = {}       # dataset -> model with the lowest mean `metric` over folds
t_test_results = {}    # dataset -> p-values (ordered like `models`) of paired t-tests vs. the best model

round_mean_at = 2
round_std_at = 3

for dataset_name in dataset_names:
    dataset_models = list(results_perf[dataset_name][0].keys())
    # One row per fold, one column per model. Values are divided by 60,
    # presumably converting seconds to minutes - TODO confirm the unit of "Time".
    use_df = pd.DataFrame([pd.DataFrame(results_perf[dataset_name][fold_num]).loc[metric,models] for fold_num in results_perf[dataset_name].keys()],index=results_perf[dataset_name].keys())/60

    # Format every model as "mean (std)" across folds.
    df_mean = pd.DataFrame(use_df.mean(axis=0).round(round_mean_at).astype(str) + " (" + use_df.std(axis=0).round(round_std_at).astype(str) + ")").transpose()
    model_dict = {i: df_mean[i].values[0] for i in df_mean.columns}
    dataset_res_dict[dataset_name] = model_dict

    # For time, smaller is better.
    best_models[dataset_name] = use_df.columns[use_df.mean(axis=0).argmin()]
    best_scores = use_df[best_models[dataset_name]].values

    p_values = []
    for model in models:
        if model == best_models[dataset_name]:
            # The best model is never significantly different from itself.
            p_values.append(1.)
        elif model not in dataset_models or use_df[model].isna().any():
            # results_perf holds NaN placeholders for models without results, so the membership
            # test alone never fires; treat incomplete fold results as unavailable as well
            # instead of letting their NaN p-value be turned into 1 below.
            p_values.append(0.)
        else:
            p_values.append(stats.ttest_rel(best_scores, use_df[model].values)[1])
    t_test_res = np.array(p_values, dtype=float).round(3)
    # A remaining NaN p-value (e.g. per-fold values identical to the best model's) counts as
    # "no significant difference".
    t_test_res[np.isnan(t_test_res)] = 1.
    t_test_results[dataset_name] = t_test_res

# Rows: datasets, columns: models.
res_df = pd.DataFrame(dataset_res_dict).transpose()
    
def negative_bold(val):
    """Return one CSS string per entry of `val`, bolding p-values of at least 0.05.

    Same logic as the definition in the AUROC cell; it reads the module-level
    `models` and `t_test_results` at call time, so it reflects whichever
    metric was summarised last. `val.name` selects the model position and the
    keys of `val` select the dataset entries of `t_test_results`.
    """
    # Without the transpose of res_df the lookup would instead be
    # t_test_results[val.name][i] for i in range(len(val)).
    model_pos = np.flatnonzero(np.array(models) == val.name)[0]
    return [
        "" if t_test_results[row_key][model_pos] < 0.05 else "font-weight: bold"
        for row_key in val.keys()
    ]

# Styling is disabled: the line below would hand res_df to negative_bold through the pandas Styler.
# res_df.style.apply(negative_bold)
# Display the plain "mean (std)" table of the Time metric after the /60 scaling.
res_df

Unnamed: 0,GMENN,TE,OHE,Embedding,Ignore
eucalyptus,43.27 (49.678),nan (nan),nan (nan),nan (nan),nan (nan)
Midwest_survey,12.15 (1.466),nan (nan),nan (nan),nan (nan),nan (nan)
hpc-job-scheduling,116.06 (40.381),nan (nan),nan (nan),nan (nan),nan (nan)
video-game-sales,35.91 (8.086),nan (nan),nan (nan),nan (nan),nan (nan)
okcupid-stem,69.75 (22.213),nan (nan),nan (nan),nan (nan),nan (nan)
Diabetes130US,50.53 (4.358),nan (nan),nan (nan),nan (nan),nan (nan)


In [10]:
# Numeric GMENN mean times, parsed back out of the "mean (std)" display strings.
np.array([float(entry.split(" ")[0]) for entry in res_df["GMENN"]])

array([ 43.27,  12.15, 116.06,  35.91,  69.75,  50.53])