In [1]:
import sys
# Append root path 
sys.path.append("../")
sys.path.append("../lmmnn")

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# GPU configuration via environment variables, set before TensorFlow is first asked
# for a device (the check below is the first such request in this notebook).
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]="true"
# NOTE(review): the device index "2" is specific to the original machine -- adjust per host.
os.environ["CUDA_VISIBLE_DEVICES"]="2"

# Report whether TensorFlow sees a GPU under the name '/device:GPU:0'; any other
# answer (including an empty string) is reported as "not found".
if tf.test.gpu_device_name() != '/device:GPU:0':
    print('WARNING: GPU device not found.')
else:
    print('SUCCESS: Found GPU: {}'.format(tf.test.gpu_device_name()))

from model.mixed_effects import *
from utils.fe_models import get_model
from utils.evaluation import *
from utils.utils import *
from data.preprocessing import dataset_preprocessing
from utils.training_functions import *

# from vis.utils.utils import apply_modifications
# # helper function
def update_layer_activation(model, activation, index=-1):
    """Overwrite the ``activation`` attribute of one layer of ``model``.

    Args:
        model: Object exposing an indexable ``layers`` sequence.
        activation: Value to store on the selected layer's ``activation`` attribute.
        index: Position of the layer inside ``model.layers`` (default: last layer).

    Returns:
        The same ``model`` object, modified in place.
    """
    # NOTE(review): only the attribute is replaced; whether the framework needs the
    # model to be rebuilt for this to take effect is not handled here -- confirm.
    target_layer = model.layers[index]
    setattr(target_layer, "activation", activation)
    return model

from tensorflow.keras.optimizers import Adam
from keras.models import Sequential, Model
from keras.layers import Dense, Input, Reshape, Embedding, Concatenate
from tensorflow.keras.activations import sigmoid

from sklearn.metrics import accuracy_score as acc
from sklearn.metrics import roc_auc_score as auroc
from sklearn.metrics import f1_score as f1
from sklearn.model_selection import train_test_split
from category_encoders import TargetEncoder
from tensorflow_addons.metrics import F1Score

from scipy import stats
import pickle
import yaml
import time
import gc

RS = 555

SUCCESS: Found GPU: /device:GPU:0


#### Download and save the data from Pargent et al. by running "data/download_pargent2022_datasets.py" before running this notebook.

In [2]:
# --- Data split configuration -------------------------------------------------
mode = "cv"          # selects the "cv" branch when the data path is built
hct = 10             # becomes part of the prepared-data directory name
test_ratio = None    # only used by the non-"cv" modes
val_ratio = None     # only used by the "train_val_test" mode
folds = 5            # number of CV folds iterated over

dataset_names = [
    "churn",
    "kdd_internet_usage",
    "Amazon_employee_access",
    "Click_prediction_small",
    "adult",
    "KDDCup09_upselling",
    "kick",
    "open_payments",
    "road-safety-drivers-sex",
    "porto-seguro",
]


def loss_use():
    """Return the Keras loss *class* (not an instance); callers instantiate it themselves."""
    return tf.keras.losses.BinaryCrossentropy


# --- Model / training configuration -------------------------------------------
target = "binary"
batch_size = 512
epochs = 500
early_stopping = 20
model_name = "AutoGluon"
embed_dims_method = "AutoGluon"

# Filled as results[dataset_name][fold_num][...] by the training loop.
results = {}

#######################################

# Train -- or reload from the on-disk cache -- one GMENN model for every combination of
# dataset, CV fold and fixed-effects loss weight (lambda_). Everything ends up in
# results[dataset_name][fold_num][section]["GMENN<lambda>" / "GMENN_FE<lambda>"].
for dataset_name in dataset_names:
    print(f"Start training procedure for {dataset_name}")
    data_path = f"{mode}_RS{RS}_hct{hct}"
    if mode == "cv":
        data_path += f"_{folds}folds"
    elif mode == "train_test":
        # NOTE(review): `1-test_ratio*100` parses as 1 - (test_ratio*100), which is probably
        # not the intended train percentage. Left untouched because this string has to match
        # the directory name used by dataset_preprocessing -- confirm there before changing.
        data_path += f"_split{1-test_ratio*100}-{test_ratio*100}"
    elif mode == "train_val_test":
        data_path += f"_split{round(100-(test_ratio+val_ratio)*100)}-{round(test_ratio*100)}-{round(val_ratio*100)}"

    # If no data_dict exists, run preprocessing, else load data_dict
    data_dict_file = f"../data/prepared/{dataset_name}/{data_path}/data_dict.pickle"
    if not os.path.exists(data_dict_file):
        dataset_preprocessing.process_dataset(dataset_name, target, mode, RS, hct, test_ratio, val_ratio, folds)
    with open(data_dict_file, 'rb') as handle:
        data_dict = pickle.load(handle)

    z_cols = data_dict["z_cols"]
    results[dataset_name] = {}
    for fold_num in range(folds):
        results[dataset_name][fold_num] = {}
        fold_results = results[dataset_name][fold_num]

        print(f"Fold no. {fold_num}")
        fold_results["histories"] = {}
        fold_results["predictions"] = {}
        fold_results["times"] = {}
        fold_results["other_info"] = {}
        for lambda_ in [0.,1.,5.,10.]:
            lambda_tag = str(int(lambda_))
            me_key = "GMENN" + lambda_tag      # mixed-effects outputs
            fe_key = "GMENN_FE" + lambda_tag   # fixed-effects-only outputs

            # lambda_ == 1 is cached directly in the fold directory; every other weight
            # gets its own "lambda__<k>" subdirectory.
            save_path = f"../results/{dataset_name}/{data_path}/fold_{fold_num}"
            if lambda_ != 1.:
                save_path += "/lambda__" + lambda_tag
            os.makedirs(save_path, exist_ok=True)
            results_file = f"{save_path}/results_RS{RS}_{dataset_name}_iter{fold_num}.pickle"

            z_ohe_encoded_train = data_dict[f"z_ohe_encoded_train_{fold_num}"]
            z_ohe_encoded_val = data_dict[f"z_ohe_encoded_val_{fold_num}"]
            z_ohe_encoded_test = data_dict[f"z_ohe_encoded_test_{fold_num}"]

            z_target_encoded_train = data_dict[f"z_target_encoded_train_{fold_num}"]
            z_target_encoded_val = data_dict[f"z_target_encoded_val_{fold_num}"]
            z_target_encoded_test = data_dict[f"z_target_encoded_test_{fold_num}"]

            target_encoding_time = data_dict[f"target_encoding_time_{fold_num}"]
            ohe_encoding_time = data_dict[f"ohe_encoding_time_{fold_num}"]

            # Re-read the fold data for every lambda_: the training branch below converts
            # these names to tensors and deletes some of them at the end.
            x_cols = data_dict[f"X_train_{fold_num}"].columns
            X_train = data_dict[f"X_train_{fold_num}"]
            Z_train = data_dict[f"Z_train_{fold_num}"]
            y_train = data_dict[f"y_train_{fold_num}"]

            X_val = data_dict[f"X_val_{fold_num}"]
            Z_val = data_dict[f"Z_val_{fold_num}"]
            y_val = data_dict[f"y_val_{fold_num}"]

            X_test = data_dict[f"X_test_{fold_num}"]
            Z_test = data_dict[f"Z_test_{fold_num}"]
            y_test = data_dict[f"y_test_{fold_num}"]

            if not os.path.exists(results_file):

                tf.random.set_seed(RS+fold_num)
                np.random.seed(RS+fold_num)

                # Per Z column: largest value seen in train/val/test, plus one.
                qs = np.max([tf.reduce_max(Z_train, axis=0),tf.reduce_max(Z_val, axis=0),tf.reduce_max(Z_test, axis=0)],axis=0)+1

                X_train = tf.convert_to_tensor(X_train)
                Z_train = tf.convert_to_tensor(Z_train,dtype=tf.int32)
                y_train = tf.convert_to_tensor(y_train)

                X_val = tf.convert_to_tensor(X_val)
                Z_val = tf.convert_to_tensor(Z_val,dtype=tf.int32)
                y_val = tf.convert_to_tensor(y_val)

                X_test = tf.convert_to_tensor(X_test)
                Z_test = tf.convert_to_tensor(Z_test,dtype=tf.int32)
                y_test = tf.convert_to_tensor(y_test)

                if target == "categorical":
                    n_classes = np.unique(y_train).shape[0]
                elif target=="binary":
                    n_classes = 1

                # NOTE(review): with depth 1, tf.one_hot maps label 0 to [1.] and every other
                # label to [0.], i.e. the binary target is inverted. The evaluation cells encode
                # y_test the same way, so it is left as is -- confirm this is intended.
                y_train = tf.one_hot(tf.cast(y_train,tf.int32),n_classes)
                y_val = tf.one_hot(tf.cast(y_val,tf.int32),n_classes)
                y_test = tf.one_hot(tf.cast(y_test,tf.int32),n_classes)

                ##### GMENN #####
                d = X_train.shape[1] # columns
                n = X_train.shape[0] # rows
                num_outputs = n_classes
                perc_numeric = d/(d+Z_train.shape[1])

                set_seed(RS)

                fe_model, optimizer = get_model(model_name=model_name, input_size=X_train.shape[1],
                                                output_size=num_outputs,
                                                target=target,
                                                perc_numeric=perc_numeric, RS=RS)

                initial_stds = np.ones([len(qs),num_outputs]).astype(float).tolist()

                me_model = MixedEffectsNetwork(X_train, Z_train, y_train, fe_model,
                                               target=target, qs=qs,
                                               initial_stds=initial_stds,
                                               fe_loss_weight=lambda_,
                                               mode="intercepts",
                                               early_stopping_fe=early_stopping,
                                               )

                # loss_use() returns the loss class; the second call instantiates it.
                me_model.compile(
                    loss_class_me = loss_use()(),
                    loss_class_fe = loss_use()(),
                    optimizer=optimizer
                )

                mcmc = MCMCSamplingCallback(num_mcmc_samples=1,
                                            perc_burnin=0.7,
                                            warm_restart=None,
                                            num_burnin_steps=1,
                                            step_size=0.1,
                                            )

                print_metric = PrintMetrics(X_train, Z_train, y_train, X_val, Z_val, y_val)

                start = time.time()
                history = me_model.fit([X_train,Z_train], y_train,
                                       callbacks=[mcmc,
                                                  print_metric,
                                                  tf.keras.callbacks.EarlyStopping(monitor="me_auc_val", patience=early_stopping, mode="max")],
                                       epochs=epochs,
                                       validation_data=[[X_val,Z_val],y_val],
                                       batch_size=batch_size)

                end = time.time()
                fit_time_gmenn = round(end-start,2)

                # The model returns a pair: (mixed-effects prediction, fixed-effects-only prediction).
                y_train_pred_gmenn, y_train_pred_gmenn_fe = me_model([X_train,Z_train])
                y_val_pred_gmenn, y_val_pred_gmenn_fe = me_model([X_val,Z_val])
                y_test_pred_gmenn, y_test_pred_gmenn_fe = me_model([X_test,Z_test])

                ##### Document Results #####

                fold_results["histories"][me_key] = history.history

                fold_results["predictions"][me_key] = [y_train_pred_gmenn, y_val_pred_gmenn, y_test_pred_gmenn]
                fold_results["predictions"][fe_key] = [y_train_pred_gmenn_fe, y_val_pred_gmenn_fe, y_test_pred_gmenn_fe]

                fold_results["times"][me_key] = fit_time_gmenn

                fold_results["other_info"][me_key] = {
                        "_stddev_z": np.array([i.numpy() for i in me_model.data_model._stddev_z]),
                        "acceptance_rates": np.array(me_model.acceptance_rates),
                        "random_effects": me_model.mean_samples,
                        "all_samples": me_model.all_samples,
                        "stds": me_model.stds
                    }

                # The pickle holds everything collected for this fold so far, not only this lambda_.
                with open(results_file, 'wb') as handle:
                    pickle.dump(fold_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

                del X_train, X_val, X_test, y_train, y_val, y_test

                gc.collect()
            else:
                print(f"Load results for dataset {dataset_name}, iteration={fold_num}")
                with open(results_file, 'rb') as handle:
                    res = pickle.load(handle)

                # Cached lambda_ == 1 files may use the un-suffixed keys "GMENN" / "GMENN (FE)",
                # which this cell never writes (presumably from another experiment script --
                # verify). Prefer those when present; otherwise fall back to the suffixed keys
                # this cell writes, so a run trained here can be reloaded without a KeyError.
                src_me_key, src_fe_key = me_key, fe_key
                if lambda_ == 1. and "GMENN" in res["histories"]:
                    src_me_key, src_fe_key = "GMENN", "GMENN (FE)"

                fold_results["histories"][me_key] = res["histories"][src_me_key]
                fold_results["predictions"][me_key] = res["predictions"][src_me_key]
                fold_results["predictions"][fe_key] = res["predictions"][src_fe_key]
                fold_results["times"][me_key] = res["times"][src_me_key]
                fold_results["other_info"][me_key] = res["other_info"][src_me_key]


Start training procedure for churn
Fold no. 0
Load results for dataset churn, iteration=0
Load results for dataset churn, iteration=0
Load results for dataset churn, iteration=0
Load results for dataset churn, iteration=0
Fold no. 1
Load results for dataset churn, iteration=1
Load results for dataset churn, iteration=1
Load results for dataset churn, iteration=1
Load results for dataset churn, iteration=1
Fold no. 2
Load results for dataset churn, iteration=2
Load results for dataset churn, iteration=2
Load results for dataset churn, iteration=2
Load results for dataset churn, iteration=2
Fold no. 3
Load results for dataset churn, iteration=3
Load results for dataset churn, iteration=3
Load results for dataset churn, iteration=3
Load results for dataset churn, iteration=3
Fold no. 4
Load results for dataset churn, iteration=4
Load results for dataset churn, iteration=4
Load results for dataset churn, iteration=4
Load results for dataset churn, iteration=4
Start training procedure for k

Load results for dataset open_payments, iteration=0
Fold no. 1
Load results for dataset open_payments, iteration=1
Load results for dataset open_payments, iteration=1
Load results for dataset open_payments, iteration=1
Load results for dataset open_payments, iteration=1
Fold no. 2
Load results for dataset open_payments, iteration=2
Load results for dataset open_payments, iteration=2
Load results for dataset open_payments, iteration=2
Load results for dataset open_payments, iteration=2
Fold no. 3
Load results for dataset open_payments, iteration=3
Load results for dataset open_payments, iteration=3
Load results for dataset open_payments, iteration=3
Load results for dataset open_payments, iteration=3
Fold no. 4
Load results for dataset open_payments, iteration=4
Load results for dataset open_payments, iteration=4
Load results for dataset open_payments, iteration=4
Load results for dataset open_payments, iteration=4
Start training procedure for road-safety-drivers-sex
Fold no. 0
Load res

In [3]:
# Display the result pickle that the loading loop above read last (`res` only exists
# if at least one cached result was loaded). The printed dict is very large.
res

{'histories': {'GMENN0': {'me_loss': [<tf.Tensor: shape=(), dtype=float32, numpy=0.184518>,
    <tf.Tensor: shape=(), dtype=float32, numpy=0.1826381>,
    <tf.Tensor: shape=(), dtype=float32, numpy=0.18116763>,
    <tf.Tensor: shape=(), dtype=float32, numpy=0.17193593>,
    <tf.Tensor: shape=(), dtype=float32, numpy=0.16121641>,
    <tf.Tensor: shape=(), dtype=float32, numpy=0.15565391>,
    <tf.Tensor: shape=(), dtype=float32, numpy=0.15223552>,
    <tf.Tensor: shape=(), dtype=float32, numpy=0.14793493>,
    <tf.Tensor: shape=(), dtype=float32, numpy=0.14620835>,
    <tf.Tensor: shape=(), dtype=float32, numpy=0.1428905>,
    <tf.Tensor: shape=(), dtype=float32, numpy=0.13978659>,
    <tf.Tensor: shape=(), dtype=float32, numpy=0.13763738>,
    <tf.Tensor: shape=(), dtype=float32, numpy=0.13606915>,
    <tf.Tensor: shape=(), dtype=float32, numpy=0.133249>,
    <tf.Tensor: shape=(), dtype=float32, numpy=0.12989774>,
    <tf.Tensor: shape=(), dtype=float32, numpy=0.12825969>,
    <tf.Tens

In [4]:
# Sanity check: `results` should contain one entry per dataset in `dataset_names`.
results.keys()

dict_keys(['churn', 'kdd_internet_usage', 'Amazon_employee_access', 'Click_prediction_small', 'adult', 'KDDCup09_upselling', 'kick', 'open_payments', 'road-safety-drivers-sex', 'porto-seguro'])

## Evaluation

### Performance

In [5]:
# Score the test-set predictions of every GMENN variant for each dataset and fold.
models = ["GMENN"+str(int(lambda_)) for lambda_ in [0.,1.,5.,10.]]
# Placeholder stored whenever a dataset or a prediction cannot be evaluated.
nan_metrics = {"Accuracy": np.nan,
               "AUROC": np.nan,
               "F1": np.nan,
               "Time": np.nan,
               "FE_AUC": np.nan}
results_perf = {dataset_name: {num: {model: {}  for model in models} for num in range(folds)} for dataset_name in dataset_names}
for dataset_name in dataset_names:
    try:
        with open(f"../data/prepared/{dataset_name}/{data_path}/data_dict.pickle", 'rb') as handle:
            data_dict = pickle.load(handle)
    except (OSError, EOFError, pickle.UnpicklingError) as exc:
        # Do not fall through: the labels of the previously loaded dataset would
        # otherwise be used to score this one.
        print(f"dataset {dataset_name} not found ({type(exc).__name__}: {exc})")
        for num in range(folds):
            for model in models:
                results_perf[dataset_name][num][model] = dict(nan_metrics)
        continue
    for num in range(folds):
        n_classes=1
        # Same depth-1 one-hot encoding of the labels as used during training.
        y_test = tf.one_hot(data_dict[f"y_test_{num}"],n_classes)
        for model in models:
            try:
                # Index 2 selects the test predictions from [train, val, test].
                y_pred = np.array(results[dataset_name][num]["predictions"][model][2]).ravel()

                results_perf[dataset_name][num][model] = get_metrics(y_test,y_pred,target)
                results_perf[dataset_name][num][model]["Time"] = results[dataset_name][num]["times"][model]
            except Exception as exc:
                # Best effort: keep the table complete, but say why the entry is missing.
                print(f"Set nan for {dataset_name}, {num} ({type(exc).__name__}: {exc})")
                results_perf[dataset_name][num][model] = dict(nan_metrics)


In [6]:
metric = "AUROC"

# Decimal places for the "mean (std)" strings in the summary table.
round_mean_at, round_std_at = 2, 3

# Per dataset: formatted table row, best model, p-values of every model vs. the best.
dataset_res_dict, best_models, t_test_results = {}, {}, {}

for dataset_name in dataset_names:
    per_fold = results_perf[dataset_name]
    dataset_models = list(per_fold[0].keys())

    # Rows = folds, columns = models, values = the selected metric.
    fold_rows = [pd.DataFrame(per_fold[fold_num]).loc[metric, models] for fold_num in per_fold.keys()]
    use_df = pd.DataFrame(fold_rows, index=per_fold.keys())

    col_means = use_df.mean(axis=0)
    col_stds = use_df.std(axis=0)
    formatted = col_means.round(round_mean_at).astype(str) + " (" + col_stds.round(round_std_at).astype(str) + ")"
    dataset_res_dict[dataset_name] = formatted.to_dict()

    # Higher is better for this metric.
    winner = use_df.columns[col_means.argmax()]
    best_models[dataset_name] = winner

    # Paired t-test of the best model against each model across folds.
    p_values = []
    for candidate in models:
        if candidate in dataset_models:
            p_values.append(stats.ttest_rel(use_df[winner].values, use_df[candidate].values)[1])
        else:
            p_values.append(0)
    t_test_res = np.array(p_values).round(3)
    # NaN p-values (e.g. the best model compared with itself) count as "not different".
    t_test_res[np.isnan(t_test_res)] = 1.
    t_test_results[dataset_name] = t_test_res

res_df = pd.DataFrame(dataset_res_dict).transpose()


def negative_bold(val):
    """Styler callback: bold every entry of column ``val`` whose p-value vs. the best model is >= 0.05."""
    col_pos = np.flatnonzero(np.array(models) == val.name)[0]
    styles = []
    for ds in val.keys():
        styles.append("font-weight: bold" if t_test_results[ds][col_pos] >= 0.05 else "")
    return styles

# Optional styled view: res_df.style.apply(negative_bold)
res_df

Unnamed: 0,GMENN0,GMENN1,GMENN5,GMENN10
churn,0.89 (0.015),0.88 (0.02),0.87 (0.018),0.87 (0.02)
kdd_internet_usage,0.94 (0.004),0.94 (0.003),0.94 (0.002),0.94 (0.002)
Amazon_employee_access,0.84 (0.008),0.84 (0.009),0.84 (0.01),0.84 (0.01)
Click_prediction_small,0.66 (0.016),0.66 (0.009),0.65 (0.012),0.65 (0.012)
adult,0.91 (0.003),0.91 (0.003),0.91 (0.002),0.91 (0.002)
KDDCup09_upselling,0.8 (0.007),0.8 (0.013),0.79 (0.021),0.8 (0.015)
kick,0.73 (0.005),0.74 (0.011),0.74 (0.008),0.75 (0.011)
open_payments,0.93 (0.002),0.93 (0.009),0.93 (0.008),0.92 (0.007)
road-safety-drivers-sex,0.73 (0.006),0.73 (0.004),0.73 (0.003),0.73 (0.003)
porto-seguro,0.56 (0.007),0.56 (0.004),0.56 (0.003),0.56 (0.004)


In [7]:
# Mean values only (the text before the first space of each "mean (std)" entry), one array per model.
tuple(
    res_df[col].apply(lambda cell: float(cell.split(" ")[0])).values
    for col in ("GMENN0", "GMENN1", "GMENN5", "GMENN10")
)

(array([0.89, 0.94, 0.84, 0.66, 0.91, 0.8 , 0.73, 0.93, 0.73, 0.56]),
 array([0.88, 0.94, 0.84, 0.66, 0.91, 0.8 , 0.74, 0.93, 0.73, 0.56]),
 array([0.87, 0.94, 0.84, 0.65, 0.91, 0.79, 0.74, 0.93, 0.73, 0.56]),
 array([0.87, 0.94, 0.84, 0.65, 0.91, 0.8 , 0.75, 0.92, 0.73, 0.56]))

### Fixed Effects Performance

In [8]:
# Score the fixed-effects-only test predictions of every GMENN variant for each dataset and fold.
models = ["GMENN_FE"+str(int(lambda_)) for lambda_ in [0.,1.,5.,10.]]
# Placeholder stored whenever a dataset or a prediction cannot be evaluated.
nan_metrics = {"Accuracy": np.nan,
               "AUROC": np.nan,
               "F1": np.nan,
               "Time": np.nan,
               "FE_AUC": np.nan}
results_perf = {dataset_name: {num: {model: {}  for model in models} for num in range(folds)} for dataset_name in dataset_names}
for dataset_name in dataset_names:
    try:
        with open(f"../data/prepared/{dataset_name}/{data_path}/data_dict.pickle", 'rb') as handle:
            data_dict = pickle.load(handle)
    except (OSError, EOFError, pickle.UnpicklingError) as exc:
        # Do not fall through: the labels of the previously loaded dataset would
        # otherwise be used to score this one.
        print(f"dataset {dataset_name} not found ({type(exc).__name__}: {exc})")
        for num in range(folds):
            for model in models:
                results_perf[dataset_name][num][model] = dict(nan_metrics)
        continue
    for num in range(folds):
        n_classes=1
        # Same depth-1 one-hot encoding of the labels as used during training.
        y_test = tf.one_hot(data_dict[f"y_test_{num}"],n_classes)
        for model in models:
            try:
                # Index 2 selects the test predictions from [train, val, test].
                y_pred = np.array(results[dataset_name][num]["predictions"][model][2]).ravel()

                # No "Time" entry is added here; this cell only writes the output of get_metrics.
                results_perf[dataset_name][num][model] = get_metrics(y_test,y_pred,target)
            except Exception as exc:
                # Best effort: keep the table complete, but say why the entry is missing.
                print(f"Set nan for {dataset_name}, {num} ({type(exc).__name__}: {exc})")
                results_perf[dataset_name][num][model] = dict(nan_metrics)


In [9]:
models = ["GMENN_FE" + str(int(lambda_)) for lambda_ in [0., 1., 5., 10.]]
metric = "AUROC"

# Decimal places for the "mean (std)" strings in the summary table.
round_mean_at, round_std_at = 2, 3

# Per dataset: formatted table row, best model, p-values of every model vs. the best.
dataset_res_dict, best_models, t_test_results = {}, {}, {}

for dataset_name in dataset_names:
    per_fold = results_perf[dataset_name]
    dataset_models = list(per_fold[0].keys())

    # Rows = folds, columns = models, values = the selected metric.
    fold_rows = [pd.DataFrame(per_fold[fold_num]).loc[metric, models] for fold_num in per_fold.keys()]
    use_df = pd.DataFrame(fold_rows, index=per_fold.keys())

    col_means = use_df.mean(axis=0)
    col_stds = use_df.std(axis=0)
    formatted = col_means.round(round_mean_at).astype(str) + " (" + col_stds.round(round_std_at).astype(str) + ")"
    dataset_res_dict[dataset_name] = formatted.to_dict()

    # Higher is better for this metric.
    winner = use_df.columns[col_means.argmax()]
    best_models[dataset_name] = winner

    # Paired t-test of the best model against each model across folds.
    p_values = []
    for candidate in models:
        if candidate in dataset_models:
            p_values.append(stats.ttest_rel(use_df[winner].values, use_df[candidate].values)[1])
        else:
            p_values.append(0)
    t_test_res = np.array(p_values).round(3)
    # NaN p-values (e.g. the best model compared with itself) count as "not different".
    t_test_res[np.isnan(t_test_res)] = 1.
    t_test_results[dataset_name] = t_test_res

res_df = pd.DataFrame(dataset_res_dict).transpose()


def negative_bold(val):
    """Styler callback: bold every entry of column ``val`` whose p-value vs. the best model is >= 0.05."""
    col_pos = np.flatnonzero(np.array(models) == val.name)[0]
    styles = []
    for ds in val.keys():
        styles.append("font-weight: bold" if t_test_results[ds][col_pos] >= 0.05 else "")
    return styles

# Optional styled view: res_df.style.apply(negative_bold)
res_df

Unnamed: 0,GMENN_FE0,GMENN_FE1,GMENN_FE5,GMENN_FE10
churn,0.78 (0.033),0.78 (0.035),0.78 (0.028),0.78 (0.033)
kdd_internet_usage,0.94 (0.004),0.94 (0.003),0.94 (0.002),0.94 (0.003)
Amazon_employee_access,0.5 (0.0),0.5 (0.0),0.5 (0.0),0.5 (0.0)
Click_prediction_small,0.62 (0.012),0.62 (0.005),0.62 (0.005),0.62 (0.005)
adult,0.89 (0.002),0.9 (0.001),0.9 (0.002),0.9 (0.002)
KDDCup09_upselling,0.77 (0.033),0.79 (0.029),0.79 (0.026),0.79 (0.017)
kick,0.68 (0.007),0.72 (0.015),0.74 (0.008),0.74 (0.009)
open_payments,0.5 (0.005),0.5 (0.005),0.5 (0.001),0.5 (0.001)
road-safety-drivers-sex,0.69 (0.008),0.7 (0.007),0.7 (0.004),0.7 (0.003)
porto-seguro,0.57 (0.006),0.57 (0.004),0.57 (0.004),0.56 (0.005)


In [10]:
# Mean values only (the text before the first space of each "mean (std)" entry), one array per model.
tuple(
    res_df[col].apply(lambda cell: float(cell.split(" ")[0])).values
    for col in ("GMENN_FE0", "GMENN_FE1", "GMENN_FE5", "GMENN_FE10")
)

(array([0.78, 0.94, 0.5 , 0.62, 0.89, 0.77, 0.68, 0.5 , 0.69, 0.57]),
 array([0.78, 0.94, 0.5 , 0.62, 0.9 , 0.79, 0.72, 0.5 , 0.7 , 0.57]),
 array([0.78, 0.94, 0.5 , 0.62, 0.9 , 0.79, 0.74, 0.5 , 0.7 , 0.57]),
 array([0.78, 0.94, 0.5 , 0.62, 0.9 , 0.79, 0.74, 0.5 , 0.7 , 0.56]))

### Time

In [11]:
# Recompute the per-model metrics (including the recorded fit time) for the timing table below.
models = ["GMENN"+str(int(lambda_)) for lambda_ in [0.,1.,5.,10.]]
# Placeholder stored whenever a dataset or a prediction cannot be evaluated.
nan_metrics = {"Accuracy": np.nan,
               "AUROC": np.nan,
               "F1": np.nan,
               "Time": np.nan,
               "FE_AUC": np.nan}
results_perf = {dataset_name: {num: {model: {}  for model in models} for num in range(folds)} for dataset_name in dataset_names}
for dataset_name in dataset_names:
    try:
        with open(f"../data/prepared/{dataset_name}/{data_path}/data_dict.pickle", 'rb') as handle:
            data_dict = pickle.load(handle)
    except (OSError, EOFError, pickle.UnpicklingError) as exc:
        # Do not fall through: the labels of the previously loaded dataset would
        # otherwise be used to score this one.
        print(f"dataset {dataset_name} not found ({type(exc).__name__}: {exc})")
        for num in range(folds):
            for model in models:
                results_perf[dataset_name][num][model] = dict(nan_metrics)
        continue
    for num in range(folds):
        n_classes=1
        # Same depth-1 one-hot encoding of the labels as used during training.
        y_test = tf.one_hot(data_dict[f"y_test_{num}"],n_classes)
        for model in models:
            try:
                # Index 2 selects the test predictions from [train, val, test].
                y_pred = np.array(results[dataset_name][num]["predictions"][model][2]).ravel()

                results_perf[dataset_name][num][model] = get_metrics(y_test,y_pred,target)
                results_perf[dataset_name][num][model]["Time"] = results[dataset_name][num]["times"][model]
            except Exception as exc:
                # Best effort: keep the table complete, but say why the entry is missing.
                print(f"Set nan for {dataset_name}, {num} ({type(exc).__name__}: {exc})")
                results_perf[dataset_name][num][model] = dict(nan_metrics)


In [12]:
metric = "Time"

# Decimal places for the "mean (std)" strings in the summary table.
round_mean_at, round_std_at = 2, 3

# Per dataset: formatted table row, fastest model, p-values of every model vs. the fastest.
dataset_res_dict, best_models, t_test_results = {}, {}, {}

for dataset_name in dataset_names:
    per_fold = results_perf[dataset_name]
    dataset_models = list(per_fold[0].keys())

    # Rows = folds, columns = models; the stored values are divided by 60
    # (fit times are measured with time.time(), so this yields minutes).
    fold_rows = [pd.DataFrame(per_fold[fold_num]).loc[metric, models] for fold_num in per_fold.keys()]
    use_df = pd.DataFrame(fold_rows, index=per_fold.keys()) / 60

    col_means = use_df.mean(axis=0)
    col_stds = use_df.std(axis=0)
    formatted = col_means.round(round_mean_at).astype(str) + " (" + col_stds.round(round_std_at).astype(str) + ")"
    dataset_res_dict[dataset_name] = formatted.to_dict()

    # Lower is better for this metric.
    winner = use_df.columns[col_means.argmin()]
    best_models[dataset_name] = winner

    # Paired t-test of the best model against each model across folds.
    p_values = []
    for candidate in models:
        if candidate in dataset_models:
            p_values.append(stats.ttest_rel(use_df[winner].values, use_df[candidate].values)[1])
        else:
            p_values.append(0)
    t_test_res = np.array(p_values).round(3)
    # NaN p-values (e.g. the best model compared with itself) count as "not different".
    t_test_res[np.isnan(t_test_res)] = 1.
    t_test_results[dataset_name] = t_test_res

res_df = pd.DataFrame(dataset_res_dict).transpose()


def negative_bold(val):
    """Styler callback: bold every entry of column ``val`` whose p-value vs. the best model is >= 0.05."""
    col_pos = np.flatnonzero(np.array(models) == val.name)[0]
    styles = []
    for ds in val.keys():
        styles.append("font-weight: bold" if t_test_results[ds][col_pos] >= 0.05 else "")
    return styles

# Optional styled view: res_df.style.apply(negative_bold)
res_df

Unnamed: 0,GMENN0,GMENN1,GMENN5,GMENN10
churn,0.67 (0.067),1.2 (0.202),0.63 (0.085),0.66 (0.099)
kdd_internet_usage,1.0 (0.406),1.99 (1.178),1.61 (0.604),1.23 (0.422)
Amazon_employee_access,1.98 (0.563),2.36 (0.409),2.5 (0.8),2.43 (0.819)
Click_prediction_small,2.48 (0.564),1.86 (0.301),2.56 (0.318),2.51 (0.294)
adult,0.65 (0.057),0.62 (0.058),0.56 (0.07),0.54 (0.047)
KDDCup09_upselling,13.39 (22.378),3.56 (1.389),5.25 (3.481),11.16 (12.069)
kick,1.09 (0.095),0.88 (0.062),1.01 (0.105),1.04 (0.17)
open_payments,2.73 (0.86),2.12 (0.687),1.69 (0.223),1.69 (0.403)
road-safety-drivers-sex,2.39 (0.505),3.43 (0.842),3.24 (0.799),3.76 (1.196)
porto-seguro,2.16 (0.166),2.13 (0.193),2.11 (0.068),2.15 (0.237)


In [13]:
# Mean values only (the text before the first space of each "mean (std)" entry), one array per model.
tuple(
    res_df[col].apply(lambda cell: float(cell.split(" ")[0])).values
    for col in ("GMENN0", "GMENN1", "GMENN5", "GMENN10")
)

(array([ 0.67,  1.  ,  1.98,  2.48,  0.65, 13.39,  1.09,  2.73,  2.39,
         2.16]),
 array([1.2 , 1.99, 2.36, 1.86, 0.62, 3.56, 0.88, 2.12, 3.43, 2.13]),
 array([0.63, 1.61, 2.5 , 2.56, 0.56, 5.25, 1.01, 1.69, 3.24, 2.11]),
 array([ 0.66,  1.23,  2.43,  2.51,  0.54, 11.16,  1.04,  1.69,  3.76,
         2.15]))