In [1]:
import sys
# Append root path 
sys.path.append("../")
sys.path.append("../lmmnn")

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Allow on-demand GPU memory growth and restrict CUDA to device index 2
# (which TensorFlow then reports as GPU:0).
os.environ.update({
    "TF_FORCE_GPU_ALLOW_GROWTH": "true",
    "CUDA_VISIBLE_DEVICES": "2",
})

# Query the device name once and report whether the expected GPU is available.
gpu_name = tf.test.gpu_device_name()
if gpu_name == '/device:GPU:0':
    print('SUCCESS: Found GPU: {}'.format(gpu_name))
else:
    print('WARNING: GPU device not found.')

from model.mixed_effects import *
from utils.fe_models import get_model
from utils.evaluation import *
from utils.utils import *
from data.preprocessing import dataset_preprocessing

# from vis.utils.utils import apply_modifications
# helper function
def update_layer_activation(model, activation, index=-1):
    """Replace the activation of a single layer and return the same model object.

    Args:
        model: Object exposing an indexable ``layers`` sequence.
        activation: New value assigned to the selected layer's ``activation`` attribute.
        index: Position of the layer in ``model.layers``; defaults to the last layer.

    Returns:
        The ``model`` that was passed in (modified in place, not copied).

    NOTE(review): for an already built Keras model, changing the attribute alone may
    not alter the computation until the model is rebuilt -- confirm before relying on it.
    """
    selected_layer = model.layers[index]
    selected_layer.activation = activation
    return model

from tensorflow.keras.optimizers import Adam
from keras.models import Sequential, Model
from keras.layers import Dense, Input, Reshape, Embedding, Concatenate
from tensorflow.keras.activations import sigmoid

from sklearn.metrics import accuracy_score as acc
from sklearn.metrics import roc_auc_score as auroc
from sklearn.metrics import f1_score as f1
from sklearn.model_selection import train_test_split
from category_encoders import TargetEncoder
from tensorflow_addons.metrics import F1Score

from scipy import stats
import pickle
import yaml
import time
import gc

# Global random seed: embedded in the data/result path names and, offset by the
# fold index, used to seed TensorFlow and NumPy before each training run.
RS = 555

SUCCESS: Found GPU: /device:GPU:0


#### Download and save the data from Pargent et al. by running "data/download_pargent2022_datasets.py" before running this notebook.

In [2]:
# --- Data / split configuration ---------------------------------------------
# These values are part of the prepared-data folder name and are forwarded to
# dataset_preprocessing.process_dataset when the prepared data does not exist yet.
mode, folds = "cv", 5
hct = 10  # presumably the high-cardinality threshold used by preprocessing -- confirm
test_ratio = val_ratio = None  # only read by the "train_test" / "train_val_test" modes
dataset_names = [
    "eucalyptus",
    "Midwest_survey",
    "hpc-job-scheduling",
    "video-game-sales",
    "okcupid-stem",
    "Diabetes130US",
]

# --- Model / training configuration -----------------------------------------
target = "categorical"


def loss_use():
    """Return the Keras loss *class* (not an instance); call sites use ``loss_use()()``."""
    return tf.keras.losses.CategoricalCrossentropy


batch_size, epochs, early_stopping = 512, 500, 20
model_name = embed_dims_method = "AutoGluon"

# Filled by the training loop as results[dataset_name][fold_num][...].
results = {}

#######################################

# Train (or load cached results for) one mixed-effects network per dataset, CV fold and
# fixed-effects loss weight `lambda_`. Results are cached per (dataset, fold, lambda) as
# pickles, so a re-run only trains the combinations that are still missing.
for dataset_name in dataset_names:
    print(f"Start training procedure for {dataset_name}")

    # Folder name of the prepared data; it encodes the split configuration.
    data_path = f"{mode}_RS{RS}_hct{hct}"
    if mode == "cv":
        data_path += f"_{folds}folds"
    elif mode == "train_test":
        # NOTE(review): `1-test_ratio*100` evaluates as 1-(test_ratio*100) and looks like it
        # should be `100-test_ratio*100` (compare the "train_val_test" branch). Left unchanged
        # because this name has to match the folder written by
        # dataset_preprocessing.process_dataset -- confirm there before fixing.
        data_path += f"_split{1-test_ratio*100}-{test_ratio*100}"
    elif mode == "train_val_test":
        data_path += f"_split{round(100-(test_ratio+val_ratio)*100)}-{round(test_ratio*100)}-{round(val_ratio*100)}"

    # If no data_dict exists, run preprocessing, else load data_dict
    data_dict_file = f"../data/prepared/{dataset_name}/{data_path}/data_dict.pickle"
    if not os.path.exists(data_dict_file):
        dataset_preprocessing.process_dataset(dataset_name, target, mode, RS, hct, test_ratio, val_ratio, folds)
    # NOTE: unpickling can execute arbitrary code; only load files produced by this project.
    with open(data_dict_file, 'rb') as handle:
        data_dict = pickle.load(handle)

    z_cols = data_dict["z_cols"]

    results[dataset_name] = {}
    for fold_num in range(folds):
        print(f"Fold no. {fold_num}")
        results[dataset_name][fold_num] = {"histories": {}, "predictions": {}, "times": {}, "other_info": {}}
        fold_results = results[dataset_name][fold_num]

        # lambda_ is handed to MixedEffectsNetwork as `fe_loss_weight`.
        for lambda_ in [0.,1.,5.,10.]:
            # Result keys for the full model output and for its fixed-effects-only output.
            me_key = "GMENN"+str(int(lambda_))
            fe_key = "GMENN_FE"+str(int(lambda_))

            save_path = f"../results/{dataset_name}/{data_path}/fold_{fold_num}/lambda__"+str(int(lambda_))
            os.makedirs(save_path, exist_ok=True)
            # One path for the existence check, the dump and the load.
            result_file = f"{save_path}/results_RS{RS}_{dataset_name}_iter{fold_num}.pickle"

            # Re-read the fold data for every lambda_: the training branch below converts
            # these names to tensors and deletes some of them afterwards.
            z_ohe_encoded_train = data_dict[f"z_ohe_encoded_train_{fold_num}"]
            z_ohe_encoded_val = data_dict[f"z_ohe_encoded_val_{fold_num}"]
            z_ohe_encoded_test = data_dict[f"z_ohe_encoded_test_{fold_num}"]

            z_target_encoded_train = data_dict[f"z_target_encoded_train_{fold_num}"]
            z_target_encoded_val = data_dict[f"z_target_encoded_val_{fold_num}"]
            z_target_encoded_test = data_dict[f"z_target_encoded_test_{fold_num}"]

            target_encoding_time = data_dict[f"target_encoding_time_{fold_num}"]
            ohe_encoding_time = data_dict[f"ohe_encoding_time_{fold_num}"]

            x_cols = data_dict[f"X_train_{fold_num}"].columns
            X_train = data_dict[f"X_train_{fold_num}"]
            Z_train = data_dict[f"Z_train_{fold_num}"]
            y_train = data_dict[f"y_train_{fold_num}"]

            X_val = data_dict[f"X_val_{fold_num}"]
            Z_val = data_dict[f"Z_val_{fold_num}"]
            y_val = data_dict[f"y_val_{fold_num}"]

            X_test = data_dict[f"X_test_{fold_num}"]
            Z_test = data_dict[f"Z_test_{fold_num}"]
            y_test = data_dict[f"y_test_{fold_num}"]

            if not os.path.exists(result_file):

                tf.random.set_seed(RS+fold_num)
                np.random.seed(RS+fold_num)

                # Per Z column: largest value seen in train/val/test plus one
                # (presumably the number of levels of each categorical feature -- confirm).
                qs = np.max([tf.reduce_max(Z_train, axis=0),tf.reduce_max(Z_val, axis=0),tf.reduce_max(Z_test, axis=0)],axis=0)+1

                X_train = tf.convert_to_tensor(X_train)
                Z_train = tf.convert_to_tensor(Z_train,dtype=tf.int32)
                y_train = tf.convert_to_tensor(y_train)

                X_val = tf.convert_to_tensor(X_val)
                Z_val = tf.convert_to_tensor(Z_val,dtype=tf.int32)
                y_val = tf.convert_to_tensor(y_val)

                X_test = tf.convert_to_tensor(X_test)
                Z_test = tf.convert_to_tensor(Z_test,dtype=tf.int32)
                y_test = tf.convert_to_tensor(y_test)

                # The class count is taken from the training labels only.
                if target == "categorical":
                    n_classes = np.unique(y_train).shape[0]
                elif target=="binary":
                    n_classes = 1

                y_train = tf.one_hot(tf.cast(y_train,tf.int32),n_classes)
                y_val = tf.one_hot(tf.cast(y_val,tf.int32),n_classes)
                y_test = tf.one_hot(tf.cast(y_test,tf.int32),n_classes)

                ##### GMENN #####
                d = X_train.shape[1] # columns
                n = X_train.shape[0] # rows
                num_outputs = n_classes
                # Share of X columns among all (X + Z) feature columns.
                perc_numeric = d/(d+Z_train.shape[1])

                set_seed(RS)

                fe_model, optimizer = get_model(model_name=model_name, input_size=X_train.shape[1],
                                                  output_size=num_outputs,
                                                  target=target,
                                                  perc_numeric=perc_numeric, RS=RS)

                # eucalyptus is trained with a 10x larger learning rate than get_model returns.
                if dataset_name=="eucalyptus":
                    optimizer.learning_rate.assign(optimizer.learning_rate*10)

                # One initial std of 1.0 per (Z column, output).
                initial_stds = np.ones([len(qs),num_outputs]).astype(float).tolist()

                me_model = MixedEffectsNetwork(X_train, Z_train, y_train, fe_model,
                                               target=target, qs=qs,
                                               initial_stds=initial_stds,
                                               fe_loss_weight=lambda_,
                                               mode="intercepts",
                                               early_stopping_fe=early_stopping,
                                               )

                # loss_use() returns the loss class; the second call instantiates it.
                me_model.compile(
                    loss_class_me = loss_use()(),
                    loss_class_fe = loss_use()(),
                    optimizer=optimizer
                )

                mcmc = MCMCSamplingCallback(num_mcmc_samples=1,
                                            perc_burnin=0.7,
                                            warm_restart=None,
                                            num_burnin_steps=1,
                                            step_size = 0.1#initial_step_size,
                                       )

                print_metric = PrintMetrics(X_train, Z_train, y_train, X_val, Z_val, y_val)

                start = time.time()
                history = me_model.fit([X_train,Z_train], y_train,
                             callbacks=[mcmc,
                                        print_metric,
                                        tf.keras.callbacks.EarlyStopping(monitor="me_auc_val", patience=early_stopping, mode="max")],
                             epochs=epochs,
                             validation_data=[[X_val,Z_val],y_val],
                             batch_size=batch_size)

                end = time.time()
                fit_time_gmenn = round(end-start,2)

                # The model returns two outputs: the full prediction and the fixed-effects-only one.
                y_train_pred_gmenn, y_train_pred_gmenn_fe = me_model([X_train,Z_train])
                y_val_pred_gmenn, y_val_pred_gmenn_fe = me_model([X_val,Z_val])
                y_test_pred_gmenn, y_test_pred_gmenn_fe = me_model([X_test,Z_test])

                ##### Document Results #####

                fold_results["histories"][me_key] = history.history

                # Predictions are stored in [train, val, test] order.
                fold_results["predictions"][me_key] = [y_train_pred_gmenn, y_val_pred_gmenn, y_test_pred_gmenn]
                fold_results["predictions"][fe_key] = [y_train_pred_gmenn_fe, y_val_pred_gmenn_fe, y_test_pred_gmenn_fe]

                fold_results["times"][me_key] = fit_time_gmenn

                fold_results["other_info"][me_key] = {
                        "_stddev_z": np.array([i.numpy() for i in me_model.data_model._stddev_z]),
                        "acceptance_rates": np.array(me_model.acceptance_rates),
                        "random_effects": me_model.mean_samples,
                        "all_samples": me_model.all_samples,
                        "stds": me_model.stds
                    }

                # The file holds everything collected for this fold so far, i.e. also the
                # entries of the lambda_ values processed earlier in this loop.
                with open(result_file, 'wb') as handle:
                    pickle.dump(fold_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

                del X_train, X_val, X_test, y_train, y_val, y_test

                gc.collect()
            else:
                print(f"Load results for dataset {dataset_name}, iteration={fold_num}")
                with open(result_file, 'rb') as handle:
                    res = pickle.load(handle)
                # Copy only the entries of the current lambda_ out of the cached file.
                fold_results["histories"][me_key] = res["histories"][me_key]
                fold_results["predictions"][me_key] = res["predictions"][me_key]
                fold_results["predictions"][fe_key] = res["predictions"][fe_key]
                fold_results["times"][me_key] = res["times"][me_key]
                fold_results["other_info"][me_key] = res["other_info"][me_key]


Start training procedure for eucalyptus
Fold no. 0
Load results for dataset eucalyptus, iteration=0
Load results for dataset eucalyptus, iteration=0
Load results for dataset eucalyptus, iteration=0
Load results for dataset eucalyptus, iteration=0
Fold no. 1
Load results for dataset eucalyptus, iteration=1
Load results for dataset eucalyptus, iteration=1
Load results for dataset eucalyptus, iteration=1
Load results for dataset eucalyptus, iteration=1
Fold no. 2
Load results for dataset eucalyptus, iteration=2
Load results for dataset eucalyptus, iteration=2
Load results for dataset eucalyptus, iteration=2
Load results for dataset eucalyptus, iteration=2
Fold no. 3
Load results for dataset eucalyptus, iteration=3
Load results for dataset eucalyptus, iteration=3
Load results for dataset eucalyptus, iteration=3
Load results for dataset eucalyptus, iteration=3
Fold no. 4
Load results for dataset eucalyptus, iteration=4
Load results for dataset eucalyptus, iteration=4
Load results for datase

## Evaluation

### Performance

In [3]:
# Collect the test-set metrics of every (dataset, fold, model) into `results_perf`.
# Relies on `results` and `data_path` as left behind by the training cell.
models = ["GMENN"+str(int(lambda_)) for lambda_ in [0,1,5,10]]

# Placeholder used whenever a combination cannot be evaluated.
nan_metrics = {"Accuracy": np.nan,
               "AUROC": np.nan,
               "F1": np.nan,
               "Time": np.nan,
               "FE_AUC": np.nan}

results_perf = {dataset_name: {num: {model: {}  for model in models} for num in range(folds)} for dataset_name in dataset_names}
for dataset_name in dataset_names:
    try:
        with open(f"../data/prepared/{dataset_name}/{data_path}/data_dict.pickle", 'rb') as handle:
            data_dict = pickle.load(handle)
    except (OSError, pickle.UnpicklingError, EOFError) as err:
        # Without this dataset's own labels nothing can be scored. Falling through would
        # evaluate against the data_dict of the previously loaded dataset, so fill the
        # whole dataset with NaN and move on.
        print(f"dataset {dataset_name} not found ({err!r})")
        for num in range(folds):
            for model in models:
                results_perf[dataset_name][num][model] = dict(nan_metrics)
        continue
    for num in range(folds):
        y_test = data_dict[f"y_test_{num}"]
        n_classes = np.unique(y_test).shape[0]
        y_test = tf.one_hot(data_dict[f"y_test_{num}"],n_classes)
        for model in models:
            try:
                # Predictions are stored as [train, val, test]; index 2 is the test split.
                y_pred = results[dataset_name][num]["predictions"][model][2]

                results_perf[dataset_name][num][model] = get_metrics(y_test,y_pred,target)
                results_perf[dataset_name][num][model]["Time"] = results[dataset_name][num]["times"][model]
            except Exception as err:
                # Best effort: keep going, but show which model failed and why.
                print(f"Set nan for {dataset_name}, {num} ({model}: {err!r})")
                results_perf[dataset_name][num][model] = dict(nan_metrics)


2024-01-22 12:56:18.927017: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory


In [4]:
# Summarise one metric per dataset and model as "mean (std)" over the folds and run a
# paired t-test of every model against the best model of each dataset.
models = [f"GMENN{lam}" for lam in (0, 1, 5, 10)]

metric = "AUROC"

# Decimals used for the mean and the std in the table cells.
round_mean_at, round_std_at = 2, 3

# Per-dataset outputs of the loop below.
dataset_res_dict, best_models, t_test_results = {}, {}, {}

for dataset_name in dataset_names:
    per_fold = results_perf[dataset_name]
    dataset_models = list(per_fold[0].keys())

    # Rows: folds, columns: models, values: the selected metric.
    use_df = pd.DataFrame(
        [pd.DataFrame(per_fold[fold_num]).loc[metric, models] for fold_num in per_fold.keys()],
        index=per_fold.keys(),
    )

    col_means = use_df.mean(axis=0)
    mean_txt = col_means.round(round_mean_at).astype(str)
    std_txt = use_df.std(axis=0).round(round_std_at).astype(str)
    df_mean = pd.DataFrame(mean_txt + " (" + std_txt + ")").transpose()
    model_dict = df_mean.iloc[0].to_dict()
    dataset_res_dict[dataset_name] = model_dict

    # Higher is better for this metric.
    best_models[dataset_name] = use_df.columns[col_means.argmax()]

    # p-value of each model vs. the best one; models without results get 0.
    best_scores = use_df[best_models[dataset_name]].values
    p_values = [
        stats.ttest_rel(best_scores, use_df[model].values)[1] if model in dataset_models else 0
        for model in models
    ]
    rounded_p = np.array(p_values).round(3)
    # A NaN p-value (e.g. the best model compared with itself) counts as "no difference".
    t_test_res = np.where(np.isnan(rounded_p), 1., rounded_p)
    t_test_results[dataset_name] = t_test_res

res_df = pd.DataFrame(dataset_res_dict).transpose()


def negative_bold(val):
    """Styler callback for one model column of ``res_df``.

    Returns one CSS string per dataset: bold when the model's p-value against that
    dataset's best model is >= 0.05, an empty string otherwise.
    """
    i = np.where(val.name==np.array(models))[0][0]
    css = []
    for name in val.keys():
        css.append("font-weight: bold" if t_test_results[name][i]>=0.05 else "")
    return css


# Optional styled view: res_df.style.apply(negative_bold)
res_df

Unnamed: 0,GMENN0,GMENN1,GMENN5,GMENN10
eucalyptus,0.89 (0.024),0.9 (0.021),0.9 (0.023),0.89 (0.025)
Midwest_survey,0.87 (0.021),0.88 (0.025),0.88 (0.026),0.88 (0.026)
hpc-job-scheduling,0.91 (0.008),0.91 (0.008),0.9 (0.006),0.88 (0.041)
video-game-sales,0.78 (0.01),0.79 (0.01),0.79 (0.009),0.79 (0.01)
okcupid-stem,0.79 (0.003),0.8 (0.004),0.81 (0.003),0.81 (0.003)
Diabetes130US,0.63 (0.002),0.64 (0.003),0.65 (0.005),0.65 (0.005)


In [5]:
# Numeric means only (the text before the first space of each "mean (std)" cell),
# one array per model column, in the row order of res_df.
tuple(
    res_df[column].apply(lambda cell: float(cell.split(" ")[0])).values
    for column in ("GMENN0", "GMENN1", "GMENN5", "GMENN10")
)

(array([0.89, 0.87, 0.91, 0.78, 0.79, 0.63]),
 array([0.9 , 0.88, 0.91, 0.79, 0.8 , 0.64]),
 array([0.9 , 0.88, 0.9 , 0.79, 0.81, 0.65]),
 array([0.89, 0.88, 0.88, 0.79, 0.81, 0.65]))

### Fixed Effects Performance

In [6]:
# Collect the test-set metrics of the fixed-effects-only predictions of every
# (dataset, fold, model) into `results_perf`.
# Relies on `results` and `data_path` as left behind by the training cell.
models = ["GMENN_FE"+str(int(lambda_)) for lambda_ in [0,1,5,10]]

# Placeholder used whenever a combination cannot be evaluated.
nan_metrics = {"Accuracy": np.nan,
               "AUROC": np.nan,
               "F1": np.nan,
               "Time": np.nan,
               "FE_AUC": np.nan}

results_perf = {dataset_name: {num: {model: {}  for model in models} for num in range(folds)} for dataset_name in dataset_names}
for dataset_name in dataset_names:
    try:
        with open(f"../data/prepared/{dataset_name}/{data_path}/data_dict.pickle", 'rb') as handle:
            data_dict = pickle.load(handle)
    except (OSError, pickle.UnpicklingError, EOFError) as err:
        # Without this dataset's own labels nothing can be scored. Falling through would
        # evaluate against the data_dict of the previously loaded dataset, so fill the
        # whole dataset with NaN and move on.
        print(f"dataset {dataset_name} not found ({err!r})")
        for num in range(folds):
            for model in models:
                results_perf[dataset_name][num][model] = dict(nan_metrics)
        continue
    for num in range(folds):
        y_test = data_dict[f"y_test_{num}"]
        n_classes = np.unique(y_test).shape[0]
        y_test = tf.one_hot(data_dict[f"y_test_{num}"],n_classes)
        for model in models:
            try:
                # Predictions are stored as [train, val, test]; index 2 is the test split.
                y_pred = results[dataset_name][num]["predictions"][model][2]

                results_perf[dataset_name][num][model] = get_metrics(y_test,y_pred,target)
            except Exception as err:
                # Best effort: keep going, but show which model failed and why.
                print(f"Set nan for {dataset_name}, {num} ({model}: {err!r})")
                results_perf[dataset_name][num][model] = dict(nan_metrics)


In [7]:
# Summarise one metric per dataset and fixed-effects model as "mean (std)" over the folds
# and run a paired t-test of every model against the best model of each dataset.
models = [f"GMENN_FE{lam}" for lam in (0, 1, 5, 10)]
metric = "AUROC"

# Decimals used for the mean and the std in the table cells.
round_mean_at, round_std_at = 2, 3

# Per-dataset outputs of the loop below.
dataset_res_dict, best_models, t_test_results = {}, {}, {}

for dataset_name in dataset_names:
    per_fold = results_perf[dataset_name]
    dataset_models = list(per_fold[0].keys())

    # Rows: folds, columns: models, values: the selected metric.
    use_df = pd.DataFrame(
        [pd.DataFrame(per_fold[fold_num]).loc[metric, models] for fold_num in per_fold.keys()],
        index=per_fold.keys(),
    )

    col_means = use_df.mean(axis=0)
    mean_txt = col_means.round(round_mean_at).astype(str)
    std_txt = use_df.std(axis=0).round(round_std_at).astype(str)
    df_mean = pd.DataFrame(mean_txt + " (" + std_txt + ")").transpose()
    model_dict = df_mean.iloc[0].to_dict()
    dataset_res_dict[dataset_name] = model_dict

    # Higher is better for this metric.
    best_models[dataset_name] = use_df.columns[col_means.argmax()]

    # p-value of each model vs. the best one; models without results get 0.
    best_scores = use_df[best_models[dataset_name]].values
    p_values = [
        stats.ttest_rel(best_scores, use_df[model].values)[1] if model in dataset_models else 0
        for model in models
    ]
    rounded_p = np.array(p_values).round(3)
    # A NaN p-value (e.g. the best model compared with itself) counts as "no difference".
    t_test_res = np.where(np.isnan(rounded_p), 1., rounded_p)
    t_test_results[dataset_name] = t_test_res

res_df = pd.DataFrame(dataset_res_dict).transpose()


def negative_bold(val):
    """Styler callback for one model column of ``res_df``.

    Returns one CSS string per dataset: bold when the model's p-value against that
    dataset's best model is >= 0.05, an empty string otherwise.
    """
    i = np.where(val.name==np.array(models))[0][0]
    css = []
    for name in val.keys():
        css.append("font-weight: bold" if t_test_results[name][i]>=0.05 else "")
    return css


# Optional styled view: res_df.style.apply(negative_bold)
res_df

Unnamed: 0,GMENN_FE0,GMENN_FE1,GMENN_FE5,GMENN_FE10
eucalyptus,0.86 (0.045),0.9 (0.022),0.89 (0.023),0.89 (0.024)
Midwest_survey,0.73 (0.021),0.75 (0.021),0.75 (0.021),0.75 (0.021)
hpc-job-scheduling,0.83 (0.009),0.85 (0.005),0.85 (0.004),0.83 (0.05)
video-game-sales,0.63 (0.025),0.69 (0.008),0.7 (0.009),0.7 (0.01)
okcupid-stem,0.71 (0.004),0.72 (0.004),0.73 (0.006),0.73 (0.006)
Diabetes130US,0.6 (0.005),0.61 (0.003),0.62 (0.009),0.62 (0.007)


In [8]:
# Numeric means only (the text before the first space of each "mean (std)" cell),
# one array per model column, in the row order of res_df.
tuple(
    res_df[column].apply(lambda cell: float(cell.split(" ")[0])).values
    for column in ("GMENN_FE0", "GMENN_FE1", "GMENN_FE5", "GMENN_FE10")
)

(array([0.86, 0.73, 0.83, 0.63, 0.71, 0.6 ]),
 array([0.9 , 0.75, 0.85, 0.69, 0.72, 0.61]),
 array([0.89, 0.75, 0.85, 0.7 , 0.73, 0.62]),
 array([0.89, 0.75, 0.83, 0.7 , 0.73, 0.62]))

### Time

In [9]:
# Rebuild `results_perf` for the full models, including the fit time of every
# (dataset, fold, model), which the timing table in the next cell reads.
# Relies on `results` and `data_path` as left behind by the training cell.
models = ["GMENN"+str(int(lambda_)) for lambda_ in [0,1,5,10]]

# Placeholder used whenever a combination cannot be evaluated.
nan_metrics = {"Accuracy": np.nan,
               "AUROC": np.nan,
               "F1": np.nan,
               "Time": np.nan,
               "FE_AUC": np.nan}

results_perf = {dataset_name: {num: {model: {}  for model in models} for num in range(folds)} for dataset_name in dataset_names}
for dataset_name in dataset_names:
    try:
        with open(f"../data/prepared/{dataset_name}/{data_path}/data_dict.pickle", 'rb') as handle:
            data_dict = pickle.load(handle)
    except (OSError, pickle.UnpicklingError, EOFError) as err:
        # Without this dataset's own labels nothing can be scored. Falling through would
        # evaluate against the data_dict of the previously loaded dataset, so fill the
        # whole dataset with NaN and move on.
        print(f"dataset {dataset_name} not found ({err!r})")
        for num in range(folds):
            for model in models:
                results_perf[dataset_name][num][model] = dict(nan_metrics)
        continue
    for num in range(folds):
        y_test = data_dict[f"y_test_{num}"]
        n_classes = np.unique(y_test).shape[0]
        y_test = tf.one_hot(data_dict[f"y_test_{num}"],n_classes)
        for model in models:
            try:
                # Predictions are stored as [train, val, test]; index 2 is the test split.
                y_pred = results[dataset_name][num]["predictions"][model][2]

                results_perf[dataset_name][num][model] = get_metrics(y_test,y_pred,target)
                results_perf[dataset_name][num][model]["Time"] = results[dataset_name][num]["times"][model]
            except Exception as err:
                # Best effort: keep going, but show which model failed and why.
                print(f"Set nan for {dataset_name}, {num} ({model}: {err!r})")
                results_perf[dataset_name][num][model] = dict(nan_metrics)


In [10]:
# Summarise the fit time per dataset and model as "mean (std)" over the folds and run a
# paired t-test of every model against the fastest model of each dataset.
# `models` is the list defined in the preceding cell.
metric = "Time"

# Decimals used for the mean and the std in the table cells.
round_mean_at, round_std_at = 2, 3

# Per-dataset outputs of the loop below.
dataset_res_dict, best_models, t_test_results = {}, {}, {}

for dataset_name in dataset_names:
    per_fold = results_perf[dataset_name]
    dataset_models = list(per_fold[0].keys())

    # Rows: folds, columns: models; the stored times are divided by 60
    # (seconds from time.time() differences, so the table is in minutes).
    raw_df = pd.DataFrame(
        [pd.DataFrame(per_fold[fold_num]).loc[metric, models] for fold_num in per_fold.keys()],
        index=per_fold.keys(),
    )
    use_df = raw_df/60

    col_means = use_df.mean(axis=0)
    mean_txt = col_means.round(round_mean_at).astype(str)
    std_txt = use_df.std(axis=0).round(round_std_at).astype(str)
    df_mean = pd.DataFrame(mean_txt + " (" + std_txt + ")").transpose()
    model_dict = df_mean.iloc[0].to_dict()
    dataset_res_dict[dataset_name] = model_dict

    # Lower is better for time.
    best_models[dataset_name] = use_df.columns[col_means.argmin()]

    # p-value of each model vs. the best one; models without results get 0.
    best_scores = use_df[best_models[dataset_name]].values
    p_values = [
        stats.ttest_rel(best_scores, use_df[model].values)[1] if model in dataset_models else 0
        for model in models
    ]
    rounded_p = np.array(p_values).round(3)
    # A NaN p-value (e.g. the best model compared with itself) counts as "no difference".
    t_test_res = np.where(np.isnan(rounded_p), 1., rounded_p)
    t_test_results[dataset_name] = t_test_res

res_df = pd.DataFrame(dataset_res_dict).transpose()


def negative_bold(val):
    """Styler callback for one model column of ``res_df``.

    Returns one CSS string per dataset: bold when the model's p-value against that
    dataset's best model is >= 0.05, an empty string otherwise.
    """
    i = np.where(val.name==np.array(models))[0][0]
    css = []
    for name in val.keys():
        css.append("font-weight: bold" if t_test_results[name][i]>=0.05 else "")
    return css


# Optional styled view: res_df.style.apply(negative_bold)
res_df

Unnamed: 0,GMENN0,GMENN1,GMENN5,GMENN10
eucalyptus,5.95 (4.682),3.58 (3.31),2.35 (1.637),2.41 (1.512)
Midwest_survey,1.54 (0.559),1.26 (0.494),1.41 (0.491),1.41 (0.491)
hpc-job-scheduling,4.5 (1.473),3.79 (1.322),6.69 (4.127),2.76 (1.76)
video-game-sales,1.36 (0.211),1.4 (0.341),1.47 (0.445),1.55 (0.553)
okcupid-stem,2.42 (0.303),2.33 (0.268),2.95 (0.528),2.97 (0.811)
Diabetes130US,2.54 (0.268),2.66 (0.277),2.5 (0.159),2.39 (0.161)


In [11]:
# Numeric means only (the text before the first space of each "mean (std)" cell),
# one array per model column, in the row order of res_df.
tuple(
    res_df[column].apply(lambda cell: float(cell.split(" ")[0])).values
    for column in ("GMENN0", "GMENN1", "GMENN5", "GMENN10")
)

(array([5.95, 1.54, 4.5 , 1.36, 2.42, 2.54]),
 array([3.58, 1.26, 3.79, 1.4 , 2.33, 2.66]),
 array([2.35, 1.41, 6.69, 1.47, 2.95, 2.5 ]),
 array([2.41, 1.41, 2.76, 1.55, 2.97, 2.39]))

In [12]:
# Emits a single empty line.
print()


