In [96]:
import pandas as pd
import os

# easySHARE

## Benchmarks

In [97]:
# Build the easySHARE benchmark table: one row per (model, functional-effects
# variant), filled from the per-run result CSV files under results/easySHARE/.
path_to_results = os.getcwd() + "/results/easySHARE/"

index_pd = pd.MultiIndex.from_product(
    [
        ["RUMBoost", "TasteNet"],
        [
            "Functional intercept and slopes",
            "Functional slopes only",
            "Functional intercept only",
            "No functional effects",
        ],
    ],
    names=["Model", "Functional effects"],
)

benchmark_table = pd.DataFrame(
    {},
    columns=["MAE", "EMAE", "MCEL", "Comput. time [s]"],
    index=index_pd,
)

# (func_int, func_params) flags, spelled as in the file names -> row label.
effect_labels = {
    ("True", "True"): "Functional intercept and slopes",
    ("True", "False"): "Functional intercept only",
    ("False", "True"): "Functional slopes only",
    ("False", "False"): "No functional effects",
}

for model in ["RUMBoost", "TasteNet"]:
    for func_int in ["True", "False"]:
        for func_params in ["True", "False"]:
            results = pd.read_csv(
                f"{path_to_results}{model}/results_dict_fi{func_int}_fp{func_params}.csv"
            )
            benchmark_table.loc[
                (model, effect_labels[(func_int, func_params)]),
                ["MAE", "EMAE", "MCEL", "Comput. time [s]"],
            ] = results[["mae_test", "emae_test", "loss_test", "train_time"]].values.round(3)

# Cast and round the timing column once, after every row has been filled
# (this is loop-invariant work and does not need to run for each file).
benchmark_table["Comput. time [s]"] = (
    benchmark_table["Comput. time [s]"].astype(float).round(2)
)

benchmark_table


Unnamed: 0_level_0,Unnamed: 1_level_0,MAE,EMAE,MCEL,Comput. time [s]
Model,Functional effects,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RUMBoost,Functional intercept and slopes,1.369,0.146,0.251,169.3
RUMBoost,Functional slopes only,1.37,0.146,0.251,178.04
RUMBoost,Functional intercept only,1.368,0.146,0.251,241.08
RUMBoost,No functional effects,1.413,0.152,0.26,292.95
TasteNet,Functional intercept and slopes,1.583,0.138,0.381,37.99
TasteNet,Functional slopes only,1.375,0.147,0.252,85.42
TasteNet,Functional intercept only,1.399,0.148,0.258,44.73
TasteNet,No functional effects,1.425,0.151,0.262,307.65


In [98]:
# Export the benchmark table to LaTeX. With index=True every index level becomes
# a LaTeX column, so the column spec is derived from the table itself (left-aligned
# index levels, right-aligned metrics) instead of being hard-coded.
benchmark_table.to_latex(
    "results/easySHARE/benchmark_table.tex",
    index=True,
    float_format="%.3f",
    escape=False,
    column_format="l" * benchmark_table.index.nlevels
    + "r" * len(benchmark_table.columns),
)

## Variables in the model

In [99]:
from constants import alt_spec_features, PATH_TO_DATA

# Flatten the per-key feature lists registered for easySHARE into one list,
# keeping the order in which they are declared.
all_alt_spec_features = [
    feature
    for features in alt_spec_features["easySHARE"].values()
    for feature in features
]

df = pd.read_csv(PATH_TO_DATA["easySHARE"])

# Every remaining column that is neither an alternative-specific feature nor one
# of the columns excluded by name below.
excluded_columns = ["mergeid", "hhid", "coupleid", "depression_scale"]
socio_demo_chars = [
    col
    for col in df.columns
    if not (col in all_alt_spec_features or col in excluded_columns)
]

In [100]:
# Display the column names that were not classified as alternative-specific features.
socio_demo_chars

['female',
 'age',
 'hhsize',
 'nb_children',
 'hh_net_income',
 'born_in_itw_country_yes',
 'education_level_1',
 'education_level_2',
 'education_level_3',
 'education_level_4',
 'education_level_5',
 'education_level_6',
 'education_none',
 'education_studying',
 'marital_status_married',
 'marital_status_reg_partnership',
 'marital_status_separated',
 'marital_status_single',
 'marital_status_widowed',
 'partnerinhh_yes',
 'mother_alive_yes',
 'father_alive_yes',
 'help_outside_hh_yes',
 'smoking_yes',
 'ever_smoked_yes',
 'vigorous_activity_more_than_once_a_week',
 'vigorous_activity_never',
 'vigorous_activity_once_three_times_a_month',
 'job_situation_homemaker',
 'job_situation_other',
 'job_situation_retired',
 'job_situation_unable_to_work',
 'job_situation_unemployed',
 'household_ends_meet_fairly_easily',
 'household_ends_meet_with_great_difficulty',
 'household_ends_meet_with_some_difficulty',
 'has_citizenship_yes',
 'country_BE',
 'country_BG',
 'country_CH',
 'country_C

In [101]:
# Display the flattened list of alternative-specific feature names.
all_alt_spec_features

['chronic_mod',
 'nb_doctor_visits',
 'maxgrip',
 'daily_activities_index',
 'instrumental_activities_index',
 'mobilityind',
 'lgmuscle',
 'grossmotor',
 'finemotor',
 'recall_1',
 'recall_2',
 'bmi',
 'sphus_fair',
 'sphus_good',
 'sphus_poor',
 'sphus_very_good',
 'hospitalised_last_year_yes',
 'nursing_home_last_year_yes_permanently',
 'nursing_home_last_year_yes_temporarily']

## Alt-spec feature names

In [102]:
# Human-readable labels for the alternative-specific feature columns.
# Insertion order matters: it is the order in which the labels are printed below.
feature_names = {
    "bmi": "BMI",
    "chronic_mod": "Number of chronic conditions",
    "daily_activities_index": "Daily activities index",
    "finemotor": "Fine motor skills",
    "grossmotor": "Gross motor skills",
    "hospitalised_last_year_yes": "Hospitalised last year",
    "lgmuscle": "Large muscle skills",
    "maxgrip": "Max grip strength",
    "mobilityind": "Mobility index",
    "nursing_home_last_year_yes_permanently": "Nursing home last year (permanently)",
    "nursing_home_last_year_yes_temporarily": "Nursing home last year (temporarily)",
    "recall_1": "Recall 1",
    "recall_2": "Recall 2",
    "sphus_fair": "Self-perceived health - fair",
    "sphus_good": "Self-perceived health - good",
    "sphus_poor": "Self-perceived health - poor",
    "sphus_very_good": "Self-perceived health - very good",
    "sphus_very_poor": "Self-perceived health - very poor",
    "instrumental_activities_index": "Instrumental activities index",
    "nb_doctor_visits": "Number of doctor visits",
}

# Print the display labels (the dictionary values) as a plain list.
display_labels = list(feature_names.values())
print(display_labels)

['BMI', 'Number of chronic conditions', 'Daily activities index', 'Fine motor skills', 'Gross motor skills', 'Hospitalised last year', 'Large muscle skills', 'Max grip strength', 'Mobility index', 'Nursing home last year (permanently)', 'Nursing home last year (temporarily)', 'Recall 1', 'Recall 2', 'Self-perceived health - fair', 'Self-perceived health - good', 'Self-perceived health - poor', 'Self-perceived health - very good', 'Self-perceived health - very poor', 'Instrumental activities index', 'Number of doctor visits']


## Hyperparameters

In [103]:
import os
import pandas as pd

# Assemble the easySHARE hyperparameter table: one row per hyperparameter (plus
# validation loss, search time and best iteration), one column per
# (model, functional-effects variant), preceded by two descriptive columns.
path_to_results = os.getcwd() + "/results/easySHARE/"

multi_columns = pd.MultiIndex.from_product(
    [
        ["RUMBoost", "TasteNet"],
        [
            "Functional intercept and slopes",
            "Functional slopes only",
            "Functional intercept only",
            "No functional effects",
        ],
    ],
)

# (func_int, func_params) flags, spelled as in the file names -> column label.
effect_labels = {
    ("True", "True"): "Functional intercept and slopes",
    ("True", "False"): "Functional intercept only",
    ("False", "True"): "Functional slopes only",
    ("False", "False"): "No functional effects",
}

# One tuple per table row: (row label, search space, sampling distribution).
# Keeping the three together prevents the parallel lists from getting misaligned.
hyperparameter_rows = [
    ("Validation MCEL", "-", "-"),
    ("Time [s]", "-", "-"),
    ("best_iteration", r"Max 3000 iterations \slash 200 epochs", "-"),
    ("lambda_l1", r"$[10^{-8}, 1]$", "Log-uniform"),
    ("lambda_l2", r"$[10^{-8}, 1]$", "Log-uniform"),
    ("num_leaves", r"$[2, 256]$", "Discrete uniform"),
    ("feature_fraction", r"$[0.4, 1]$", "Uniform"),
    ("bagging_fraction", r"$[0.4, 1]$", "Uniform"),
    ("bagging_freq", r"$[1, 7]$", "Discrete uniform"),
    ("min_data_in_leaf", r"$[1, 200]$", "Discrete uniform"),
    ("max_bin", r"$[64, 511]$", "Discrete uniform"),
    ("min_sum_hessian_in_leaf", r"$[10^{-8}, 10]$", "Log-uniform"),
    ("min_gain_to_split", r"$[10^{-8}, 10]$", "Log-uniform"),
    ("batch_size", r"\{256, 512\}", "Categorical"),
    ("learning_rate", r"$[0.0001, 0.01]$", "Log-uniform"),
    ("dropout", r"$[0.0, 0.9]$", "Uniform"),
    ("act_func", r"\{ReLU, Sigmoid, Tanh\}", "Categorical"),
    ("batch_norm", r"\{True, False\}", "Categorical"),
    (
        "layer_sizes",
        r"\{[32], [64], [128], [32, 32], [64, 64], [128, 128], [64, 128], [128, 64], [64, 128, 64]\}",
        "Categorical",
    ),
]

indices = [row[0] for row in hyperparameter_rows]
search_space = {"Search space": [row[1] for row in hyperparameter_rows]}
distribution = {"Distribution": [row[2] for row in hyperparameter_rows]}

hyp_results = pd.DataFrame({}, columns=multi_columns, index=indices)

search_space_series = pd.Series(search_space["Search space"], index=hyp_results.index)
distribution_series = pd.Series(distribution["Distribution"], index=hyp_results.index)

# The descriptive columns go first, under an empty top-level header.
hyp_results.insert(0, ("", "Search space"), search_space_series)
hyp_results.insert(1, ("", "Distribution"), distribution_series)

for model in ["RUMBoost", "TasteNet"]:
    for func_int in ["True", "False"]:
        for func_params in ["True", "False"]:
            # No hyperparameter-search files are read for TasteNet without
            # functional effects; its column keeps the "-" placeholder.
            if model == "TasteNet" and func_int == "False" and func_params == "False":
                continue

            column = (model, effect_labels[(func_int, func_params)])
            run_tag = f"fi{func_int}_fp{func_params}"

            # Best hyperparameters found by the search, keyed by parameter name.
            # NOTE(review): unpickling executes arbitrary code; only load files
            # produced by this project.
            results_dict = pd.read_pickle(
                f"{path_to_results}{model}/best_params_{run_tag}.pkl"
            )
            results = pd.DataFrame(results_dict.values(), index=results_dict.keys())
            hyp_results.loc[results.index, column] = results.values.reshape(-1)

            # Search summary: each line is parsed as "<name>: <number>".
            opt_and_cel = pd.read_table(
                f"{path_to_results}{model}/hyper_search_info_{run_tag}.txt",
                header=None,
                index_col=0,
            )
            opt_and_cel_dict = {}
            for element in opt_and_cel.index.to_list():
                ind_and_value = element.split(":")
                opt_and_cel_dict[ind_and_value[0]] = float(ind_and_value[1].strip())
            opt_and_cel = pd.DataFrame(
                opt_and_cel_dict.values(),
                index=opt_and_cel_dict.keys(),
                columns=["values"],
            )
            hyp_results.loc["Time [s]", column] = opt_and_cel.loc[
                "Optimisation time"
            ].values.round(0)
            hyp_results.loc["Validation MCEL", column] = opt_and_cel.loc[
                "Best value"
            ].values.round(3)

hyp_results = hyp_results.rename(index={"best_iteration": "Best iteration/epoch"})
hyp_results = hyp_results.fillna("-")
# Every RUMBoost column gets the same learning rate, written here by hand
# (presumably because it was fixed rather than searched -- TODO confirm).
hyp_results.loc["learning_rate", "RUMBoost"] = 0.05

In [104]:
# Display the assembled easySHARE hyperparameter table.
hyp_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,RUMBoost,RUMBoost,RUMBoost,RUMBoost,TasteNet,TasteNet,TasteNet,TasteNet
Unnamed: 0_level_1,Search space,Distribution,Functional intercept and slopes,Functional slopes only,Functional intercept only,No functional effects,Functional intercept and slopes,Functional slopes only,Functional intercept only,No functional effects
Validation MCEL,-,-,0.253,0.253,0.253,0.261,0.253,0.253,0.254,-
Time [s],-,-,4512.0,5683.0,7752.0,23230.0,9501.0,7977.0,7969.0,-
Best iteration/epoch,Max 3000 iterations \slash 200 epochs,-,385.0,517.0,867.0,1070.0,41.0,38.0,21.0,-
lambda_l1,"$[10^{-8}, 1]$",Log-uniform,0.520881,0.000309,0.0,0.0,0.000786,0.000678,0.0,-
lambda_l2,"$[10^{-8}, 1]$",Log-uniform,0.000019,0.0,0.000798,0.0,0.000112,0.000001,0.091343,-
num_leaves,"$[2, 256]$",Discrete uniform,3.0,3.0,11.0,5.0,-,-,-,-
feature_fraction,"$[0.4, 1]$",Uniform,0.787585,0.707734,0.492381,0.978812,-,-,-,-
bagging_fraction,"$[0.4, 1]$",Uniform,0.997844,0.998197,0.878719,0.921156,-,-,-,-
bagging_freq,"$[1, 7]$",Discrete uniform,2.0,1.0,1.0,4.0,-,-,-,-
min_data_in_leaf,"$[1, 200]$",Discrete uniform,159.0,173.0,194.0,155.0,-,-,-,-


In [134]:
# Export the hyperparameter table to LaTeX. With index=True the row labels take
# one LaTeX column in addition to the data columns, so the spec needs one "l"
# per index level plus one "r" per DataFrame column.
hyp_results.to_latex(
    "results/easySHARE/hyperparameter_results.tex",
    index=True,
    float_format="%.3f",
    escape=False,
    column_format="l" * hyp_results.index.nlevels + "r" * len(hyp_results.columns),
)

## Thresholds

In [132]:
from models_wrapper import TasteNet, RUMBoost
import numpy as np
import lightgbm as lgb
import torch

# Collect the ordinal thresholds of every saved easySHARE model, shifted by the
# utility the model produces for an all-zero input, into one table.
# NOTE: relies on `all_alt_spec_features` and `socio_demo_chars` as built in the
# easySHARE "Variables in the model" cell; the Swissmetro section rebinds both
# names, so run this cell before that one (or re-run the easySHARE cell first).
path_to_results = os.getcwd() + "/results/easySHARE/"

multi_columns = pd.MultiIndex.from_product(
    [
        ["RUMBoost", "TasteNet"],
        [
            "Functional intercept and slopes",
            "Functional slopes only",
            "Functional intercept only",
            "No functional effects",
        ],
    ],
)

indices = pd.Index([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], name="Thresholds")

thresholds_results = pd.DataFrame({}, columns=multi_columns, index=indices)

# (func_int, func_params) flags, spelled as in the file names -> column label.
effect_labels = {
    ("True", "True"): "Functional intercept and slopes",
    ("True", "False"): "Functional intercept only",
    ("False", "True"): "Functional slopes only",
    ("False", "False"): "No functional effects",
}

# Columns of the all-zero input row passed to RUMBoost.
feature_columns = all_alt_spec_features + socio_demo_chars

# Use the GPU when one is present; otherwise fall back to the CPU instead of
# failing on machines without CUDA.
# NOTE(review): assumes the TasteNet model is loaded on this same device -- confirm.
tastenet_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for model in ["RUMBoost", "TasteNet"]:
    for func_int in ["True", "False"]:
        for func_params in ["True", "False"]:

            if model == "RUMBoost":
                path = (
                    f"{path_to_results}{model}/model_fi{func_int}_fp{func_params}.json"
                )
                actual_model = RUMBoost()
                actual_model.load_model(path)
                df_zero = pd.DataFrame(
                    np.zeros(len(feature_columns)).reshape(1, -1),
                    columns=feature_columns,
                )
                lgb_zero = lgb.Dataset(df_zero, label=np.zeros(1), free_raw_data=False)
                print(actual_model.model.asc)
                # Zero the stored `asc` before predicting so it does not
                # contribute to the utilities computed below.
                actual_model.model.asc = torch.tensor(
                    [0], device=actual_model.model.device
                )
                raw_preds_zero = (
                    actual_model.model.predict(lgb_zero, utilities=True).cpu().numpy()
                )
                thresh = actual_model.model.thresholds - raw_preds_zero
            elif model == "TasteNet":
                path = (
                    f"{path_to_results}{model}/model_fi{func_int}_fp{func_params}.pth"
                )
                actual_model = TasteNet()
                actual_model.load_model(path)
                z = torch.zeros(len(socio_demo_chars), device=tastenet_device)
                if func_int == "True":
                    # Evaluate the parameter network at an all-zero input and
                    # keep its last output (presumably the functional intercept
                    # -- TODO confirm).
                    z = z.reshape(1, -1)
                    v_all = actual_model.model.params_module(z)
                    v = v_all[:, -1]
                else:
                    v = actual_model.model.util_module.intercept
                # The stored biases are reversed before the shift is applied.
                thresh = (
                    actual_model.model.ordinal_module.coral_bias.detach()
                    .cpu()
                    .numpy()[::-1]
                ) - v.detach().cpu().numpy()

            thresholds_results.loc[
                :, (model, effect_labels[(func_int, func_params)])
            ] = thresh.round(3)

thresholds_results

[0.0]
[0.0]
[0.0]
[0.0]


Unnamed: 0_level_0,RUMBoost,RUMBoost,RUMBoost,RUMBoost,TasteNet,TasteNet,TasteNet,TasteNet
Unnamed: 0_level_1,Functional intercept and slopes,Functional slopes only,Functional intercept only,No functional effects,Functional intercept and slopes,Functional slopes only,Functional intercept only,No functional effects
Thresholds,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,-1.019,-1.003,-1.865,-1.718,-3.119,-4.138,-4.648,-4.544
2,0.148,0.163,-0.694,-0.591,-1.845,-2.523,-3.013,-2.714
3,1.033,1.052,0.203,0.274,-0.649,-1.288,-1.83,-1.636
4,1.792,1.806,0.956,0.996,0.301,-0.343,-0.866,-0.206
5,2.528,2.541,1.688,1.703,1.106,0.447,-0.069,0.612
6,3.262,3.276,2.42,2.412,1.837,1.169,0.658,1.198
7,3.999,4.013,3.149,3.122,2.561,1.881,1.381,1.906
8,4.742,4.757,3.883,3.839,3.284,2.601,2.096,2.544
9,5.56,5.571,4.682,4.625,4.024,3.335,2.83,3.311
10,6.568,6.572,5.648,5.574,4.806,4.116,3.594,4.125


In [133]:
# Export the thresholds table to LaTeX: one left-aligned column for the
# threshold number followed by eight right-aligned model columns.
thresholds_latex_options = {
    "index": True,
    "float_format": "%.3f",
    "escape": False,
    "column_format": "lrrrrrrrr",
}
thresholds_results.to_latex(
    "results/easySHARE/thresholds_results.tex", **thresholds_latex_options
)

# Swissmetro

## Benchmarks

In [9]:
# Build the SwissMetro benchmark table: one row per (model, functional-effects
# variant), filled from the per-run result CSV files under results/SwissMetro/.
path_to_results = os.getcwd() + "/results/SwissMetro/"

index_pd = pd.MultiIndex.from_product(
    [
        ["RUMBoost", "TasteNet"],
        [
            "Functional intercept and slopes",
            "Functional slopes only",
            "Functional intercept only",
            "No functional effects",
        ],
    ],
    names=["Model", "Functional effects"],
)

benchmark_table = pd.DataFrame(
    {},
    columns=["CEL", "Comput. time [s]"],
    index=index_pd,
)

# (func_int, func_params) flags, spelled as in the file names -> row label.
effect_labels = {
    ("True", "True"): "Functional intercept and slopes",
    ("True", "False"): "Functional intercept only",
    ("False", "True"): "Functional slopes only",
    ("False", "False"): "No functional effects",
}

for model in ["RUMBoost", "TasteNet"]:
    for func_int in ["True", "False"]:
        for func_params in ["True", "False"]:
            results = pd.read_csv(
                f"{path_to_results}{model}/results_dict_fi{func_int}_fp{func_params}.csv"
            )
            benchmark_table.loc[
                (model, effect_labels[(func_int, func_params)]),
                ["CEL", "Comput. time [s]"],
            ] = results[["loss_test", "train_time"]].values.round(3)

# Cast and round the timing column once, after every row has been filled
# (this is loop-invariant work and does not need to run for each file).
benchmark_table["Comput. time [s]"] = (
    benchmark_table["Comput. time [s]"].astype(float).round(2)
)

benchmark_table


Unnamed: 0_level_0,Unnamed: 1_level_0,CEL,Comput. time [s]
Model,Functional effects,Unnamed: 2_level_1,Unnamed: 3_level_1
RUMBoost,Functional intercept and slopes,0.624,6.98
RUMBoost,Functional slopes only,0.679,7.32
RUMBoost,Functional intercept only,0.63,1.76
RUMBoost,No functional effects,0.787,1.71
TasteNet,Functional intercept and slopes,0.669,5.54
TasteNet,Functional slopes only,0.701,4.98
TasteNet,Functional intercept only,0.753,3.25
TasteNet,No functional effects,0.854,9.74


In [10]:
# Export the benchmark table to LaTeX. With index=True every index level becomes
# a LaTeX column, so the column spec is derived from the table itself (left-aligned
# index levels, right-aligned metrics) instead of being copied from another table.
benchmark_table.to_latex(
    "results/SwissMetro/benchmark_table.tex",
    index=True,
    float_format="%.3f",
    escape=False,
    column_format="l" * benchmark_table.index.nlevels
    + "r" * len(benchmark_table.columns),
)

## Variables in the model

In [12]:
from constants import alt_spec_features, PATH_TO_DATA_TRAIN
from utils import pkl_to_df


# Flatten the per-key feature lists registered for SwissMetro into one list,
# keeping the order in which they are declared.
all_alt_spec_features = [
    feature
    for features in alt_spec_features["SwissMetro"].values()
    for feature in features
]

df = pkl_to_df(PATH_TO_DATA_TRAIN["SwissMetro"])

# Every remaining column that is neither an alternative-specific feature nor the
# "CHOICE" column.
excluded_columns = ["CHOICE"]
socio_demo_chars = [
    col
    for col in df.columns
    if not (col in all_alt_spec_features or col in excluded_columns)
]

In [13]:
# Display the SwissMetro column names that were not classified as alternative-specific features.
socio_demo_chars

['MALE_0',
 'MALE_1',
 'AGE_0',
 'AGE_1',
 'AGE_2',
 'AGE_3',
 'AGE_4',
 'INCOME_0',
 'INCOME_1',
 'INCOME_2',
 'INCOME_3',
 'FIRST_0',
 'FIRST_1',
 'WHO_0',
 'WHO_1',
 'WHO_2',
 'PURPOSE_0',
 'PURPOSE_1',
 'PURPOSE_2',
 'PURPOSE_3',
 'LUGGAGE_0',
 'LUGGAGE_1',
 'LUGGAGE_2',
 'GA_0',
 'GA_1']

In [14]:
# Display the flattened list of SwissMetro alternative-specific feature names.
all_alt_spec_features

['TRAIN_TT',
 'TRAIN_HE',
 'TRAIN_CO',
 'SM_TT',
 'SM_HE',
 'SM_CO',
 'SM_SEATS',
 'CAR_TT',
 'CAR_CO']

## Hyperparameters

In [138]:
import os
import pandas as pd

# Assemble the SwissMetro hyperparameter table: one row per hyperparameter (plus
# validation loss, search time and best iteration), one column per
# (model, functional-effects variant), preceded by two descriptive columns.
path_to_results = os.getcwd() + "/results/SwissMetro/"

multi_columns = pd.MultiIndex.from_product(
    [
        ["RUMBoost", "TasteNet"],
        [
            "Functional intercept and slopes",
            "Functional slopes only",
            "Functional intercept only",
            "No functional effects",
        ],
    ],
)

# (func_int, func_params) flags, spelled as in the file names -> column label.
effect_labels = {
    ("True", "True"): "Functional intercept and slopes",
    ("True", "False"): "Functional intercept only",
    ("False", "True"): "Functional slopes only",
    ("False", "False"): "No functional effects",
}

# One tuple per table row: (row label, search space, sampling distribution).
# Keeping the three together prevents the parallel lists from getting misaligned.
hyperparameter_rows = [
    ("Validation MCEL", "-", "-"),
    ("Time [s]", "-", "-"),
    ("best_iteration", r"Max 3000 iterations \slash 200 epochs", "-"),
    ("lambda_l1", r"$[10^{-8}, 1]$", "Log-uniform"),
    ("lambda_l2", r"$[10^{-8}, 1]$", "Log-uniform"),
    ("num_leaves", r"$[2, 256]$", "Discrete uniform"),
    ("feature_fraction", r"$[0.4, 1]$", "Uniform"),
    ("bagging_fraction", r"$[0.4, 1]$", "Uniform"),
    ("bagging_freq", r"$[1, 7]$", "Discrete uniform"),
    ("min_data_in_leaf", r"$[1, 200]$", "Discrete uniform"),
    ("max_bin", r"$[64, 511]$", "Discrete uniform"),
    ("min_sum_hessian_in_leaf", r"$[10^{-8}, 10]$", "Log-uniform"),
    ("min_gain_to_split", r"$[10^{-8}, 10]$", "Log-uniform"),
    ("batch_size", r"\{256, 512\}", "Categorical"),
    ("learning_rate", r"$[0.0001, 0.01]$", "Log-uniform"),
    ("dropout", r"$[0.0, 0.9]$", "Uniform"),
    ("act_func", r"\{ReLU, Sigmoid, Tanh\}", "Categorical"),
    ("batch_norm", r"\{True, False\}", "Categorical"),
    (
        "layer_sizes",
        r"\{[32], [64], [128], [32, 32], [64, 64], [128, 128], [64, 128], [128, 64], [64, 128, 64]\}",
        "Categorical",
    ),
]

indices = [row[0] for row in hyperparameter_rows]
search_space = {"Search space": [row[1] for row in hyperparameter_rows]}
distribution = {"Distribution": [row[2] for row in hyperparameter_rows]}

hyp_results = pd.DataFrame({}, columns=multi_columns, index=indices)

search_space_series = pd.Series(search_space["Search space"], index=hyp_results.index)
distribution_series = pd.Series(distribution["Distribution"], index=hyp_results.index)

# The descriptive columns go first, under an empty top-level header.
hyp_results.insert(0, ("", "Search space"), search_space_series)
hyp_results.insert(1, ("", "Distribution"), distribution_series)

for model in ["RUMBoost", "TasteNet"]:
    for func_int in ["True", "False"]:
        for func_params in ["True", "False"]:
            column = (model, effect_labels[(func_int, func_params)])
            run_tag = f"fi{func_int}_fp{func_params}"

            # Best hyperparameters are read for every run except TasteNet
            # without functional effects; its search summary below is still read.
            if not (
                model == "TasteNet" and func_int == "False" and func_params == "False"
            ):
                # NOTE(review): unpickling executes arbitrary code; only load
                # files produced by this project.
                results_dict = pd.read_pickle(
                    f"{path_to_results}{model}/best_params_{run_tag}.pkl"
                )
                results = pd.DataFrame(
                    results_dict.values(), index=results_dict.keys()
                )
                hyp_results.loc[results.index, column] = results.values.reshape(-1)

            # Search summary: each line is parsed as "<name>: <number>".
            opt_and_cel = pd.read_table(
                f"{path_to_results}{model}/hyper_search_info_{run_tag}.txt",
                header=None,
                index_col=0,
            )
            opt_and_cel_dict = {}
            for element in opt_and_cel.index.to_list():
                ind_and_value = element.split(":")
                opt_and_cel_dict[ind_and_value[0]] = float(ind_and_value[1].strip())
            opt_and_cel = pd.DataFrame(
                opt_and_cel_dict.values(),
                index=opt_and_cel_dict.keys(),
                columns=["values"],
            )
            hyp_results.loc["Time [s]", column] = opt_and_cel.loc[
                "Optimisation time"
            ].values.round(0)
            hyp_results.loc["Validation MCEL", column] = opt_and_cel.loc[
                "Best value"
            ].values.round(3)

hyp_results = hyp_results.rename(index={"best_iteration": "Best iteration/epoch"})
hyp_results = hyp_results.fillna("-")
# Every RUMBoost column gets the same learning rate, written here by hand
# (presumably because it was fixed rather than searched -- TODO confirm).
hyp_results.loc["learning_rate", "RUMBoost"] = 0.05

In [139]:
# Display the assembled SwissMetro hyperparameter table.
hyp_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,RUMBoost,RUMBoost,RUMBoost,RUMBoost,TasteNet,TasteNet,TasteNet,TasteNet
Unnamed: 0_level_1,Search space,Distribution,Functional intercept and slopes,Functional slopes only,Functional intercept only,No functional effects,Functional intercept and slopes,Functional slopes only,Functional intercept only,No functional effects
Validation MCEL,-,-,0.635,0.695,0.633,0.769,0.624,0.667,0.712,0.835
Time [s],-,-,840.0,770.0,219.0,197.0,524.0,448.0,526.0,1093.0
Best iteration/epoch,Max 3000 iterations \slash 200 epochs,-,931.0,785.0,399.0,1156.0,71.0,68.0,44.0,-
lambda_l1,"$[10^{-8}, 1]$",Log-uniform,0.000577,0.000373,0.053499,0.0,0.000098,0.420822,0.000012,-
lambda_l2,"$[10^{-8}, 1]$",Log-uniform,0.0,0.002573,0.000041,0.005099,0.700295,0.0,0.000006,-
num_leaves,"$[2, 256]$",Discrete uniform,9.0,77.0,244.0,223.0,-,-,-,-
feature_fraction,"$[0.4, 1]$",Uniform,0.485203,0.768465,0.677433,0.508769,-,-,-,-
bagging_fraction,"$[0.4, 1]$",Uniform,0.990212,0.87907,0.442821,0.981164,-,-,-,-
bagging_freq,"$[1, 7]$",Discrete uniform,5.0,6.0,7.0,3.0,-,-,-,-
min_data_in_leaf,"$[1, 200]$",Discrete uniform,81.0,108.0,39.0,15.0,-,-,-,-


In [140]:
# Export the hyperparameter table to LaTeX. With index=True the row labels take
# one LaTeX column in addition to the data columns, so the spec needs one "l"
# per index level plus one "r" per DataFrame column.
hyp_results.to_latex(
    "results/SwissMetro/hyperparameter_results.tex",
    index=True,
    float_format="%.3f",
    escape=False,
    column_format="l" * hyp_results.index.nlevels + "r" * len(hyp_results.columns),
)