In [1]:
import pandas as pd
import os

# Benchmarks

In [7]:
# Directory that holds one sub-folder of result CSV files per model.
path_to_results = os.getcwd() + "/results/"

# Models to compare; each name is both a sub-folder of `path_to_results`
# and a label on the first level of the table index.
MODELS = ["RUMBoost", "TasteNet"]

# Row label for every (functional intercept, functional slopes) combination.
# The keys are the flag values exactly as they are spelled in the result file
# names; the insertion order is the row order of the table.
EFFECTS_LABELS = {
    ("True", "True"): "Functional intercept and slopes",
    ("False", "True"): "Functional slopes only",
    ("True", "False"): "Functional intercept only",
    ("False", "False"): "No functional effects",
}

# Column read from each result CSV -> column it fills in the benchmark table.
METRIC_COLUMNS = {
    "mae_test": "MAE",
    "emae_test": "EMAE",
    "loss_test": "MCEL",
    "train_time": "Comput. time [s]",
}

index_pd = pd.MultiIndex.from_product(
    [MODELS, list(EFFECTS_LABELS.values())],
    names=["Model", "Functional effects"],
)

# Empty table (all NaN) with one row per (model, functional effects) pair.
benchmark_table = pd.DataFrame(
    {},
    columns=list(METRIC_COLUMNS.values()),
    index=index_pd,
)

for model in MODELS:
    for (func_int, func_params), effects_label in EFFECTS_LABELS.items():
        results = pd.read_csv(
            f"{path_to_results}{model}/results_dict_fi{func_int}_fp{func_params}.csv"
        )

        # Copy the test metrics and training time into the matching row,
        # rounded to three decimals.
        benchmark_table.loc[
            (model, effects_label),
            list(METRIC_COLUMNS.values()),
        ] = results[list(METRIC_COLUMNS)].values.round(3)

# Cast and round the timing column once, after every row has been filled,
# instead of re-processing the whole column on each loop iteration.
benchmark_table["Comput. time [s]"] = (
    benchmark_table["Comput. time [s]"].astype(float).round(2)
)

benchmark_table


Unnamed: 0_level_0,Unnamed: 1_level_0,MAE,EMAE,MCEL,Comput. time [s]
Model,Functional effects,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RUMBoost,Functional intercept and slopes,1.369,0.146,0.251,168.41
RUMBoost,Functional slopes only,1.37,0.146,0.251,178.04
RUMBoost,Functional intercept only,1.368,0.146,0.251,241.08
RUMBoost,No functional effects,1.413,0.152,0.26,292.95
TasteNet,Functional intercept and slopes,1.37,0.145,0.251,84.09
TasteNet,Functional slopes only,1.375,0.147,0.252,85.42
TasteNet,Functional intercept only,1.377,0.148,0.252,45.6
TasteNet,No functional effects,1.425,0.151,0.262,307.65


In [8]:
# Export the benchmark table as a LaTeX tabular.
# With index=True every index level is written as its own column, so the
# column spec needs one "l" per index level followed by one "r" per metric
# column. The previous hard-coded "lrrrr" declared only five columns for a
# table that has two index levels and four metric columns.
latex_column_format = (
    "l" * benchmark_table.index.nlevels + "r" * benchmark_table.shape[1]
)

benchmark_table.to_latex(
    "results/benchmark_table.tex",
    index=True,
    float_format="%.3f",
    escape=False,
    column_format=latex_column_format,
)

# Variables in the model

In [10]:
from constants import alt_spec_features, PATH_TO_DATA

# Load the dataset from the location configured in `constants`.
df = pd.read_csv(PATH_TO_DATA)

# Columns that are dropped from the selection in addition to the
# `alt_spec_features` columns.
other_excluded_cols = ["mergeid", "hhid", "coupleid", "depression_scale"]

# Keep, in their original order, the columns of `df` that appear in neither
# exclusion list.
socio_demo_chars = [
    column
    for column in df.columns
    if not (column in alt_spec_features or column in other_excluded_cols)
]

In [11]:
# Display the list of columns selected as socio-demographic characteristics.
socio_demo_chars

['female',
 'age',
 'hhsize',
 'nb_children',
 'hh_net_income',
 'born_in_itw_country_yes',
 'education_level_1',
 'education_level_2',
 'education_level_3',
 'education_level_4',
 'education_level_5',
 'education_level_6',
 'education_none',
 'education_studying',
 'marital_status_married',
 'marital_status_reg_partnership',
 'marital_status_separated',
 'marital_status_single',
 'marital_status_widowed',
 'partnerinhh_yes',
 'mother_alive_yes',
 'father_alive_yes',
 'help_outside_hh_yes',
 'smoking_yes',
 'ever_smoked_yes',
 'vigorous_activity_more_than_once_a_week',
 'vigorous_activity_never',
 'vigorous_activity_once_three_times_a_month',
 'job_situation_homemaker',
 'job_situation_other',
 'job_situation_retired',
 'job_situation_unable_to_work',
 'job_situation_unemployed',
 'household_ends_meet_fairly_easily',
 'household_ends_meet_with_great_difficulty',
 'household_ends_meet_with_some_difficulty',
 'has_citizenship_yes',
 'country_BE',
 'country_BG',
 'country_CH',
 'country_C

In [12]:
# Display the `alt_spec_features` list imported from `constants`.
alt_spec_features

['chronic_mod',
 'nb_doctor_visits',
 'maxgrip',
 'daily_activities_index',
 'instrumental_activities_index',
 'mobilityind',
 'lgmuscle',
 'grossmotor',
 'finemotor',
 'recall_1',
 'recall_2',
 'bmi',
 'sphus_fair',
 'sphus_good',
 'sphus_poor',
 'sphus_very_good',
 'hospitalised_last_year_yes',
 'nursing_home_last_year_yes_permanently',
 'nursing_home_last_year_yes_temporarily']

# Alternative-specific feature names

In [None]:
# Human-readable label for each feature name.
# NOTE(review): the saved output of this cell also lists
# "Self-perceived health - excellent", which has no key below, so the stored
# output appears to come from an earlier version of the mapping -- re-run the
# cell or restore the entry, whichever is intended.
feature_names = {
    "bmi": "BMI",
    "chronic_mod": "Number of chronic conditions",
    "daily_activities_index": "Daily activities index",
    "finemotor": "Fine motor skills",
    "grossmotor": "Gross motor skills",
    "hospitalised_last_year_yes": "Hospitalised last year",
    "lgmuscle": "Large muscle skills",
    "maxgrip": "Max grip strength",
    "mobilityind": "Mobility index",
    "nursing_home_last_year_yes_permanently": "Nursing home last year (permanently)",
    "nursing_home_last_year_yes_temporarily": "Nursing home last year (temporarily)",
    "recall_1": "Recall 1",
    "recall_2": "Recall 2",
    "sphus_fair": "Self-perceived health - fair",
    "sphus_good": "Self-perceived health - good",
    "sphus_poor": "Self-perceived health - poor",
    "sphus_very_good": "Self-perceived health - very good",
    "sphus_very_poor": "Self-perceived health - very poor",
    "instrumental_activities_index": "Instrumental activities index",
    "nb_doctor_visits": "Number of doctor visits",
}

# Print every label, in the insertion order of the mapping.
print(list(feature_names.values()))

['BMI', 'Number of chronic conditions', 'Daily activities index', 'Fine motor skills', 'Gross motor skills', 'Hospitalised last year', 'Large muscle skills', 'Max grip strength', 'Mobility index', 'Nursing home last year (permanently)', 'Nursing home last year (temporarily)', 'Recall 1', 'Recall 2', 'Self-perceived health - excellent', 'Self-perceived health - fair', 'Self-perceived health - good', 'Self-perceived health - poor', 'Self-perceived health - very good', 'Self-perceived health - very poor', 'Instrumental activities index', 'Number of doctor visits']


# Hyperparameters

# Thresholds