# TODO!
1. load data, set-up boilerplate
2. load all hyperparameter spaces
3. sample n samples from each
4. launch train-evaluate functions for each sample
5. join everything in a DF following the same structure as the GBM experiments
6. save everything to disk
___

In [None]:
import os
import sys
import numpy as np
import pandas as pd

from pathlib import Path

In [None]:
# Name of the dataset to run on (alternatives are kept commented out);
# it is used to look up the dataset details and to build the experiment paths
#DATASET_NAME = "AOF-FairHO"
#DATASET_NAME = "AOF-Fairbench"
DATASET_NAME = "Adult-2021"

# Experiment identifier; used as a sub-path under both the "confs" and the
# "results" folders, so it may contain nested folders
#EXPERIMENT_NAME = "baselines"
EXPERIMENT_NAME = "baselines/Fairlearn-EG"

# Number of Random Search trials per algorithm
N_RS_TRIALS = 100

# Whether to set the test threshold on validation data; forwarded to
# try_hyperparams as `set_test_threshold_on_validation`
SET_TEST_THRESHOLD_ON_VALIDATION = False

# Maximum number of worker threads used to run trials concurrently
N_THREADS = 2

In [None]:
# Seed for the random number generator created below
SEED = 24

from random import Random
# Dedicated RNG instance (instead of the module-level `random` state); it is
# used to draw one seed per sampled hyperparameter configuration
rng = Random(SEED)

# Relevant paths

In [None]:
from utils.datasets import get_dataset_details
# Look up the details of the selected dataset, requested with local_or_cluster="local"
dataset_details = get_dataset_details(DATASET_NAME, local_or_cluster="local")

# Expose dataset details as global variables :)
# NOTE(review): names such as root_path, train_data_path, val_data_path,
# test_data_path, label_col, sensitive_col, target_metric, target_value,
# perf_metric and fair_metric are used later in this notebook without being
# defined in it, so they presumably come from this dict -- confirm against
# get_dataset_details.
globals().update(dataset_details)

In [None]:
# Path of the data file for each split; a value of None makes the loading
# loop skip that split
data_path = {
    "train": train_data_path,
    "validation": val_data_path,
    "test": test_data_path,
}

# Folder layout: <root>/experiments/<dataset>/{confs,results}/<experiment name>
experiment_path = root_path / "experiments" / DATASET_NAME
configs_path = experiment_path / "confs" / EXPERIMENT_NAME
results_path = experiment_path / "results" / EXPERIMENT_NAME

# EXPERIMENT_NAME may contain nested folders (e.g. "baselines/Fairlearn-EG"),
# so any missing parent folders must be created as well; without parents=True
# mkdir raises FileNotFoundError when an intermediate folder does not exist.
results_path.mkdir(parents=True, exist_ok=True)

# Load data

In [None]:
def get_X_y_s(data, label_col, sensitive_col, unawareness: bool = False):
    """Split a DataFrame into features, labels, and sensitive attribute.

    Args:
        data: DataFrame holding the feature columns together with the label
            and sensitive-attribute columns.
        label_col: Name of the label column; never part of the features.
        sensitive_col: Name of the sensitive-attribute column.
        unawareness: When True, the sensitive column is also excluded from
            the returned features.

    Returns:
        A tuple ``(X, y, s)``: the feature DataFrame, the labels as an
        integer array, and the sensitive attribute as an integer array.
    """
    excluded = {label_col}
    if unawareness:
        excluded.add(sensitive_col)

    # Keep the original column order for the remaining features
    kept_columns = [name for name in data.columns if name not in excluded]

    features = data[kept_columns]
    labels = data[label_col].to_numpy(dtype=int)
    sensitive = data[sensitive_col].to_numpy(dtype=int)
    return features, labels, sensitive

## Load every split that has a path configured (train included); splits whose
## path is None are skipped
data = {}
for split_name, split_path in data_path.items():
    if split_path is not None:
        # Tab-separated file; the first column is the index, the first row the header
        split_df = pd.read_csv(split_path, sep="\t", index_col=0, header=0)
        features, labels, sensitive = get_X_y_s(
            split_df, label_col=label_col, sensitive_col=sensitive_col)

        # Store the full frame under the split name plus its X / y / s parts
        data.update({
            split_name: split_df,
            f"X_{split_name}": features,
            f"y_{split_name}": labels,
            f"s_{split_name}": sensitive,
        })

# Load hyperparameter spaces
- load all `*.hyperparameter-space.yaml` files under the confs/\<experiment name\> folder

In [None]:
import re
from utils.fairautoml_tuners_utils import load_hyperparameter_space

# Matches "<name>.hyperparameter-space.yaml"; both dots are escaped so that
# they only match a literal "."
yaml_regex = re.compile(r"^(?P<name>.+)\.hyperparameter-space\.yaml$")
hyperparam_spaces = dict()

# os.listdir returns entries in arbitrary order. The order of
# hyperparam_spaces decides the order in which the seeded RNG is consumed
# when sampling configs, so sort the file names to keep the sampled configs
# reproducible for a given SEED.
for file_name in sorted(os.listdir(configs_path)):
    m = yaml_regex.match(file_name)

    if m:
        file_path = configs_path / file_name
        # Key each hyperparameter space by the algorithm name in the file name
        hyperparam_spaces[m.group("name")] = load_hyperparameter_space(file_path)

# Randomly sample _n_ hyperparameter configs per algorithm
- and write them all to a single JSON file under the confs/\<experiment name\> folder

In [None]:
from utils.hyperparams import suggest_random_hyperparams_with_classpath


def _sample_algo_configs(hyper_space):
    """Draw N_RS_TRIALS random configurations from one hyperparameter space.

    Each draw gets its own seed taken from the shared `rng`.
    """
    return [
        suggest_random_hyperparams_with_classpath(
            hyper_space,
            seed=rng.randrange(2**32 - 1),
        )
        for _ in range(N_RS_TRIALS)
    ]


# Algorithm name -> list of sampled configurations, in the iteration order of
# hyperparam_spaces
configs = {
    algo_name: _sample_algo_configs(hyper_space)
    for algo_name, hyper_space in hyperparam_spaces.items()
}

In [None]:
# Persist all sampled configurations in a single JSON file
import json

configs_file_path = configs_path / ("configs.%d-trials-per-algo.json" % N_RS_TRIALS
                                    if isinstance(N_RS_TRIALS, int) and not isinstance(N_RS_TRIALS, bool)
                                    else f"configs.{N_RS_TRIALS}-trials-per-algo.json")
with configs_file_path.open("w") as out_file:
    json.dump(configs, out_file, indent=4)
    print(f"Saved configs to JSON file at '{configs_file_path}'")

# Train models, evaluate, and save results

In [None]:
###
# ** Set-up run_trial function with common kwargs
###
# `logging` is needed by the error handler in run_trial; without this import
# a failed trial would raise NameError instead of being logged.
import logging

from utils.model_evaluation import try_hyperparams
from tqdm import tqdm

# One tick per sampled configuration, summed over all algorithms
progress_bar = tqdm(
    total=sum(len(algo_configs) for algo_configs in configs.values()),
    desc="Progress", position=0)

def run_trial(hyperparams):
    """Train and evaluate a single hyperparameter configuration.

    Args:
        hyperparams: One sampled configuration, forwarded unchanged to
            try_hyperparams together with the shared keyword arguments.

    Returns:
        Whatever try_hyperparams returns, or an empty dict when it raised a
        RuntimeError (the failure is logged and the trial is skipped).
    """
    ret = dict()

    try:
        ret = try_hyperparams(
            hyperparams,
            data=data,
            eval_on_train=False,
            target_metric=target_metric,
            target_value=target_value,
            set_test_threshold_on_validation=SET_TEST_THRESHOLD_ON_VALIDATION,
            n_threads=1,  # this is actually ignored for all models except LGBM
        )
    except RuntimeError as err:
        # Best effort: a failed trial must not abort the whole experiment
        logging.error("Trial failed with error '%s'", err)
    finally:
        # Advance the bar whether the trial succeeded or failed
        progress_bar.update()

    return ret


###
# ** Create a ThreadPool and launch trials **
###
from concurrent.futures import ThreadPoolExecutor

# Algorithm name -> list of trial results, in the same order as the configs
experiment_results = {}

for algo_name, algo_configs in configs.items():
    # A fresh pool per algorithm; leaving the `with` block waits for all of
    # its trials to finish before the next algorithm starts
    with ThreadPoolExecutor(max_workers=N_THREADS) as thread_pool:
        experiment_results[algo_name] = list(
            thread_pool.map(run_trial, algo_configs))

# Save results to disk

In [None]:
# Base file name (without extension) shared by the JSON and CSV result files;
# it encodes the dataset, the number of trials, the target metric/value and
# the threshold setting
RESULTS_NAME = ".".join([
    f"{DATASET_NAME}.BASELINE-results",
    f"{N_RS_TRIALS}-trials-per-algo",
    f"{target_metric}={target_value:.2}",
    f"use-val-threshold={SET_TEST_THRESHOLD_ON_VALIDATION}",
    # disabled suffix: f"randomized-clf={FAIRGBM_RANDOMIZED_CLASSIFIER}"
])
RESULTS_NAME

### Save dict as a JSON

In [None]:
import json
from utils import NumpyJSONEncoder

# Dump the raw per-algorithm results as JSON, using NumpyJSONEncoder as the
# encoder class
results_file_path = results_path / (RESULTS_NAME + ".json")
with results_file_path.open("w") as out_file:
    json.dump(experiment_results, out_file, cls=NumpyJSONEncoder, indent=4)
    print(f"Saved JSON results to file at '{results_file_path}'")

### Save organized results as a DataFrame

In [None]:
# Strip the 'time-taken' entry from every trial so only the per-dataset
# evaluation entries remain
tmp_results_for_df = {}
for frst_lvl, trial_evals in experiment_results.items():
    tmp_results_for_df[frst_lvl] = [
        {name: value for name, value in trial_eval.items() if name != "time-taken"}
        for trial_eval in trial_evals
    ]

# Flatten to {("<algo>.<trial idx>", <dataset type>): <result>} so that each
# (trial, dataset type) pair becomes one row of the DataFrame
flat_results = {}
for frst_lvl, frst_lvl_list in tmp_results_for_df.items():
    for scnd_lvl, scnd_lvl_dict in enumerate(frst_lvl_list):
        trial_label = f"{frst_lvl}.{scnd_lvl:03}"
        for dataset_type, dataset_result in scnd_lvl_dict.items():
            flat_results[(trial_label, dataset_type)] = dataset_result

# The dict keys become columns, so transpose to get one row per key
results_df = pd.DataFrame(flat_results).transpose()

# Write the table to disk as CSV, next to the JSON results
df_file_path = results_path / (RESULTS_NAME + ".csv")
results_df.to_csv(df_file_path)
print(f"Saved CSV results to file at '{df_file_path}'")

___
___
# Some plots

In [None]:
# Reload the saved CSV; its first two columns rebuild the two-level row index
# (trial label, dataset type)
results_df = pd.read_csv(df_file_path, index_col=[0, 1])
results_df

## Test results!

In [None]:
## Only TEST results
# Keep the rows whose second index level is "test", then drop that level so
# the index holds only the "<model>.<trial id>" labels
test_results_table = results_df.loc[pd.IndexSlice[:, "test"], :].droplevel(1, axis=0)

# The performance metric is derived as the complement of the false negative rate
test_results_table[perf_metric] = 1. - test_results_table["fnr"]
test_results_table = test_results_table.drop(columns="fnr")

eval_columns = [
    perf_metric,
    fair_metric,
    "fpr_diff",
    "fnr_diff",
    "roc_auc",
    "threshold",
]
# Take an explicit copy so the column assignments below operate on an
# independent frame
test_results_table = test_results_table[eval_columns].copy()

# Index labels look like "<model>.<trial id>". Split on the LAST dot instead
# of slicing a fixed number of characters, so that trial ids wider than three
# digits (1000+ trials) and model names containing dots are parsed correctly.
split_labels = [str(model_idx).rpartition(".") for model_idx in test_results_table.index]
test_results_table["model"] = [model_name for model_name, _, _ in split_labels]
test_results_table["id"] = [int(trial_id) for _, _, trial_id in split_labels]

# Display only: the sorted table is not assigned back
test_results_table.sort_values(by=perf_metric, axis=0, ascending=False)

## Validation results!

In [None]:
## Only VALIDATION results
# Keep the rows whose second index level is "validation", then drop that
# level so the index holds only the "<model>.<trial id>" labels
val_results_table = results_df.loc[pd.IndexSlice[:, "validation"], :].droplevel(1, axis=0)

# The performance metric is derived as the complement of the false negative rate
val_results_table[perf_metric] = 1. - val_results_table["fnr"]
val_results_table = val_results_table.drop(columns="fnr")

eval_columns = [
    perf_metric,
    fair_metric,
    "fpr_diff",
    "fnr_diff",
    "roc_auc",
    "threshold",
]
# Take an explicit copy so the column assignments below operate on an
# independent frame
val_results_table = val_results_table[eval_columns].copy()

# Index labels look like "<model>.<trial id>". Split on the LAST dot instead
# of slicing a fixed number of characters, so that trial ids wider than three
# digits (1000+ trials) and model names containing dots are parsed correctly.
split_labels = [str(model_idx).rpartition(".") for model_idx in val_results_table.index]
val_results_table["model"] = [model_name for model_name, _, _ in split_labels]
val_results_table["id"] = [int(trial_id) for _, _, trial_id in split_labels]

# Display only: the sorted table is not assigned back
val_results_table.sort_values(by=perf_metric, axis=0, ascending=False)