# Binary case

## Hyperparameter tuning

In [None]:
from src.validation import check_results
from joblib import Parallel, delayed
from itertools import product

# Hyperparameter grid for fair_rbh on the binary datasets: one check_results
# job is submitted per (dataset, gamma, iteration) combination.
datasets = ['bank', 'adult', 'heart_disease', 'german']
gammas = [0.03, 0.07, 0.1]
algorithm = ['fair_rbh']
distance_metric = {'fair_rbh': ['heom']}
distance_num = [0]  # positions into distance_metric[<algorithm>]
app_ns = [4]  # positions into approach_number[<algorithm>]
approach_number = {'fair_rbh': list(range(5))}
iterations = list(range(5))
models = ['logistic_regression', 'decision_tree', 'mlp']
config_path = '../configs'
results_path = '../validation'
data_path = '../data'
all_options = list(product(datasets, algorithm, distance_num, gammas, app_ns, iterations))

# n_jobs=-1 asks joblib to use every available core.
Parallel(n_jobs=-1)(
    delayed(check_results)(
        dataset,
        alg_name,
        distance_metric[alg_name][metric_pos],
        gamma_value,
        approach_number[alg_name][approach_pos],
        models,
        iteration,
        results_path=results_path,
        config_path=config_path,
        data_path=data_path,
    )
    for dataset, alg_name, metric_pos, gamma_value, approach_pos, iteration in all_options
)

In [None]:
from src.validation_fhf import check_results as fhf_check

# Hyperparameter grid for fos / fawos / hfos on the binary datasets.
# Each dataset appears exactly once: a repeated entry would submit the same
# (dataset, algorithm, value, iteration) job twice to the parallel pool.
datasets = ['adult', 'bank', 'heart_disease', 'german']
distance_metric = {'fawos': ['heom'], 'hfos': ['hvdm'], 'fos': ['heom']}
algorithm_name = ['fos', 'fawos', 'hfos']
# Candidate values per algorithm; tested_values_num indexes into these lists,
# so every list must hold exactly len(tested_values_num) entries.
tested_values = {
    'fawos': [
        {'safe_weight': 0, 'borderline_weight': 0.5, 'rare_weight': 0.5},
        {'safe_weight': 0, 'borderline_weight': 0.6, 'rare_weight': 0.4},
        {'safe_weight': 0.33, 'borderline_weight': 0.33, 'rare_weight': 0.33},
    ],
    'hfos': [3, 7, 11],
    'fos': [3, 7, 11],
}

tested_values_num = [0, 1, 2]
dist_num = [0]  # positions into distance_metric[<algorithm>]
iterations = [0, 1, 2, 3, 4]
models = ['logistic_regression', 'decision_tree', 'mlp']
all_options = list(product(datasets, dist_num, algorithm_name, tested_values_num, iterations))
config_path = '../configs'
results_path = '../validation'
data_path = '../data'
perform_fair = True

# One fhf_check job per grid point; n_jobs=-1 asks joblib to use every core.
Parallel(n_jobs=-1)(
    delayed(fhf_check)(
        d_name,
        distance_metric[app_n][dist_type],
        app_n,
        models,
        idx,
        tested_values[app_n][t],
        results_path=results_path,
        config_path=config_path,
        data_path=data_path,
        perform_fair=perform_fair,
    )
    for d_name, dist_type, app_n, t, idx in all_options
)

## Experiment

In [None]:
from src.experiments import experiment

# Final binary experiment on german and adult: one job per
# (dataset, algorithm, iteration), each run with 10 folds.
datasets = ['german', 'adult']
algorithms = ['fawos', 'fos', 'hfos', 'fair_rbh', 'fair_rbu']
models = ['logistic_regression', 'decision_tree', 'mlp']
kfolds = 10
encoding = 'cont_ord_cat'
date = '2024-08-24'
config_path = '../configs'
results_path = '../results'
data_path = '../data'
iterations = list(range(10))
# NOTE(review): every iteration receives the same seed (42) — confirm that
# experiment() varies its randomness through the iteration index.
seeds = [42] * len(iterations)
all_options = list(product(datasets, algorithms, iterations))

Parallel(n_jobs=-1)(
    delayed(experiment)(
        dataset,
        alg_name,
        models,
        iteration,
        date,
        seeds[iteration],
        kfolds=kfolds,
        enc_type=encoding,
        results_path=results_path,
        config_path=config_path,
        data_path=data_path,
    )
    for dataset, alg_name, iteration in all_options
)

In [None]:
from src.experiments import experiment

# Final binary experiment on bank and heart_disease: one job per
# (dataset, algorithm, iteration), each run with 5 folds.
datasets = ['bank', 'heart_disease']
algorithms = ['fawos', 'fos', 'hfos', 'fair_rbh', 'fair_rbu']
models = ['logistic_regression', 'decision_tree', 'mlp']
kfolds = 5
encoding = 'cont_ord_cat'
date = '2024-08-24'
config_path = '../configs'
results_path = '../results'
data_path = '../data'
iterations = list(range(5))
# NOTE(review): every iteration receives the same seed (42) — confirm that
# experiment() varies its randomness through the iteration index.
seeds = [42] * len(iterations)
all_options = list(product(datasets, algorithms, iterations))

Parallel(n_jobs=-1)(
    delayed(experiment)(
        dataset,
        alg_name,
        models,
        iteration,
        date,
        seeds[iteration],
        kfolds=kfolds,
        enc_type=encoding,
        results_path=results_path,
        config_path=config_path,
        data_path=data_path,
    )
    for dataset, alg_name, iteration in all_options
)

# Multi case

## Hyperparameter tuning

In [None]:
from src.validation_multi import check_results as multi_check_results

# Hyperparameter grid for fair_rbh in the multi case: one multi_check_results
# job is submitted per (dataset, gamma, iteration) combination.
datasets = ['german', 'adult', 'bank']
gammas = [0.03, 0.07, 0.1]
algorithm = ['fair_rbh']
distance_metric = {'fair_rbh': ['heom']}
distance_num = [0]  # positions into distance_metric[<algorithm>]
approach_number = {'fair_rbh': list(range(5))}
app_ns = [4]  # positions into approach_number[<algorithm>]
iterations = list(range(5))
models = ['logistic_regression', 'decision_tree', 'mlp']
config_path = '../configs'
results_path = '../validation_multi'
data_path = '../data'
all_options = list(product(datasets, algorithm, distance_num, gammas, app_ns, iterations))

# n_jobs=-1 asks joblib to use every available core.
Parallel(n_jobs=-1)(
    delayed(multi_check_results)(
        dataset,
        alg_name,
        distance_metric[alg_name][metric_pos],
        gamma_value,
        approach_number[alg_name][approach_pos],
        models,
        iteration,
        results_path=results_path,
        config_path=config_path,
        data_path=data_path,
    )
    for dataset, alg_name, metric_pos, gamma_value, approach_pos, iteration in all_options
)

In [None]:
from src.validation_fhf_multi import check_results as multi_fh_check_results

# Hyperparameter grid for fawos / hfos in the multi case.
datasets = ['adult', 'german', 'bank']
distance_metric = {'fawos': ['heom'], 'hfos': ['hvdm']}
algorithm_name = ['fawos', 'hfos']
# Candidate values per algorithm; tested_values_num indexes into these lists.
tested_values = {
    'fawos': [
        {'safe_weight': 0, 'borderline_weight': 0.5, 'rare_weight': 0.5},
        {'safe_weight': 0, 'borderline_weight': 0.6, 'rare_weight': 0.4},
        {'safe_weight': 0.33, 'borderline_weight': 0.33, 'rare_weight': 0.33},
    ],
    'hfos': [3, 7, 11],
}
tested_values_num = list(range(3))
distance_metric_num = [0]  # positions into distance_metric[<algorithm>]
iterations = list(range(5))
models = ['logistic_regression', 'decision_tree', 'mlp']
config_path = '../configs'
results_path = '../validation_multi'
data_path = '../data'
perform_fair = True
all_options = list(product(datasets, distance_metric_num, algorithm_name, tested_values_num, iterations))

# One multi_fh_check_results job per grid point, spread over all cores.
Parallel(n_jobs=-1)(
    delayed(multi_fh_check_results)(
        dataset,
        distance_metric[alg_name][metric_pos],
        alg_name,
        models,
        iteration,
        tested_values[alg_name][value_pos],
        results_path=results_path,
        config_path=config_path,
        data_path=data_path,
        perform_fair=perform_fair,
    )
    for dataset, metric_pos, alg_name, value_pos, iteration in all_options
)

## Experiment

In [None]:
from src.experiments_multi import experiment as experiment_multi

# Final multi-case experiment: one job per (dataset, algorithm, iteration),
# each run with 5 folds.
datasets = ['german', 'adult', 'bank']
algorithms = ['hfos', 'fawos', 'fair_rbh', 'fair_rbu']
models = ['logistic_regression', 'decision_tree', 'mlp']
kfolds = 5
encoding = 'cont_ord_cat'
date = '2024-06-29'
config_path = '../configs'
results_path = '../results_multi'
data_path = '../data'
iterations = list(range(5))
# NOTE(review): every iteration receives the same seed (42) — confirm that
# experiment_multi() varies its randomness through the iteration index.
seeds = [42] * len(iterations)
all_options = list(product(datasets, algorithms, iterations))

Parallel(n_jobs=-1)(
    delayed(experiment_multi)(
        dataset,
        alg_name,
        models,
        iteration,
        date,
        seeds[iteration],
        kfolds=kfolds,
        enc_type=encoding,
        results_path=results_path,
        config_path=config_path,
        data_path=data_path,
    )
    for dataset, alg_name, iteration in all_options
)