This notebook performs the inference and analysis for a chosen experiment.

**Instructions**
* In the second cell you may change the paths to the results folders. The default values work as long as the default results paths were not changed when the experiments were run.
* In the section **Choose Experiment for Analysis**, you can choose which experiment to analyse by uncommenting the corresponding commented-out cell. To rerun the analysis for another experiment, please restart the notebook and uncomment only the cell of the experiment to analyse.
* The with_spatial_flip variable indicates whether the SpatialFlip method should be included in the analysis. It is set to True only for statistical parity experiments. You can set it to False to run the analysis without it.
* apply_fit_flips=True tells the PROMIS methods to apply their precomputed flips, as the SpatialFlip method does.
* The only_methods variable is a list indicating which methods to include in the analysis (an empty list includes all methods). It is set in the LAR and XGB experiment cells, e.g., on the LAR dataset to compare PROMIS Opt (wlimit=300) with PROMIS App, and PROMIS Opt (wlimit=300) with PROMIS Opt (wlimit=1800).

**Analysis**
1. Reads related experiment info data.
2. Reads pretrained models for SpatialFlip, PROMIS methods, performs predictions for test set and reads precomputed predictions for FairWhere method.
3. Computes MLR (for statistical parity or equal opportunity depending on the experiment).
4. Computes Accuracy/F1 score, except for the LAR experiment (which does not include ground truths) and the unfair-by-design experiment (which is semi-synthetic).
5. Computes Disparity (FairWhere unfairness score definition) only for the DNN experiment.
6. Computes Normalized Statistics (LR) (by dividing regions' statistics with the maximum statistic of the initial world).
7. Shows the above computed metrics plus the fit times, the budgets where PROMIS Opt reached its limit, and the final budget metrics.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import matplotlib
sys.path.append(os.path.abspath(os.path.join("..")))
from analysis.analyse_results_func import *
from sklearn import metrics
from utils.plot_utils import *
from utils.data_utils import (
    read_scanned_regs,
    get_y,
    get_pos_info_regions,
    read_all_models,
)
from utils.scores import get_mlr
from utils.results_names_utils import get_train_val_test_paths, combine_world_info
from sklearn import metrics
import ast
import random
# Use Type 42 (TrueType) fonts instead of matplotlib's default Type 3 in PDF/PS output.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

In [None]:
base_path = "../../data/" # path to the base data folder
results_base_path = "../../results/" # path to the base results folder
save_plots_base_path = "../../plots/" # path to the base plots folder to save the plots, or "" to not save them
final_budget_metrics_fname = "final_budget_metrics.csv" # file name to save the final budget results, or "" to not save them

# Analysis switches. These are defaults; the experiment cell chosen below may override them.
with_spatial_flip = False # True: only for experiments with spatial flip to be considered
apply_fit_flips = False # True: for all experiments where statistical parity is assessed except for the experiment with DNN model
exact_wlim_comparison = False # True: compare only the PROMIS Opt solutions computed with different working limits
only_methods = []  # list of the methods to be used for the analysis. If empty all methods are considered

dnn_exp_dir = "dnn_exp/" # directory name for the DNN experiments
xgb_eq_opp_dir = "xgb_eq_opp_exp/" # directory name for the XGB experiments with equal opportunity fairness notion
lar_exp_dir = "lar_exp/" # directory name for the LAR experiments
semi_synth_dir = "crime_semi_synth_exp/" # directory name for the semi synthetic experiments
dataset_name = "crime" # default dataset name. For the LAR dataset, it is set to "lar"
lar_dataset_name = "lar"

# Default plot appearance.
figsize = (14, 8) 
display_title = True

# Seed NumPy's and Python's global random generators.
seed = 42
np.random.seed(seed)  
random.seed(seed)

## Choose Experiment for Analysis

### DNN Experiment (Equal Opportunity)

audit regions = Clusters 

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name = (
#     "dnn",
#     "non_overlap_k_8",
#     True,
#     "equal_opportunity",
#     dnn_exp_dir,
# )

audit regions = Grids

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name = (
#     "dnn",
#     "5_x_5",
#     True,
#     "equal_opportunity",
#     dnn_exp_dir,
# )

audit regions = Scan Regions

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name = (
#     "dnn",
#     "overlap_k_10_radii_4",
#     True,
#     "equal_opportunity",
#     dnn_exp_dir,
# )

### DNN Experiment (Statistical Parity)

audit regions = Clusters 

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, with_spatial_flip = (
#     "dnn",
#     "non_overlap_k_8",
#     True,
#     "statistical_parity",
#     dnn_exp_dir,
#     True
# )

audit regions = Grids

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, with_spatial_flip = (
#     "dnn",
#     "5_x_5",
#     True,
#     "statistical_parity",
#     dnn_exp_dir,
#     True
# )

audit regions = Scan Regions

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, with_spatial_flip = (
#     "dnn",
#     "overlap_k_10_radii_4",
#     True,
#     "statistical_parity",
#     dnn_exp_dir,
#     True
# )

### LAR Experiment

audit regions = Clusters 

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, dataset_name, apply_fit_flips, with_spatial_flip, only_methods = (
#     "",
#     "non_overlap_k_100",
#     False,
#     "statistical_parity",
#     lar_exp_dir,
#     lar_dataset_name,
#     True,
#     True,
#     ["promis_app", "promis_opt_wlimit_300", "iter"]
# )

audit regions = Grids

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, dataset_name, apply_fit_flips, with_spatial_flip, only_methods = (
#     "",
#     "5_x_5",
#     False,
#     "statistical_parity",
#     lar_exp_dir,
#     lar_dataset_name,
#     True,
#     True,
#     ["promis_app", "promis_opt_wlimit_300", "iter"]
# )

audit regions = Scan Regions

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, dataset_name, apply_fit_flips, with_spatial_flip, only_methods = (
#     "",
#     "overlap_k_100_radii_30",
#     False,
#     "statistical_parity",
#     lar_exp_dir,
#     lar_dataset_name,
#     True,
#     True,
#     ["promis_app", "promis_opt_wlimit_300", "iter"]
# )

### LAR Experiment - PROMIS Opt with Lower and Higher Limits

audit regions = Clusters 

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, dataset_name, with_spatial_flip, apply_fit_flips, only_methods, exact_wlim_comparison  = (
#     "",
#     "non_overlap_k_100",
#     False,
#     "statistical_parity",
#     lar_exp_dir,
#     lar_dataset_name,
#     True,
#     True,
#     ["promis_opt_wlimit_300", "promis_opt_wlimit_1800"],
#     True
# )

audit regions = Grids

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, dataset_name, with_spatial_flip, apply_fit_flips, only_methods, exact_wlim_comparison  = (
#     "",
#     "5_x_5",
#     False,
#     "statistical_parity",
#     lar_exp_dir,
#     lar_dataset_name,
#     True,
#     True,
#     ["promis_opt_wlimit_300", "promis_opt_wlimit_1800"],
#     True
# )

audit regions = Scan Regions

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, dataset_name, with_spatial_flip, apply_fit_flips, only_methods, exact_wlim_comparison  = (
#     "",
#     "overlap_k_100_radii_30",
#     False,
#     "statistical_parity",
#     lar_exp_dir,
#     lar_dataset_name,
#     True,
#     True,
#     ["promis_opt_wlimit_300", "promis_opt_wlimit_1800"],
#     True
# )

### Semi Synthetic Experiment

audit regions = Clusters 

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, apply_fit_flips = (
#     "semi_synthetic_regions_non_overlap_k_8",
#     "non_overlap_k_8",
#     False,
#     "statistical_parity",
#     semi_synth_dir,
#     True,
# )

audit regions = Grids

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, apply_fit_flips = (
#     "semi_synthetic_regions_5_x_5",
#     "5_x_5",
#     False,
#     "statistical_parity",
#     semi_synth_dir,
#     True
# )

audit regions = Scan Regions

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, apply_fit_flips = (
#     "semi_synthetic_regions_overlap_k_10_radii_4",
#     "overlap_k_10_radii_4",
#     True,
#     "statistical_parity",
#     semi_synth_dir,
#     True,
# )

### XGB Experiment (Equal Opportunity)

audit regions = Clusters 

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, only_methods = (
#     "xgb",
#     "non_overlap_k_8",
#     False,
#     "equal_opportunity",
#     xgb_eq_opp_dir,
#     ["promis_app", "promis_opt_wlimit_300"],
# )

audit regions = Grids

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, only_methods = (
#     "xgb",
#     "5_x_5",
#     True,
#     "equal_opportunity",
#     xgb_eq_opp_dir,
#     ["promis_app", "promis_opt_wlimit_300"],
# )

audit regions = Scan Regions

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, only_methods = (
#     "xgb",
#     "overlap_k_10_radii_4",
#     True,
#     "equal_opportunity",
#     xgb_eq_opp_dir,
#     ["promis_app", "promis_opt_wlimit_300"],
# )

### XGB Experiment (Equal Opportunity) - PROMIS Opt with Lower and Higher Limits

audit regions = Clusters 

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, only_methods, exact_wlim_comparison = (
#     "xgb",
#     "non_overlap_k_8",
#     False,
#     "equal_opportunity",
#     xgb_eq_opp_dir,
#     ["promis_opt_wlimit_120", "promis_opt_wlimit_300", "promis_opt_wlimit_7200"],
#     True
# )

audit regions = Grids

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, only_methods, exact_wlim_comparison = (
#     "xgb",
#     "5_x_5",
#     True,
#     "equal_opportunity",
#     xgb_eq_opp_dir,
#     ["promis_opt_wlimit_120", "promis_opt_wlimit_300", "promis_opt_wlimit_7200"],
#     True
# )

audit regions = Scan Regions

In [None]:
# clf_name, partioning_type_name, overlap, fairness_notion, dir_name, only_methods, exact_wlim_comparison = (
#     "xgb",
#     "overlap_k_10_radii_4",
#     True,
#     "equal_opportunity",
#     xgb_eq_opp_dir,
#     ["promis_opt_wlimit_120", "promis_opt_wlimit_300", "promis_opt_wlimit_7200"],
#     True
# )

In [None]:
# Point results_base_path at the folder of the chosen experiment. `dir_name` is only
# defined once one of the experiment cells has been uncommented and run; running this
# cell a second time appends `dir_name` again.
results_base_path = os.path.join(results_base_path, dir_name)

## Set Display Settings

In [None]:
# Human-readable names of the methods, keyed by the method labels used in the results.
method_to_display_name = {
    "iter": "SpatialFlip",
    "promis_app": "PROMIS-Approx",
    "promis_opt": "PROMIS-Direct",
}

# PROMIS-Direct runs with a work limit. The wlimit=300 run is shown without the limit
# in its name unless runs with different limits are being compared with each other.
for wlimit in (90, 120, 300, 1800, 3600, 7200, 10800):
    if wlimit == 300 and not exact_wlim_comparison:
        wlimit_display_name = "PROMIS-Direct"
    else:
        wlimit_display_name = f"PROMIS-Direct (wlimit={wlimit})"
    method_to_display_name[f"promis_opt_wlimit_{wlimit}"] = wlimit_display_name

method_to_display_name.update({"init": "Base Model", "where": "FairWhere"})

# Generic color cycle: hex codes first, named colors afterwards.
colors_list = [
    "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728",
    "#9467bd", "#8c564b", "#e377c2", "#7f7f7f",
    "black", "darkred", "darkgreen", "darkblue", "darkmagenta",
    "darkcyan", "darkorange", "darkviolet", "darkturquoise", "darkslategray",
    "darkgoldenrod", "darkolivegreen", "darkseagreen", "darkslateblue", "darkkhaki",
]

# Plot style per method. All methods share the line width, line style and marker size;
# only the color and the scatter marker differ. Every method gets its own dict.
method_to_plot_info = {
    method: {
        "linewidth": 5,
        "color": color,
        "linestyle": "-",
        "scatter_marker": scatter_marker,
        "marker_size": 150,
    }
    for method, color, scatter_marker in (
        ("promis_app", "darkgreen", "o"),
        ("promis_opt_wlimit_90", "yellow", "p"),
        ("promis_opt_wlimit_120", "lightgreen", "v"),
        ("promis_opt_wlimit_300", "black", "X"),
        ("promis_opt_wlimit_1800", "purple", "<"),
        ("promis_opt_wlimit_3600", "lightblue", "."),
        ("promis_opt_wlimit_7200", "orange", "^"),
        ("promis_opt_wlimit_10800", "darkslategray", "^"),
        ("iter", "saddlebrown", ">"),
        ("where", "red", "^"),
        ("init", "blue", "*"),
    )
}

## Read Trained Models 

In [None]:
# Derive the naming labels of the chosen experiment and locate its data files.
res_desc_label, partioning_name, prediction_name = combine_world_info(
    dataset_name, partioning_type_name, clf_name
)
train_path_info, val_path_info, test_path_info = get_train_val_test_paths(
    base_path, partioning_name, prediction_name, dataset_name
)

# No validation data is loaded in this cell; the names are still bound (to None).
(
    val_regions_df,
    val_pred_df,
    val_labels_df,
    y_pred_val,
    y_pred_probs_val,
    y_true_val,
    val_points_per_region,
    pos_y_true_indices_val,
    pos_points_per_region_val,
) = (None,) * 9

if dataset_name != "lar":
    test_regions_df = read_scanned_regs(test_path_info["regions"])
    test_pred_df = pd.read_csv(test_path_info["predictions"])
    test_labels_df = pd.read_csv(test_path_info["labels"])
    y_pred_test = get_y(test_pred_df, "pred")
    # The "prob" column is not read for the semi-synthetic predictions.
    if clf_name.startswith("semi_synthetic"):
        y_pred_probs_test = None
    else:
        y_pred_probs_test = get_y(test_pred_df, "prob")
    y_true_test = get_y(test_labels_df, "label")

    test_points_per_region = test_regions_df["points"].tolist()
    pos_y_true_indices_test, pos_points_per_region_test = get_pos_info_regions(
        y_true_test, test_points_per_region
    )
else:
    # LAR: the regions are read from the train paths and the "label" column of the
    # preprocessed file is used as the predictions; no ground truth is loaded.
    test_regions_df = read_scanned_regs(train_path_info["regions"])
    test_pred_df = pd.read_csv(f"{base_path}preprocess/lar.csv")
    y_pred_test = get_y(test_pred_df, "label")
    y_true_test = None
    y_pred_probs_test = None

    test_points_per_region = test_regions_df["points"].tolist()
    pos_y_true_indices_test, pos_points_per_region_test = None, None

results_path = f"{results_base_path}{res_desc_label}/"

# An empty save_plots_base_path leaves save_plots_path empty as well (plots are not saved).
save_plots_path = ""
if save_plots_base_path:
    plot_dir_parts = [
        save_plots_base_path,
        dir_name,
        res_desc_label,
        f"{fairness_notion}/",
    ]
    if exact_wlim_comparison:
        plot_dir_parts.append("exact_wlim_comparison/")
    save_plots_path = os.path.join(*plot_dir_parts)
    # makedirs creates the missing parent folders too.
    os.makedirs(save_plots_path, exist_ok=True)
    print(f"Save plots path: {save_plots_path}")

# SpatialFlip models are read only on request; the PROMIS models are always read.
if with_spatial_flip:
    sp_flip_meths_2_pretrained_models = read_all_models(
        f"{results_path}spatial_flip_models/{fairness_notion}/",
        False,
        methods=only_methods,
    )
else:
    sp_flip_meths_2_pretrained_models = {}

sp_opt_meths_2_pretrained_models = read_all_models(
    f"{results_path}spatial_optim_models/{fairness_notion}/", True, methods=only_methods
)

all_meths_2_pretrained_models = dict(sp_flip_meths_2_pretrained_models)
all_meths_2_pretrained_models.update(sp_opt_meths_2_pretrained_models)

splitted_labels = get_all_methods_modes_labels(list(all_meths_2_pretrained_models))
opt_methods_display_labels = splitted_labels["opt_labels"]

# Methods without a predefined display name are shown under their own label.
for label in all_meths_2_pretrained_models:
    method_to_display_name.setdefault(label, label)

## Perform Predictions - Compute Results Information

In [None]:
# Run the pretrained models on the test set and collect the per-method results.
all_methods_to_results_info, budget_range = compute_all_results_info(
    all_meths_2_pretrained_models=all_meths_2_pretrained_models,
    test_points_per_region=test_points_per_region,
    y_pred_test_probs=y_pred_probs_test,
    y_true_test=y_true_test,
    y_pred_test_orig=y_pred_test,
    apply_fit_flips=apply_fit_flips,
)


def _results_of_label_group(label_group):
    """Return the results of the methods listed under `label_group` in `splitted_labels`."""
    return {
        method: results_info
        for method, results_info in all_methods_to_results_info.items()
        if method in splitted_labels[label_group]
    }


sp_flip_methods_2_results_info = _results_of_label_group("heu_labels")
sp_opt_methods_2_results_info = _results_of_label_group("opt_labels")

In [None]:
# Sum and count of the test predictions, and their ratio
# (P_test is the number of positive predictions if y_pred_test holds 0/1 labels — TODO confirm).
P_test = np.sum(y_pred_test)
N_test = len(y_pred_test)
RHO_test = P_test / N_test

def to_region_dict(pts_per_region):
    """Wrap every entry of `pts_per_region` in a dict under the key "points".

    Returns a list with one ``{"points": entry}`` dict per entry, in the same order.
    """
    return [dict(points=region_points) for region_points in pts_per_region]

# MLR (with the per-region statistics) of the original predictions over all test points.
init_test_mlr_st_par, init_test_stats_st_par = get_mlr(
    y_pred_test, test_points_per_region, with_stats=True
)

# The same restricted to the points with a positive true label; skipped for LAR.
init_test_mlr_eq_opp, init_test_stats_eq_opp = None, None
if dataset_name != "lar":
    init_test_mlr_eq_opp, init_test_stats_eq_opp = get_mlr(
        y_pred_test[pos_y_true_indices_test],
        pos_points_per_region_test,
        with_stats=True,
    )

# Performance of the original predictions, only when true labels are available.
init_acc_test, init_f1_test = None, None
if y_true_test is not None:
    init_acc_test = metrics.accuracy_score(y_true_test, y_pred_test)
    init_f1_test = metrics.f1_score(y_true_test, y_pred_test)

print(f"N_test: {N_test}")
print(f"P_test: {P_test}")
print(f"RHO_test: {RHO_test:.3f}")

if dataset_name != "lar":
    TP_test = np.sum(y_pred_test[pos_y_true_indices_test])
    TPR_test = TP_test / len(pos_y_true_indices_test)
    print(f"TPR_test: {TPR_test:.3f}")

## Compute Disparity, Metrics on FairWhere Predictions 

In [None]:
def get_pr(y_pred):
    """Return the sum of `y_pred` divided by its length, or 0 when `y_pred` is empty."""
    n_preds = len(y_pred)
    return np.sum(y_pred) / n_preds if n_preds else 0


if clf_name == "dnn":
    # The FairWhere fit time and test predictions share this file-name prefix.
    where_files_prefix = f"{results_path}{dataset_name}_{fairness_notion}_where_"

    with open(f"{where_files_prefix}fit_time.txt", "r") as fit_time_file:
        where_fit_time = float(fit_time_file.read())

    where_pred_test_df = pd.read_csv(f"{where_files_prefix}model_test_pred.csv")
    test_partitioning_id_df = pd.read_csv(
        f"{base_path}partitionings/test_{partioning_name}_partitioning_ids.csv"
    )

    y_pred_where_test = get_y(where_pred_test_df, "pred")

    # Both columns are parsed from their string form with ast.literal_eval.
    for column in ("id", "partitioning"):
        test_partitioning_id_df[column] = test_partitioning_id_df[column].apply(
            ast.literal_eval
        )

    test_ids = test_partitioning_id_df["id"].tolist()
    test_partitionings = test_partitioning_id_df["partitioning"].tolist()

    # Recall is the fairness score for equal opportunity; get_pr is used otherwise.
    if fairness_notion == "equal_opportunity":
        fair_score_func = metrics.recall_score
    else:
        fair_score_func = get_pr

    (
        all_methods_to_results_info,
        P_where_test,
        RHO_where_test,
        TP_where_test,
        TPR_where_test,
        mlr_where_st_par_test,
        mlr_where_eq_opp_test,
        acc_where_test,
        f1_where_test,
        init_fairness_loss_list_test,
        init_fairness_loss_list_weighted_test,
        init_fairness_loss_sum_test,
        init_fairness_loss_sum_weighted_test,
        where_fairness_loss_list_test,
        where_fairness_loss_list_weighted_test,
        where_fairness_loss_sum_test,
        where_fairness_loss_weighted_sum_test,
    ) = compute_avg_disparity_where_metrics(
        all_methods_to_results_info,
        y_pred_test,
        y_pred_where_test,
        y_true_test,
        test_points_per_region,
        pos_points_per_region_test,
        test_ids,
        test_partitionings,
        fair_score_func=fair_score_func,
    )

## Computes Maximum Budget Info

In [None]:
# The FairWhere-related inputs only exist for the DNN experiment; None is passed otherwise.
if clf_name == "dnn":
    where_inputs = {
        "where_fit_time": where_fit_time,
        "y_pred_where": y_pred_where_test,
        "mlr_where_st_par": mlr_where_st_par_test,
        "mlr_where_eq_opp": mlr_where_eq_opp_test,
        "f1_where_test": f1_where_test,
        "init_fairness_loss_sum": init_fairness_loss_sum_test,
        "where_fairness_loss_sum": where_fairness_loss_sum_test,
    }
else:
    where_inputs = dict.fromkeys(
        (
            "where_fit_time",
            "y_pred_where",
            "mlr_where_st_par",
            "mlr_where_eq_opp",
            "f1_where_test",
            "init_fairness_loss_sum",
            "where_fairness_loss_sum",
        ),
        None,
    )

final_results_df = compute_max_budget_info(
    all_methods_to_results_info=all_methods_to_results_info,
    budget_range=budget_range,
    dataset_name=dataset_name,
    clf_name=clf_name,
    partioning_type_name=partioning_type_name,
    fairness_notion=fairness_notion,
    points_per_region=test_points_per_region,
    init_mlr_st_par=init_test_mlr_st_par,
    init_mlr_eq_opp=init_test_mlr_eq_opp,
    init_stats_st_par=init_test_stats_st_par,
    init_stats_eq_opp=init_test_stats_eq_opp,
    y_true=y_true_test,
    init_f1=init_f1_test,
    init_acc=init_acc_test,
    **where_inputs,
)

# Rows are appended to the CSV; the header is written only when the file does not exist yet.
if final_budget_metrics_fname:
    write_header = not os.path.isfile(final_budget_metrics_fname)
    final_results_df.to_csv(
        final_budget_metrics_fname, mode="a", index=False, header=write_header
    )

In [None]:
# display_title=False
# save_plots_path = f"{save_plots_path}no_title/"
# os.makedirs(save_plots_path, exist_ok=True)
# figsize=(4.5, 3.5)
# for method, settings in method_to_plot_info.items():
#     settings["linewidth"] = 2.5
#     settings["marker_size"] = 30

## Weighted and Unweighted Disparity Across Partitionings

In [None]:
# The fairness-loss lists plotted here are only computed for the DNN experiment.
if clf_name == "dnn":
    plot_fairness_loss_per_partitioning(
        test_ids,
        init_fairness_loss_list_test,
        where_fairness_loss_list_test,
        init_fairness_loss_list_weighted_test,
        where_fairness_loss_list_weighted_test,
        save_plots_path,
        display_title,
        "Test"
    )

## Budgets where PROMIS Methods Reach Limit

In [None]:
# Only the PROMIS methods whose results carry a "status" column are inspected. Every
# step below uses this same filter, so a method without that column is skipped
# everywhere instead of raising in the last step.
opt_methods_with_status = [
    method
    for method in splitted_labels["opt_labels"]
    if "status" in all_methods_to_results_info[method].columns
]

# Report every status other than 1 and 3. Status 3 is treated below as "limit reached";
# status 1 is presumably the regular outcome of the solver (TODO confirm).
for method in opt_methods_with_status:
    res_df = all_methods_to_results_info[method]
    for status in res_df["status"].unique():
        if status not in [1, 3]:
            other_status_exp_idxs = res_df[res_df["status"] == status][
                "exp_idx"
            ].unique()
            print(
                f"Found status {status} for method {method} for exp indexes: {other_status_exp_idxs}"
            )

method_tlimit_cnt = {}  # method -> number of runs with status 3
labels = []  # methods, aligned with status_lists and budget_lists
status_lists = []
budget_lists = []
meths_min_C_reach_limit = {}  # method -> smallest budget with status 3
for method in opt_methods_with_status:
    res_df = all_methods_to_results_info[method]
    status_list = res_df["status"].to_list()
    budget_list = res_df["budget"].to_list()

    labels.append(method)
    status_lists.append(status_list)
    budget_lists.append(budget_list)

    method_tlimit_cnt[method] = int(np.count_nonzero(np.array(status_list) == 3))
    if 3 in status_list:
        meths_min_C_reach_limit[method] = res_df[res_df["status"] == 3]["budget"].min()

In [None]:
# The status plots are drawn only if some PROMIS method has a run with status 3.
if not meths_min_C_reach_limit:
    print("No PROMIS method reached the work limit")
else:
    plot_opt_methods_status(
        labels=labels,
        budget_lists=budget_lists,
        status_lists=status_lists,
        save_path=save_plots_path,
        figsize=figsize,
    )
    plot_min_C_reach_limit(
        meths_min_C_reach_limit,
        method_to_plot_info,
        method_to_display_name,
        opt_methods_display_labels,
        figsize=figsize,
        save_path=save_plots_path,
        display_title=display_title,
    )

## MLR

MLR per budget for PROMIS methods

In [None]:
# Results column that holds the test MLR of the chosen fairness notion.
if fairness_notion == "equal_opportunity":
    mlr_label = "mlr_eq_opp_test"
else:
    mlr_label = "mlr_st_par_test"

# One column per method (under its display name) with the MLR at every budget.
budget_mlrs = {"Budget": budget_range}
for method, res_df in all_methods_to_results_info.items():
    budget_mlrs[method_to_display_name[method]] = [
        res_df[res_df["budget"] == budget][mlr_label].values[0]
        for budget in budget_range
    ]

# In this table the plain "PROMIS-Direct" column is labelled with its work limit.
if "PROMIS-Direct" in budget_mlrs:
    budget_mlrs["PROMIS-Direct (wlimit=300)"] = budget_mlrs.pop("PROMIS-Direct")

budget_mlrs_df = pd.DataFrame(budget_mlrs)
sorted_methods = sorted(name for name in budget_mlrs if name != "Budget")
budget_mlrs_df = budget_mlrs_df[["Budget"] + sorted_methods]
display(budget_mlrs_df)

In [None]:
# Variant of the plot styles with smaller markers and thinner lines, used by the plot
# cells when display_title is False.
# Every per-method dict is rebuilt here. `method_to_plot_info.copy()` is a shallow copy
# whose values are the very same inner dicts, so changing them in place would also
# change `method_to_plot_info` and make both variants identical.
cur_method_to_plot_info = {
    method: {**plot_info, "marker_size": 80, "linewidth": 3}
    for method, plot_info in method_to_plot_info.items()
}

In [None]:
flips_limit = None

# Per fairness notion: initial MLR, title suffix, file-name suffix and results column.
if fairness_notion == "statistical_parity":
    mlr_plot_setup = (
        init_test_mlr_st_par,
        " (Statistical Parity - Test Set)",
        "_st_par_test",
        "mlr_st_par_test",
    )
elif fairness_notion == "equal_opportunity":
    mlr_plot_setup = (
        init_test_mlr_eq_opp,
        " (Equal Opportunity - Test Set)",
        "_eq_opp_test",
        "mlr_eq_opp_test",
    )
else:
    # Any other fairness notion: nothing is plotted.
    mlr_plot_setup = None

if mlr_plot_setup is not None:
    notion_init_mlr, title_suffix, save_suffix, mlr_column = mlr_plot_setup

    # The FairWhere MLR is only available (and only plotted) for the DNN experiment.
    if clf_name == "dnn":
        where_method = "where"
        if fairness_notion == "statistical_parity":
            where_mlr = mlr_where_st_par_test
        else:
            where_mlr = mlr_where_eq_opp_test
    else:
        where_method, where_mlr = None, None

    if display_title:
        mlr_plot_info, mlr_figsize = method_to_plot_info, figsize
    else:
        mlr_plot_info, mlr_figsize = cur_method_to_plot_info, (4.7, 2.8)

    plot_scores(
        methods_to_res_info=all_methods_to_results_info,
        init_mlr=notion_init_mlr,
        method_to_plot_info=mlr_plot_info,
        method_to_display_name=method_to_display_name,
        save_plots_path=save_plots_path,
        figsize=mlr_figsize,
        flips_limit=flips_limit,
        append_to_title=title_suffix,
        append_to_save=save_suffix,
        score_label=mlr_column,
        display_title=display_title,
        other_mlr=where_mlr,
        other_mlr_method=where_method,
    )

<!-- params[['mlr', 'pos_mlr', 'test_mlr', 'pos_test_mlr']]
all_methods_to_results_info['cont_in_ov_over_eq_opp'][['sol_mlr', 'pos_mlr', 'new_val_mlr', 'val_new_pos_mlr', 'new_test_mlr', 'test_new_pos_mlr', 'budget']] -->

## Fairness and Performance Metrics per Budget

In [None]:
# Scores of the original predictions, per score name and data set ("test" only).
init_scores = {
    score_name: {"test": init_value}
    for score_name, init_value in (
        ("mlr_st_par", init_test_mlr_st_par),
        ("mlr_eq_opp", init_test_mlr_eq_opp),
        ("accuracy", init_acc_test),
        ("f1", init_f1_test),
        # The fairness loss is only computed for the DNN experiment.
        ("fair_loss_sum", init_fairness_loss_sum_test if clf_name == "dnn" else None),
    )
}

# Axis/legend labels for the fairness scores, performance scores and data sets.
fair_scores_display_labels = dict(
    mlr_st_par="MLR",
    mlr_eq_opp="MLR",
    fair_loss_sum="Mean Disparity",
)
performance_scores_display_labels = dict(accuracy="Accuracy", f1="F1 Score")
sets_display_labels = dict(sol="Solution", val="Validation Set", test="Test Set")

In [None]:
# Figure size of the fairness-vs-performance plots: large with titles, compact without.
if display_title:
    score1_vs_score2_figsize = (9, 14)  
elif clf_name == "dnn":
    score1_vs_score2_figsize = (4, 7)
else:
    score1_vs_score2_figsize = (4, 4.5)

# The performance scores need true labels, so nothing is plotted without them.
if y_true_test is not None:
    sets = ["test"]
    if fairness_notion == "statistical_parity":
        fair_scores = ["mlr_st_par"]
    else:
        fair_scores = ["mlr_eq_opp"]

    if clf_name == "dnn":
        # FairWhere scores, laid out like init_scores (score name -> set -> value).
        where_scores = {
            "mlr_st_par": {
                "test": mlr_where_st_par_test,
            },
            "mlr_eq_opp": {
                "test": mlr_where_eq_opp_test,
            },
            "accuracy": {
                "test": acc_where_test,
            },
            "f1": {
                "test": f1_where_test,
            },
            "fair_loss_sum":
            {
                "test": where_fairness_loss_sum_test,
            }
        }
        # DNN only: a second fairness score (passed as the third score below) and F1
        # instead of accuracy as the performance score.
        fair_scores.append("fair_loss_sum")
        performance_scores = ["f1"]
    else:
        performance_scores = ["accuracy"]

    for set_ in sets:
        for performance_score in performance_scores:

            # set the min and max axis values for the performance score
            # as the global min and max values per group of experiments 
            # (e.g., DNN considering statistical parity)
            # NOTE(review): the values are hard-coded; the xgb limits are used for
            # every fairness notion, and any other classifier gets no fixed limits.

            if clf_name == "dnn":
                if fairness_notion == "statistical_parity":
                    score_2_min_axis = 0.3868
                    score_2_max_axis = 0.4903
                else:
                    score_2_min_axis = 0.4332
                    score_2_max_axis = 0.4880
            elif clf_name == "xgb":
                score_2_min_axis = 0.7276
                score_2_max_axis = 0.7288
            else:
                score_2_min_axis = None
                score_2_max_axis = None

            # Widen the fixed range by 0.006 on both sides.
            if score_2_max_axis is not None and score_2_min_axis is not None:
                score_2_min_axis = score_2_min_axis - 0.006
                score_2_max_axis = score_2_max_axis + 0.006

            # The third score and the "other" (FairWhere) values are passed only when
            # they exist, i.e. for the DNN experiment; otherwise None is passed.
            plot_score1_vs_score2(
                methods_to_res_info=all_methods_to_results_info,
                score_label1=f"{fair_scores[0]}_{set_}",
                score_label2=f"{performance_score}_{set_}",
                score_label3=f"{fair_scores[1]}_{set_}" if len(fair_scores) > 1 else None,
                score_display_label1=fair_scores_display_labels[fair_scores[0]],
                score_display_label2=performance_scores_display_labels[performance_score],
                score_display_label3=fair_scores_display_labels[fair_scores[1]] if len(fair_scores) > 1 else None,
                init_score1=init_scores[fair_scores[0]][set_],
                init_score2=init_scores[performance_score][set_],
                init_score3=init_scores[fair_scores[1]][set_] if len(fair_scores) > 1 else None,
                method_to_plot_info=method_to_plot_info,
                method_to_display_name=method_to_display_name,
                save_plots_path=save_plots_path,
                figsize=score1_vs_score2_figsize,
                append_to_title=f" ({sets_display_labels[set_]})",
                display_title=display_title,
                other_score1=where_scores[fair_scores[0]][set_] if clf_name == "dnn" else None,
                other_score2=where_scores[performance_score][set_] if clf_name == "dnn" else None,
                other_score_3=where_scores[fair_scores[1]][set_] if clf_name == "dnn" and len(fair_scores) > 1 else None,
                other_method="where" if clf_name == "dnn" else None,
                score_2_min_axis=score_2_min_axis,
                score_2_max_axis=score_2_max_axis,
            )

## Positive, Positive Rate, Actual Flips per Method per Budget

In [None]:
# FairWhere values exist only for the DNN experiment; None is passed otherwise.
if clf_name == "dnn":
    # Summed absolute differences between the FairWhere and the original predictions,
    # over all test points and over the points with a positive true label.
    actual_flips_where_test = np.sum(np.abs(y_pred_where_test - y_pred_test))
    actual_pos_flips_where_test = np.sum(
        np.abs(
            y_pred_where_test[pos_y_true_indices_test]
            - y_pred_test[pos_y_true_indices_test]
        )
    )
    where_P, where_RHO = P_where_test, RHO_where_test
else:
    actual_flips_where_test = None
    actual_pos_flips_where_test = None
    where_P, where_RHO = None, None

if display_title:
    compare_plot_info = method_to_plot_info
else:
    compare_plot_info = cur_method_to_plot_info

# NOTE(review): other_method is "where" even when the other_* values are None, unlike
# the other plot calls in this notebook that pass None in that case — confirm intended.
plot_compare_methods_info(
    all_methods_to_results_info,
    P_test,
    RHO_test,
    p_label="P_test",
    rho_label="RHO_test",
    actual_flips_label="actual_flips_test",
    method_to_plot_info=compare_plot_info,
    method_to_display_name=method_to_display_name,
    save_path=save_plots_path,
    figsize=figsize,
    append_to_title=f" ({fairness_notion} - Test Set)",
    display_title=display_title,
    other_P=where_P,
    other_RHO=where_RHO,
    other_actual_flips=actual_flips_where_test,
    other_actual_pos_flips=actual_pos_flips_where_test,
    other_method="where",
)

## Regions Statistics 

In [None]:
# Show the table returned by compute_max_budget_info.
display(final_results_df)

In [None]:
xlabel = "Regions"
ylabel = "MLR"

methods_labels = final_results_df["Method"].unique().tolist()

# One flat list of statistics per method; groupby with sort=False keeps the methods
# in their order of first appearance, like unique() above.
grouped_stats = final_results_df.groupby("Method", sort=False)["Statistics"].apply(list)
stats_per_method = [
    np.concatenate(method_stats).tolist() for method_stats in grouped_stats.tolist()
]

# Without titles, the figure size depends on the kind of audit regions.
if not display_title:
    for name_prefix, prefix_figsize in (
        ("regions_5_x_5", (14, 2.5)),
        ("regions_overlap", (10, 2.5)),
    ):
        if partioning_name.startswith(name_prefix):
            figsize = prefix_figsize
            break
    else:
        figsize = (4.5, 2.5)

plot_regions_norm_stats(
    methods_stats=stats_per_method,
    methods_labels=methods_labels,
    xlabel=xlabel,
    save_path=save_plots_path,
    append_to_title="(Test Set)",
    display_title=display_title,
    method_to_display_name=method_to_display_name,
    method_to_plot_info=method_to_plot_info,
    figsize=figsize,
)