This notebook generates the running times and the additional metrics (F1, accuracy, mean disparity) reported in the paper for the final budget. Run it only after all experiments have finished.

In [1]:
import pandas as pd

In [2]:
# CSV file with the final-budget metrics; it is loaded with pd.read_csv in the next cell.
# The path is relative, so it is resolved against the kernel's current working directory.
fname = "final_budget_metrics.csv"

## Read and process the final budget metrics 

In [3]:
# Short display names for the audit-region types and the fairness notions.
audit_regions_name_dict = {
    "Non-Overlapping KMeans": "Clusters",
    "Overlapping KMeans": "Scan Regions",
    "Overlapping Partitionings": "Grids",
}
fairness_notion_name_dict = {
    "Equal Opportunity": "EO",
    "Statistical Parity": "SP",
}

# Load the metrics and clean them in a single chain:
#   1. replace the long region / fairness names by their short display names
#      (Series.map with a dict turns values that are not keys of the dict into NaN),
#   2. build `exp_desc`, a per-experiment id made of dataset, classifier,
#      audit regions and fairness notion joined by "_",
#   3. drop the rows of the Promis-Exact method with the high work limit.
final_budget_metrics_df = (
    pd.read_csv(fname)
    .assign(
        **{
            "Audit Regions": lambda df: df["Audit Regions"].map(audit_regions_name_dict),
            "Fairness Notion": lambda df: df["Fairness Notion"].map(
                fairness_notion_name_dict
            ),
        }
    )
    .assign(
        exp_desc=lambda df: (
            df["Dataset"]
            + "_"
            + df["Classifier"]
            + "_"
            + df["Audit Regions"]
            + "_"
            + df["Fairness Notion"]
        )
    )
    .loc[lambda df: df["Method"] != "promis_opt_wlimit_1800"]
)

In [6]:
# Method identifiers as they appear in the "Method" column of the metrics frame.
methods = [
    "init",
    "iter",
    "promis_opt_wlimit_300",
    "promis_app",
    "FairWhere",
]

# Column-oriented accumulator: one list per output column, one entry per experiment.
# The key order here becomes the column order of `final_res_df`.
# Note that there is deliberately no "Init Time" column.
final_res = {
    "Dataset": [],
    "Classifier": [],
    "Audit Regions": [],
    "Fairness": [],
    "Budget": [],
    "Init MLR": [],
    "PROMIS-Direct MLR": [],
    "PROMIS-Approx MLR": [],
    "FairWhere MLR": [],
    "SpatialFlip MLR": [],
    "Init Mean Disparity": [],
    "PROMIS-Direct Mean Disparity": [],
    "PROMIS-Approx Mean Disparity": [],
    "FairWhere Mean Disparity": [],
    "SpatialFlip Mean Disparity": [],
    "PROMIS-Direct Time": [],
    "PROMIS-Approx Time": [],
    "FairWhere Time": [],
    "SpatialFlip Time": [],
    "Init Accuracy": [],
    "PROMIS-Direct Accuracy": [],
    "PROMIS-Approx Accuracy": [],
    "FairWhere Accuracy": [],
    "SpatialFlip Accuracy": [],
    "Init F1": [],
    "PROMIS-Direct F1": [],
    "PROMIS-Approx F1": [],
    "FairWhere F1": [],
    "SpatialFlip F1": [],
}

# Maps a method identifier to the label used as prefix of its output columns.
methods_to_labels = {
    "init": "Init",
    "iter": "SpatialFlip",
    "promis_opt_wlimit_300": "PROMIS-Direct",
    "promis_app": "PROMIS-Approx",
    "FairWhere": "FairWhere",
}

# fill in the final results dataframe: one output row per experiment id
for exp_desc in final_budget_metrics_df["exp_desc"].unique():
    exp_df = final_budget_metrics_df[final_budget_metrics_df["exp_desc"] == exp_desc]

    # The descriptive columns are taken from the last row of the experiment.
    for out_column, src_column in (
        ("Dataset", "Dataset"),
        ("Classifier", "Classifier"),
        ("Audit Regions", "Audit Regions"),
        ("Fairness", "Fairness Notion"),
        ("Budget", "Budget"),
    ):
        final_res[out_column].append(exp_df[src_column].values[-1])

    for method in methods:
        method_df = exp_df[exp_df["Method"] == method]

        for metric in ["Time", "MLR", "Mean Disparity", "Accuracy", "F1"]:
            # "init" has no time column. Skip it *before* looking at the data, so
            # that an experiment without an "init" row does not try to append to
            # the non-existent "Init Time" list (this used to raise a KeyError).
            if metric == "Time" and method == "init":
                continue

            label = methods_to_labels[method] + " " + metric
            if method_df.empty:
                # the method was not run for this experiment
                final_res[label].append(None)
            else:
                # if a method has several rows, the first one is used
                final_res[label].append(method_df[metric].values[0])

final_res_df = pd.DataFrame(final_res)


In [8]:
# separate the results for each set of experiments


def _subset(row_mask, id_columns, method_labels, metric):
    """Return the rows of `final_res_df` selected by `row_mask`, restricted to
    `id_columns` followed by one "<label> <metric>" column per method label."""
    columns = id_columns + [f"{label} {metric}" for label in method_labels]
    return final_res_df[row_mask][columns]


# Row selectors, one per set of experiments.
_is_dnn = final_res_df["Classifier"] == "DNN"
_is_lar = final_res_df["Dataset"] == "LAR"
_is_synth = final_res_df["Classifier"] == "Unfair by Design"
_is_xgb = final_res_df["Classifier"] == "XGB"

# Identifier columns: the DNN tables also show the fairness notion.
_dnn_ids = ["Audit Regions", "Fairness", "Budget"]
_ids = ["Audit Regions", "Budget"]

# Method labels (column prefixes) shown in each family of tables.
_dnn_labels = ["Init", "PROMIS-Direct", "PROMIS-Approx", "FairWhere", "SpatialFlip"]
_flip_labels = ["Init", "PROMIS-Direct", "PROMIS-Approx", "SpatialFlip"]
_promis_labels = ["Init", "PROMIS-Direct", "PROMIS-Approx"]
# Time tables never contain the leading "Init" entry, hence the [1:] slices below.

DNN_mlr_exp_df = _subset(_is_dnn, _dnn_ids, _dnn_labels, "MLR")
DNN_mean_disparity_exp_df = _subset(_is_dnn, _dnn_ids, _dnn_labels, "Mean Disparity")
DNN_f1_exp_df = _subset(_is_dnn, _dnn_ids, _dnn_labels, "F1")
DNN_times_df = _subset(_is_dnn, _dnn_ids, _dnn_labels[1:], "Time")

LAR_mlr_exp_df = _subset(_is_lar, _ids, _flip_labels, "MLR")
LAR_times_df = _subset(_is_lar, _ids, _flip_labels[1:], "Time")

synth_mlr_exp_df = _subset(_is_synth, _ids, _flip_labels, "MLR")
synth_times_df = _subset(_is_synth, _ids, _flip_labels[1:], "Time")

XGB_mlr_exp_df = _subset(_is_xgb, _ids, _promis_labels, "MLR")
XGB_acc_exp_df = _subset(_is_xgb, _ids, _promis_labels, "Accuracy")
XGB_times_df = _subset(_is_xgb, _ids, _promis_labels[1:], "Time")

## Display Times for All Experiments

In [None]:
def custom_round(x):
    """Format a number for display in the times table.

    Values that are at least 1 after rounding to two decimals are rendered as
    an integer string; smaller values keep two decimals.

    Parameters
    ----------
    x : object
        The value to format.

    Returns
    -------
    str or object
        The formatted string for finite ``int``/``float`` input. Anything else
        (e.g. the "-" placeholder, NaN, infinity) is returned unchanged.
    """
    import math  # local import keeps this helper self-contained

    if not isinstance(x, (int, float)):
        return x
    # NaN / inf cannot be converted to int; hand them back untouched.
    if not math.isfinite(x):
        return x
    # Decide on the *rounded* value so that e.g. 0.999 is shown as "1",
    # consistently with every other value >= 1, instead of "1.0".
    if round(x, 2) >= 1:
        return str(int(round(x)))
    return str(round(x, 2))
# Build the running-time table for all experiments.
numeric_columns = ['PROMIS-Direct Time', 'PROMIS-Approx Time', 'FairWhere Time', 'SpatialFlip Time']
times = final_res_df[["Dataset", "Audit Regions", "Classifier", "Fairness"] + numeric_columns].copy()

# Format every time with custom_round; a missing time (method not run) becomes "-".
# Series.map is used column by column instead of the deprecated DataFrame.applymap.
for time_column in numeric_columns:
    times[time_column] = times[time_column].map(
        lambda value: "-" if pd.isna(value) else custom_round(value)
    )
# Missing values in the descriptive columns are shown as "-" as well.
times = times.fillna("-")

# Print the markdown text itself: display() of a str would show its escaped repr.
# index=False already leaves the row index out of the table, so no index reset is
# needed (the former `drop_index()` call is not a DataFrame method and raised).
# NOTE: DataFrame.to_markdown relies on the optional `tabulate` package.
print(times.to_markdown(index=False))

## Display Metrics for each set of experiments

In [None]:
# Show every per-experiment table under its title, in a fixed order.
for section_title, section_df in (
    ("DNN MLR", DNN_mlr_exp_df),
    ("DNN Mean Disparity", DNN_mean_disparity_exp_df),
    ("DNN F1", DNN_f1_exp_df),
    ("DNN Times", DNN_times_df),
    ("LAR MLR", LAR_mlr_exp_df),
    ("LAR Times", LAR_times_df),
    ("Synth MLR", synth_mlr_exp_df),
    ("Synth Times", synth_times_df),
    ("XGB MLR", XGB_mlr_exp_df),
    ("XGB Accuracy", XGB_acc_exp_df),
    ("XGB Times", XGB_times_df),
):
    print(section_title)
    display(section_df)