In [1]:
import os
import sys
import numpy as np
import pandas as pd

from pathlib import Path

In [2]:
#DATASET_NAME = "AOF-FairHO"
DATASET_NAME = "AOF-Fairbench"
#DATASET_NAME = "Adult-2021"

EXPERIMENT_NAME = "randomly-generated-configs"
SET_TEST_THRESHOLD_ON_VALIDATION = False

N_THREADS = 20
SEED = 42

# USE RANDOMIZED CLASSIFIER FOR FAIRGBM PREDICTIONS ?
FAIRGBM_RANDOMIZED_CLASSIFIER = True

# Minimum number of Boosting iterations to use for the randomized classifier
MIN_ITER = 100
#MIN_ITER = 250

In [4]:
root_path = Path("/mnt/home/andre.cruz/fair-boosting/")
#root_path = Path("/home/andre.cruz/Documents/fair-boosting/")

experiment_path = root_path / "experiments" / DATASET_NAME

data_path = root_path / "data" / DATASET_NAME
columns_path = data_path / "cols_order.csv"

results_path = experiment_path / "results" / EXPERIMENT_NAME

# Data paths

In [5]:
# AOF (FairHO version) data paths
if DATASET_NAME == "AOF-FairHO":
    train_data_path = data_path / "pre-processed_train.header.csv"
    val_data_path = data_path / "pre-processed_validation.header.csv"
    test_data_path = data_path / "pre-processed_test.header.csv"

    label_col = "fraud_bool"
    sensitive_col = "age-group"
    
    TARGET_METRIC = "fpr"; TARGET_VALUE = 0.05

elif DATASET_NAME == "AOF-Fairbench":
    train_data_path = data_path / "candidate_random_sample_handpicked_1_train_sanitized_one_hot.processed-for-lightgbm-cpp.TRAIN.csv"
    val_data_path = data_path / "candidate_random_sample_handpicked_1_train_sanitized_one_hot.processed-for-lightgbm-cpp.VAL.csv"
    test_data_path = data_path / "candidate_random_sample_handpicked_1_val_sanitized_one_hot.processed-for-lightgbm-cpp.TEST.csv"

    label_col = "fraud_bool"
    sensitive_col = "age-group"

    TARGET_METRIC = "fpr"; TARGET_VALUE = 0.05
    
elif DATASET_NAME == "Adult-2021":
    train_data_path = data_path / "ACSIncome.train.header.csv"
    val_data_path = data_path / "ACSIncome.validation.header.csv"
    test_data_path = data_path / "ACSIncome.test.header.csv"

    label_col = "PINCP"
    sensitive_col = "SEX"

    #TARGET_METRIC = "threshold"; TARGET_VALUE = 0.5

else:
    raise ValueError(f"Not configured for this dataset: {DATASET_NAME}")
    
print(f"Loaded dataset {DATASET_NAME}!")

Loaded dataset Adult-2021!


___

In [6]:
data_path = {
    # We don't really need train results, and they take a while to compute...
    #"train": train_data_path,
    "validation": val_data_path,
    "test": test_data_path,
}

## Load Data

In [7]:
def get_y_s(data):
    return data[label_col].to_numpy(dtype=int), data[sensitive_col].to_numpy(dtype=int)

## Train data is not needed (and some models may even use different train datasets)
data = {}
for key, val in data_path.items():
    if val is None: continue

    df = pd.read_csv(val, sep="\t", index_col=0, header=0)
    y, s = get_y_s(df)
    
    data[key] = df
    data[f"y_{key}"] = y
    data[f"s_{key}"] = s

CPU times: user 508 ms, sys: 160 ms, total: 668 ms
Wall time: 670 ms


___

## Compute predictions from a randomized classifier

In [8]:
from numpy.random import RandomState
rng = RandomState(SEED)

def randomized_classifier_predictions(model, df, min_iter: int = 1) -> np.ndarray:

    max_iter = model.num_trees()
    if min_iter >= max_iter:
        print(f"Got min_iter={min_iter}, max_iter={max_iter};", file=sys.stderr)
        min_iter = max(max_iter - 1, 0)

    num_rows, num_feats = df.shape
    y_scores = np.zeros(num_rows)
    
    # Assign a random model iteration to each row
    y_iter = rng.randint(low=min_iter, high=max_iter, size=(num_rows,))
    
    # For each model iteration, run predict for all corresponding rows
    for curr_iter in range(min_iter, max_iter):
        
        # Rows assigned to the current model iteration
        rows_for_curr_iter = np.argwhere(y_iter == curr_iter).flatten()

        curr_iter_preds = model.predict(
            df.iloc[rows_for_curr_iter],
            num_iteration=curr_iter,
            num_threads=N_THREADS)

        y_scores[rows_for_curr_iter] = curr_iter_preds

    return y_scores

___

## Load data from each experiment and compute results

In [9]:
from utils import evaluate_predictions

def evaluate_model(model, target_metric, target_value, set_test_threshold_on_validation: bool, randomized_classifier: bool = False):
    
    results = {}
    # Evaluate predictions for each data set
    ordered_sets = ["train", "validation", "test"]
    for elem in ordered_sets:
        if elem not in data:
            #print(f"{elem} set not found. Skipping...")
            continue

        df, y_true, s_true = data[elem], data[f"y_{elem}"], data[f"s_{elem}"]
        if randomized_classifier:
            y_scores = randomized_classifier_predictions(model, df, min_iter=MIN_ITER)
        else:
            y_scores = model.predict(df, num_threads=N_THREADS)
        
        if elem == "test" and set_test_threshold_on_validation:
            eval_kwargs = {"threshold": results["validation"]["threshold"]}
        else:
            eval_kwargs = {target_metric: target_value}

        results[elem] = evaluate_predictions(
            y_true=y_true, y_pred=y_scores, s_true=s_true,
            tie_breaker=True, **eval_kwargs,
        )

    return results

In [None]:
from utils import load_lightgbm_model
from tqdm import tqdm

models = {}
results = {}

#for experiment in tqdm(os.listdir(results_path)):
for exp_dir, subdirs, files in tqdm(os.walk(results_path)):
    exp_dir = Path(exp_dir)
    experiment = f"{exp_dir.parent.name}.{exp_dir.name}"

    # Model found under this directory ?
    if "model.txt" in files:
        # Load LGBM model
        model_path = exp_dir / "model.txt"
        models[experiment] = load_lightgbm_model(model_path)
        
        # Evaluate model and save results
        try:
            results[experiment] = evaluate_model(
                models[experiment], target_metric=TARGET_METRIC, target_value=TARGET_VALUE,
                set_test_threshold_on_validation=SET_TEST_THRESHOLD_ON_VALIDATION,
                randomized_classifier=(FAIRGBM_RANDOMIZED_CLASSIFIER and exp_dir.parent.name.startswith("FairGBM"))
            )
        except Exception as e:
            print(f"Failed evaluating model '{experiment}', error: {e}")

## Save results to a json file

In [19]:
# CHECK THIS!
RESULTS_NAME = (
    f"{DATASET_NAME}-results."
    f"{TARGET_METRIC}={TARGET_VALUE:.2}."
    f"use-val-threshold={SET_TEST_THRESHOLD_ON_VALIDATION}."
    f"randomized-clf={FAIRGBM_RANDOMIZED_CLASSIFIER}"
)
RESULTS_NAME

'Adult-2021-results.threshold=0.5.use-val-threshold=False.randomized-clf=False'

In [20]:
import json
from utils import NumpyJSONEncoder

results_file_path = results_path / f"{RESULTS_NAME}.json"
with open(results_file_path, "w") as out_file:
    json.dump(results, out_file, indent=4, cls=NumpyJSONEncoder)
    print(f"Saved results to file at '{results_file_path}'")

Saved results to file at '/mnt/home/andre.cruz/fair-boosting/experiments/Adult-2021/results/randomly-generated-configs/Adult-2021-results.threshold=0.5.use-val-threshold=False.randomized-clf=False.json'


## Organize results in a table (pd.DataFrame)

In [21]:
results_df = {
    (frst_lvl, scnd_lvl): value
    for frst_lvl in results
    for (scnd_lvl, value) in results[frst_lvl].items()
}

results_df = pd.DataFrame(results_df).transpose()
results_df

Unnamed: 0,Unnamed: 1,threshold,pp,pn,prevalence,pred-prevalence,fpr,tnr,tpr,fnr,accuracy,...,tpr_group-1,fnr_group-1,accuracy_group-1,precision_group-1,roc_auc_group-1,f1_group-1,fpr_ratio,fpr_diff,fnr_ratio,fnr_diff
LightGBM.000,val,0.5,116814.0,214272.0,0.370753,0.352821,0.133497,0.866503,0.725061,0.274939,0.814063,...,0.668811,0.331189,0.833343,0.735263,0.889568,0.700465,0.568633,0.075109,0.727199,0.090349
LightGBM.000,test,0.5,117133.0,213953.0,0.370315,0.353784,0.134517,0.865483,0.726628,0.273372,0.814063,...,0.669821,0.330179,0.832847,0.729899,0.888302,0.698571,0.579589,0.073141,0.725520,0.090627
LightGBM.001,val,0.5,115217.0,215869.0,0.370753,0.347997,0.131749,0.868251,0.715017,0.284983,0.811439,...,0.659464,0.340536,0.830217,0.731393,0.888730,0.693569,0.586979,0.070067,0.737980,0.089227
LightGBM.001,test,0.5,115266.0,215820.0,0.370315,0.348145,0.132094,0.867906,0.715520,0.284480,0.811475,...,0.659571,0.340429,0.830799,0.729395,0.888058,0.692728,0.584782,0.070681,0.737807,0.089258
LightGBM.002,val,0.5,117592.0,213494.0,0.370753,0.355171,0.135863,0.864137,0.727383,0.272617,0.813435,...,0.671034,0.328966,0.831859,0.730050,0.889653,0.699299,0.580545,0.073710,0.724875,0.090507
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LightGBM.097,test,0.5,115951.0,215135.0,0.370315,0.350214,0.129058,0.870942,0.726270,0.273730,0.817368,...,0.666237,0.333763,0.835229,0.738391,0.893380,0.700461,0.572399,0.071733,0.713050,0.095773
LightGBM.098,val,0.5,115354.0,215732.0,0.370753,0.348411,0.127775,0.872225,0.722878,0.277122,0.816854,...,0.666415,0.333585,0.834902,0.740893,0.894497,0.701683,0.579307,0.069587,0.728135,0.090690
LightGBM.098,test,0.5,115619.0,215467.0,0.370315,0.349211,0.128933,0.871067,0.723774,0.276226,0.816522,...,0.666237,0.333763,0.835109,0.738051,0.893148,0.700308,0.575013,0.071095,0.724980,0.091792
LightGBM.099,val,0.5,115285.0,215801.0,0.370753,0.348203,0.129647,0.870353,0.719139,0.280861,0.814290,...,0.659076,0.340924,0.832664,0.738475,0.890821,0.696520,0.566769,0.073355,0.717027,0.096472


In [22]:
results_df.to_csv(results_path / f"{RESULTS_NAME}.csv")

___
___

In [15]:
results_df = pd.read_csv(results_path / f"{RESULTS_NAME}.csv", index_col=(0,1))

In [29]:
if DATASET_NAME in ("AOF-FairHO", "AOF-Fairbench"):
    perf_metric = "Recall"
    fair_metric = "fpr_ratio"

elif DATASET_NAME == "Adult-2021":
    perf_metric = "accuracy"
    fair_metric = "fnr_ratio"

else:
    raise ValueError(f"Invalid dataset '{DATASET_NAME}'")

## TEST Results!

In [32]:
## Only TEST results
test_results_table = results_df.loc[pd.IndexSlice[:, "test"], :].droplevel(1, axis=0)
test_results_table[perf_metric] = 1. - test_results_table["fnr"]
test_results_table = test_results_table.drop(columns="fnr")

eval_columns = [
    perf_metric,
    fair_metric,
    "fpr_diff",
    "fnr_diff",
    "roc_auc",
    "threshold",
]
test_results_table = test_results_table[eval_columns]
test_results_table["model"] = [model_idx[:-4] for model_idx in test_results_table.index]
test_results_table["id"] = [int(model_idx[-3:]) for model_idx in test_results_table.index]

test_results_table.sort_values(by=perf_metric, axis=0, ascending=False)

Unnamed: 0,accuracy,fnr_ratio,fpr_diff,fnr_diff,roc_auc,threshold,model,id
LightGBM.042,0.734181,0.705299,0.075202,0.096087,0.895806,0.5,LightGBM,42
LightGBM.016,0.734075,0.703294,0.074600,0.096929,0.895590,0.5,LightGBM,16
LightGBM.024,0.732990,0.706327,0.073590,0.096104,0.896493,0.5,LightGBM,24
LightGBM.074,0.732868,0.707968,0.073097,0.095491,0.895424,0.5,LightGBM,74
LightGBM.030,0.732631,0.704950,0.073505,0.096787,0.896509,0.5,LightGBM,30
...,...,...,...,...,...,...,...,...
LightGBM.045,0.714859,0.727064,0.075356,0.093888,0.887405,0.5,LightGBM,45
LightGBM.063,0.712901,0.738176,0.072006,0.089928,0.886377,0.5,LightGBM,63
LightGBM.088,0.712053,0.715589,0.075613,0.099663,0.888380,0.5,LightGBM,88
LightGBM.029,0.700471,0.774211,0.063110,0.078780,0.885179,0.5,LightGBM,29


## VALIDATION Results!

In [33]:
## Only VALIDATION results
val_results_table = results_df.loc[pd.IndexSlice[:, "validation"], :].droplevel(1, axis=0)
val_results_table[perf_metric] = 1. - val_results_table["fnr"]
val_results_table = val_results_table.drop(columns="fnr")

eval_columns = [
    perf_metric,
    fair_metric,
    "fpr_diff",
    "fnr_diff",
    "roc_auc",
    "threshold",
]
val_results_table = val_results_table[eval_columns]
val_results_table["model"] = [model_idx[:-4] for model_idx in val_results_table.index]
val_results_table["id"] = [int(model_idx[-3:]) for model_idx in val_results_table.index]

val_results_table.sort_values(by=perf_metric, axis=0, ascending=False)

Unnamed: 0,accuracy,fnr_ratio,fpr_diff,fnr_diff,roc_auc,threshold,model,id
LightGBM.016,0.733110,0.709953,0.076147,0.094470,0.896293,0.5,LightGBM,16
LightGBM.042,0.732646,0.706241,0.076000,0.096117,0.896545,0.5,LightGBM,42
LightGBM.024,0.731627,0.712110,0.074545,0.094134,0.897247,0.5,LightGBM,24
LightGBM.030,0.730829,0.711422,0.075021,0.094689,0.897252,0.5,LightGBM,30
LightGBM.049,0.730780,0.726034,0.072227,0.088925,0.892863,0.5,LightGBM,49
...,...,...,...,...,...,...,...,...
LightGBM.045,0.713061,0.723766,0.073721,0.095725,0.888174,0.5,LightGBM,45
LightGBM.063,0.712263,0.741570,0.071359,0.088618,0.887185,0.5,LightGBM,63
LightGBM.088,0.711114,0.707344,0.074540,0.103381,0.889309,0.5,LightGBM,88
LightGBM.029,0.698373,0.775578,0.061098,0.078686,0.885756,0.5,LightGBM,29
