# Simple FairGBM example on the UCI Adult dataset

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

## Load data

In [2]:
from utils import load_uci_adult, split_X_Y_S_uci_adult

# Load the UCI Adult train and test splits.
# NOTE(review): the progress bar in this cell's output suggests the helper
# downloads the data on the fly — confirm in utils.load_uci_adult.
train_set, test_set = load_uci_adult()
# Separate each split into features (X), labels (Y), and sensitive attributes (S).
X_train, Y_train, S_train = split_X_Y_S_uci_adult(train_set)
X_test, Y_test, S_test = split_X_Y_S_uci_adult(test_set);

100% [................................................................................] 5229 / 5229

## Train model

In [3]:
from fairgbm import FairGBMClassifier

# Instantiate the fairness-constrained classifier (hyper-parameters only; no training happens here)
fairgbm_clf = FairGBMClassifier(
    constraint_type="FNR",    # constraint on equal group-wise TPR (equal opportunity)
    n_estimators=200,         # core parameters from vanilla LightGBM
    multiplier_learning_rate=0.2,  # FairGBM-specific; presumably the step size for the constraint multipliers — confirm in FairGBM docs
    constraint_fnr_slack=0.05,     # FairGBM-specific; presumably the tolerated slack on the FNR constraint — confirm in FairGBM docs
    random_state=42,               # fixed seed so repeated runs train the same model
    n_jobs=-2,                     # NOTE(review): presumably "all cores but one" — confirm how this library treats negative values
)

In [4]:
%%time
# Train using features (X), labels (Y), and sensitive attributes (S).
# The sensitive attribute is passed separately via `constraint_group` rather than
# as part of X; the cell is timed with %%time so readers can see the training cost.
fairgbm_clf.fit(X_train, Y_train, constraint_group=S_train)

CPU times: user 6.44 s, sys: 45.8 ms, total: 6.49 s
Wall time: 854 ms


In [5]:
# Predict on the held-out test features. The result is later fed to a confusion
# matrix, so it is expected to hold class labels rather than scores
# (presumably thresholded by the classifier's default — confirm in FairGBM docs).
Y_test_pred = fairgbm_clf.predict(X_test)

In [6]:
from sklearn.metrics import confusion_matrix

def evaluate_predictions(y_true, y_pred, labels=(0, 1)):
    """Compute accuracy and recall for a set of binary class predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Predicted class labels (hard predictions, not scores).
    labels : sequence of two labels, default (0, 1)
        The ``(negative, positive)`` class labels, in that order. Pass the
        actual pair if the data uses a different encoding.

    Returns
    -------
    dict
        ``{"accuracy": ..., "recall": ...}``. A metric whose denominator is
        zero (no samples for accuracy, no positive samples for recall) is
        returned as ``nan``.
    """
    # Pinning `labels` guarantees a 2x2 matrix, so the four-way unpacking below
    # also works on slices where only one class occurs (e.g. a small group).
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=list(labels)).ravel()

    n_samples = tn + fp + fn + tp
    n_positives = tp + fn

    return {
        # Accuracy score
        "accuracy": (tp + tn) / n_samples if n_samples else float("nan"),
        # Recall score (TPR; or 1-FNR)
        "recall": tp / n_positives if n_positives else float("nan"),
    }

In [7]:
# Boolean masks over the test rows, one per sensitive group.
# NOTE(review): treats S == 1.0 as "male" and every other value as "female" —
# confirm the encoding produced by utils.split_X_Y_S_uci_adult.
male_group_filter = (S_test.to_numpy() == 1.0)
female_group_filter = ~male_group_filter

In [8]:
# Score the predictions on the whole test set, then on each sensitive group
# separately by applying the same boolean mask to labels and predictions.
global_metrics = evaluate_predictions(Y_test, Y_test_pred)
male_group_metrics = evaluate_predictions(Y_test[male_group_filter], Y_test_pred[male_group_filter])
female_group_metrics = evaluate_predictions(Y_test[female_group_filter], Y_test_pred[female_group_filter])

In [9]:
# Show the metric dicts one per line, in the order: global, male group, female group.
print(global_metrics, male_group_metrics, female_group_metrics, sep="\n")

{'accuracy': 0.8712609790553406, 'recall': 0.6544461778471139}
{'accuracy': 0.8380294659300184, 'recall': 0.6578624078624079}
{'accuracy': 0.9378343479062904, 'recall': 0.635593220338983}


## Compute equal opportunity metric

In [10]:
# Equal opportunity is summarised here as the ratio of the lowest to the highest
# group-wise recall (TPR): 100% means both groups have identical recall.
groupwise_recalls = [male_group_metrics["recall"], female_group_metrics["recall"]]
print(f"Equal opportunity: {min(groupwise_recalls) / max(groupwise_recalls):.1%}")

Equal opportunity: 96.6%
