In [1]:
!pip install fairlearn



In [2]:
import pandas as pd
import numpy as np

## Load data and preprocess

In [3]:
data = pd.read_csv('fairness/data/preprocessed/adult_numerical.csv', sep=',')

In [4]:
# remove individuals in 'Other' race category
data = data[data['race'] != 'Other']

In [5]:
data['A_race'] = data['race'].astype("category").cat.codes
data['A_sex'] = data['sex'].astype("category").cat.codes

In [6]:
# one hot encode race and sex
data = pd.get_dummies(data, columns = ['race', 'sex'])

In [7]:
# remove redundant columns
data = data.drop(columns=['education-num', 'sex_Female', 'race_Amer-Indian-Eskimo',
                   'workclass_Without-pay', 'education_1st-4th', 'marital-status_Never-married',
                  'occupation_Other-service', 'relationship_Other-relative', 'native-country_Yugoslavia'])

In [8]:
# make income-per-year binary
data['Y'] = (data['income-per-year'] != '<=50K')

In [9]:
data = data.drop(columns=['income-per-year'])

In [10]:
# get convert each combination of race and sex to a numerical category
data['A_race-sex'] = data['race-sex'].astype("category").cat.codes

In [11]:
data = data.drop(columns=['race-sex'])

In [12]:
data.head()

Unnamed: 0,age,capital-gain,capital-loss,hours-per-week,workclass_Federal-gov,workclass_Local-gov,workclass_Private,workclass_Self-emp-inc,workclass_Self-emp-not-inc,workclass_State-gov,...,native-country_United-States,native-country_Vietnam,A_race,A_sex,race_Asian-Pac-Islander,race_Black,race_White,sex_Male,Y,A_race-sex
0,39,2174,0,40,0,0,0,0,0,1,...,1,0,3,1,0,0,1,1,False,7
1,50,0,0,13,0,0,0,0,1,0,...,1,0,3,1,0,0,1,1,False,7
2,38,0,0,40,0,0,1,0,0,0,...,1,0,3,1,0,0,1,1,False,7
3,53,0,0,40,0,0,1,0,0,0,...,1,0,2,1,0,1,0,1,False,5
4,28,0,0,40,0,0,1,0,0,0,...,0,0,2,0,0,1,0,0,False,4


In [13]:
# shuffle rows for randomization
data = data.sample(frac=1)

In [14]:
# separate data into 50% train and 50% test set
sep = int(0.50 * len(data) + 0.5)
train_data = data[:sep]
test_data = data[sep:]

In [15]:
print(len(train_data))
print(len(test_data))

14966
14965


In [16]:
X_train = train_data.drop(columns=['Y', 'A_race', 'A_sex', 'A_race-sex'])
X_test = test_data.drop(columns=['Y', 'A_race', 'A_sex', 'A_race-sex'])
Y_train = train_data['Y']
Y_test = test_data['Y']
A_race_train = train_data['A_race']
A_race_test = test_data['A_race']
A_sex_train = train_data['A_sex']
A_sex_test = test_data['A_sex']
A_race_sex_train = train_data['A_race-sex']
A_race_sex_test = test_data['A_race-sex']

## Fit Logistic Regression Model with Fairness Constraints

In [61]:
from fairlearn.postprocessing import ThresholdOptimizer
from fairlearn.reductions import GridSearch, EqualizedOdds
from sklearn.linear_model import LogisticRegression
from fairlearn.metrics import (
    MetricFrame,
    selection_rate, demographic_parity_difference, demographic_parity_ratio,
    false_positive_rate, false_negative_rate,
    false_positive_rate_difference, false_negative_rate_difference,
    equalized_odds_difference)

from sklearn.metrics import balanced_accuracy_score, roc_auc_score

In [62]:
# Helper functions
def get_metrics_df(models_dict, y_true, group):
    metrics_dict = {
        "Overall selection rate": (
            lambda x: selection_rate(y_true, x), True),
        "Demographic parity difference": (
            lambda x: demographic_parity_difference(y_true, x, sensitive_features=group), True),
        "Demographic parity ratio": (
            lambda x: demographic_parity_ratio(y_true, x, sensitive_features=group), True),
        "------": (lambda x: "", True),
        "False positive rate difference": (
            lambda x: false_positive_rate_difference(y_true, x, sensitive_features=group), True),
        "False negative rate difference": (
            lambda x: false_negative_rate_difference(y_true, x, sensitive_features=group), True),
        "Equalized odds difference": (
            lambda x: equalized_odds_difference(y_true, x, sensitive_features=group), True),
        "  ------": (lambda x: "", True),
        "Overall AUC": (
            lambda x: roc_auc_score(y_true, x), False),
        "AUC difference": (
            lambda x: MetricFrame(roc_auc_score, y_true, x, sensitive_features=group).difference(method='between_groups'), False),
    }
    df_dict = {}
    for metric_name, (metric_func, use_preds) in metrics_dict.items():
        df_dict[metric_name] = [metric_func(preds) if use_preds else metric_func(scores) 
                                for model_name, (preds, scores) in models_dict.items()]
    return pd.DataFrame.from_dict(df_dict, orient="index", columns=models_dict.keys())

In [74]:
# Fit logistic regression model
model = LogisticRegression(max_iter=500, multi_class='ovr')
model.fit(X_train, Y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(max_iter=500, multi_class='ovr')

In [79]:
print('Unconstrained model accuracies:')
train_acc, test_acc = model.score(X_train, Y_train), model.score(X_test, Y_test)
unconstrained_test_preds = model.predict(X_test)
unconstrained_test_scores = model.predict_proba(X_test)[:, 1]
print('Train acc:', train_acc)
print('Test acc:', test_acc)
print('Overall generalization gap:', train_acc - test_acc)

Unconstrained model accuracies:
Train acc: 0.8427101429907791
Test acc: 0.8471099231540261
Overall generalization gap: -0.004399780163247047


### Equalized Odds

#### Using postprocessing algorithm from Hardt et. al "Equality of Opportunity in Supervised Learning"


In [81]:
postprocess_est = ThresholdOptimizer(
    estimator=model,
    constraints="equalized_odds",
    prefit=True)

In [82]:
postprocess_est.fit(X_train, Y_train, sensitive_features=A_sex_train)

ThresholdOptimizer(constraints='equalized_odds',
                   estimator=LogisticRegression(max_iter=500,
                                                multi_class='ovr'),
                   prefit=True)

In [83]:
postprocess_preds_train = postprocess_est.predict(X_train, sensitive_features=A_sex_train)
postprocess_preds_test = postprocess_est.predict(X_test, sensitive_features=A_sex_test)

In [86]:
print('Hardt et al model accuracies:')

train_acc = sum(postprocess_preds_train != Y_train) / len(postprocess_preds_train)
test_acc = sum(postprocess_preds_test != Y_test) / len(postprocess_preds_test)
print('Train acc:', train_acc)
print('Test acc:', test_acc)
print('Overall generalization gap:', train_acc - test_acc)

Hardt et al model accuracies:
Train acc: 0.19256982493652278
Test acc: 0.18757099899766122
Overall generalization gap: 0.004998825938861556


In [87]:
models_dict = { 'Unconstrained': (unconstrained_test_preds, unconstrained_test_scores),
    'Hardt et al.': (postprocess_preds_test, postprocess_preds_test)}
get_metrics_df(models_dict, Y_test, A_race_sex_test)

Unnamed: 0,Unconstrained,Hardt et al.
Overall selection rate,0.207551,0.196525
Demographic parity difference,0.2987,0.203715
Demographic parity ratio,0.0787458,0.28086
------,,
False positive rate difference,0.159132,0.119589
False negative rate difference,0.477778,0.548611
Equalized odds difference,0.477778,0.548611
------,,
Overall AUC,0.899805,0.713856
AUC difference,0.136121,0.258223
