In [49]:
import camelbird as cb
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Show NumPy floats with 3 digits of precision so printed arrays stay compact.
np.set_printoptions(precision=3)

# Measuring Fairness

First, we retrieve the COMPAS dataset from OpenML and train a simple logistic regression classifier.

In [50]:
# Retrieve the COMPAS data from OpenML.
# - data_id is passed as an integer: fetch_openml documents it as an int and
#   recent scikit-learn releases validate the parameter type.
# - as_frame=False requests NumPy arrays, which the positional column indexing
#   below (X_train[:, sensitive_index]) relies on; the default as_frame='auto'
#   may return a pandas DataFrame instead.
compas = fetch_openml(data_id=42192, as_frame=False)
X = compas['data']
# np.int was only an alias for the builtin int; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is used directly.
y = compas['target'].astype(int)
feature_names = compas['feature_names']

# Split into train and test sets (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Extract the sensitive feature column (African American indicator).
sensitive_index = feature_names.index('race_African-American')
a_train, a_test = X_train[:, sensitive_index], X_test[:, sensitive_index]

# Train a simple classifier with default hyperparameters.
lr = LogisticRegression()
lr.fit(X_train, y_train)

LogisticRegression()

In [67]:
# Report test accuracy first, then every fairness metric in three flavours
# (raw scores, 'diff' aggregate, 'ratio' aggregate).
y_pred = lr.predict(X_test)
print("Accuracy: \t\t\t {0:.3f}".format(lr.score(X_test, y_test)))

# Display label paired with the camelbird metric function that produces it.
fairness_metrics = (
    ("Equal Opportunity", cb.metrics.equal_opportunity),
    ("Equal Odds", cb.metrics.equal_odds),
    ("Demographic Parity", cb.metrics.demographic_parity),
)

for label, metric in fairness_metrics:
    # Arguments shared by all three calls of the same metric.
    metric_kwargs = dict(y_true=y_test, y_pred=y_pred, a=a_test)
    print()  # blank line separating metric groups
    print("{} Scores: \t {}".format(label, metric(aggregate=None, **metric_kwargs)))
    print("{} Diff: \t {:.3f}".format(label, metric(aggregate='diff', **metric_kwargs)))
    print("{} Ratio: \t {:.3f}".format(label, metric(aggregate='ratio', **metric_kwargs)))

Accuracy: 			 0.689

Equal Opportunity Scores: 	 [0.402 0.661]
Equal Opportunity Diff: 	 -0.259
Equal Opportunity Ratio: 	 1.644

Equal Odds Scores: 	 [[0.402 0.661]
 [0.895 0.708]]
Equal Odds Diff: 	 -0.036
Equal Odds Ratio: 	 1.217

Demographic Parity Scores: 	 [0.224 0.479]
Demographic Parity Diff: 	 -0.255
Demographic Parity Ratio: 	 2.143
