In [35]:
import camelbird as cb
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Print NumPy arrays with three digits of floating-point precision.
np.set_printoptions(precision=3)

# Measuring Fairness

First, we retrieve the COMPAS data set from OpenML, split it into training and test sets, and train a simple logistic regression classifier.

In [43]:
# Retrieve the COMPAS data set from OpenML.
# - data_id is passed as an integer: recent scikit-learn releases validate
#   this parameter and reject a string such as '42192'.
# - as_frame=False requests NumPy arrays; the positional column slicing
#   below (X[:, index]) does not work on a pandas DataFrame, which newer
#   scikit-learn versions may return by default.
compas = fetch_openml(data_id=42192, as_frame=False)
X = compas['data']
# np.int was a deprecated alias of the builtin int and has been removed from
# NumPy (1.24+); the builtin is equivalent.
y = compas['target'].astype(int)
feature_names = compas['feature_names']

# Split into train and test sets (25% held out, fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Sensitive attribute: the 'race_African-American' column, sliced out of the
# train and test feature matrices.
sensitive_index = feature_names.index('race_African-American')
a_train, a_test = X_train[:, sensitive_index], X_test[:, sensitive_index]

# Train a simple classifier with default hyperparameters.
lr = LogisticRegression()
lr.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [44]:
# Overall accuracy of the fitted classifier on the held-out test set.
test_accuracy = lr.score(X_test, y_test)
print("Accuracy: \t\t\t %.2f" % test_accuracy)

# Equal-opportunity metric on the test predictions, reported once for each
# aggregation mode passed to camelbird (None, 'diff', 'ratio').
y_pred = lr.predict(X_test)
report_rows = (
    ("Equal Opportunity Scores: \t %s", None),
    ("Equal Opportunity Difference: \t %.2f", 'diff'),
    ("Equal Opportunity Ratio: \t %.2f", 'ratio'),
)
for template, mode in report_rows:
    value = cb.metrics.equal_opportunity(y_true=y_test, y_pred=y_pred, a=a_test, aggregate=mode)
    print(template % value)

Accuracy: 			 0.69
Equal Opportunity Scores: 	 [0.402 0.661]
Equal Opportunity Difference: 	 -0.26
Equal Opportunity Ratio: 	 1.64
