In [None]:
import numpy as np
import pandas as pd

import shap
shap.initjs()

from fairlearn.reductions import ExponentiatedGradient
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler

In [None]:
# Load the Adult dataset bundled with shap and name the column that is
# treated as the sensitive attribute throughout the notebook.
X, Y = shap.datasets.adult()
sensitive_attribute = 'Sex'

# Pull the sensitive column out before encoding; both the features and the
# sensitive column get a fresh 0..n-1 index so they stay row-aligned.
A = X[sensitive_attribute].reset_index(drop=True)
X = pd.get_dummies(X).reset_index(drop=True)

# Standardised copy of the features.
# NOTE(review): X_scaled is not referenced by any other cell visible here;
# the learners below are fitted on the unscaled X.
sc = StandardScaler()
X_scaled = pd.DataFrame(sc.fit_transform(X), columns=X.columns)

# Encode the target labels as integers.
le = LabelEncoder()
Y = le.fit_transform(Y)

In [None]:
class LeastSquaresBinaryClassifierLearner:
    """Binary classifier that thresholds a weighted least-squares linear fit.

    ``fit`` solves ``min_w sum_i sample_weight_i * (x_i . w - y_i) ** 2`` with
    ``np.linalg.lstsq`` and ``predict`` labels a row 1 when its linear score
    exceeds 0.5, else 0.

    Attributes:
        weights: ``pd.Series`` of fitted coefficients (indexed by column name
            when fitted on a DataFrame), or ``None`` before ``fit``.
        lsqinfo: the raw tuple returned by ``np.linalg.lstsq`` (set by ``fit``).
    """

    def __init__(self):
        self.weights = None

    def fit(self, X, Y, sample_weight=None):
        """Fit the coefficients by weighted least squares.

        Args:
            X: 2-D feature matrix (DataFrame or array-like), one row per sample.
            Y: 1-D array-like of numeric targets, one per row of ``X``.
            sample_weight: non-negative per-sample weights (list, ndarray or
                pandas Series; matched to rows by position). A scalar or a
                length-1 sequence is broadcast to every row. ``None`` (the
                default) means unit weight for every row.
        """
        # Work on plain float arrays: indexing a pandas Series with
        # ``[:, np.newaxis]`` is not supported by recent pandas, and Series
        # arithmetic would align on index labels instead of row position.
        features = np.asarray(X, dtype=float)
        targets = np.asarray(Y, dtype=float)
        if sample_weight is None:
            sample_weight = np.ones(features.shape[0])
        sqrtW = np.sqrt(np.atleast_1d(np.asarray(sample_weight, dtype=float)))

        # Scaling rows and targets by sqrt(w) turns the weighted problem into
        # an ordinary least-squares problem.
        matX = features * sqrtW[:, np.newaxis]
        vecY = targets * sqrtW
        self.lsqinfo = np.linalg.lstsq(matX, vecY, rcond=-1)

        # Label coefficients with column names only when X actually has them;
        # ``list(X)`` on a plain ndarray would yield rows, not column labels.
        index = list(X.columns) if hasattr(X, 'columns') else None
        self.weights = pd.Series(self.lsqinfo[0], index=index)

    def predict(self, X):
        """Return 0/1 labels: 1 where the linear score ``X . weights`` > 0.5.

        Args:
            X: object exposing ``.dot`` (DataFrame or ndarray) with the same
                column order used in ``fit``.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self.weights is None:
            raise RuntimeError("predict() called before fit()")
        pred = X.dot(np.asarray(self.weights))
        return 1 * (pred > 0.5)

In [None]:
def get_error(y, predicted_y):
    """Return the misclassification rate: the fraction of positions where
    ``y`` and ``predicted_y`` differ.

    Both arguments must support element-wise ``==`` (e.g. numpy arrays or
    pandas Series of equal length).
    """
    is_match = (y == predicted_y)
    n_total = len(is_match)
    n_matching = sum(is_match)
    return 1 - n_matching / n_total

In [None]:
# Baseline: fit the plain least-squares learner with no fairness constraint.
# sample_weight=[1] is a length-1 weight vector; inside fit() its square root
# is reshaped to (1, 1) and broadcast over every row, i.e. unit weights.
unmitigated_predictor = LeastSquaresBinaryClassifierLearner()
unmitigated_predictor.fit(X, Y, sample_weight=[1])

# 0/1 predictions on the same data the model was fitted on.
unmitigated_y = pd.Series(unmitigated_predictor.predict(X),
                          name='unmitigated_predicted_y')

In [None]:
# Error of the unmitigated model, kept as a one-element list: later cells read
# it with [0] and also pass it to plt.plot as the y-values.
error_unmitigated = [get_error(Y, unmitigated_y)]

In [None]:
# ErrorRate violation is calculated as:
# max(abs( E[abs(h(x) - Y)| A = a] - E[abs(h(x)- y)] ))
def get_er_violation(predict_y, A, Y):
    """Return the largest gap between a group's error rate and the overall one.

    Computes ``max_a | E[|h(x) - y| | A = a] - E[|h(x) - y|] |``.

    Args:
        predict_y: predicted labels h(x) (pandas Series or array-like).
        A: sensitive-feature value for each sample. Grouping uses this
            argument directly, whatever its name and whatever type its
            values have (ints, strings, ...).
        Y: true labels, same length as ``predict_y``.

    Inputs that are pandas objects are combined by index label, so they are
    expected to share the same index.

    Returns:
        The maximum absolute difference as a float.

    Raises:
        ValueError: if there is no group to compare (e.g. empty input).
    """
    label_name = 'error'
    group_name = 'group'
    error = pd.Series(abs(predict_y - Y), name=label_name)
    groups = pd.Series(A, name=group_name)

    # E[abs(h(x) - y)| A = a] for every group a, looked up by group key
    # rather than by integer position.
    error_by_group = (pd.concat([error, groups], axis=1)
                      .groupby(group_name)[label_name]
                      .mean())
    if error_by_group.empty:
        raise ValueError("no sensitive-feature groups to compare")

    # E[abs(h(x) - y)]; skipna=False so a misaligned input (NaN error) is
    # surfaced as NaN instead of being silently dropped.
    overall_error = error.mean(skipna=False)

    return (error_by_group - overall_error).abs().max()

In [None]:
# Wrap the encoded labels in a Series so they can be combined with the
# prediction Series inside the violation helpers.
true_Y = pd.Series(Y, name='true_y')
# One-element list: later cells read it with [0] and pass it to plt.plot.
er_violation_unmitigated = [get_er_violation(unmitigated_y, A, true_Y)]

In [None]:
# Experiment 1 - Calculating violation by varying epsilon
# For each eps, fit ExponentiatedGradient on the full data, predict on the
# same data, and record the error and the error-rate violation.
# NOTE(review): the reduction is constrained with EqualizedOdds(), while the
# recorded violation is the error-rate difference from get_er_violation -
# confirm this pairing is intended.
from fairlearn.reductions import EqualizedOdds

eps_list = [0.001, 0.005, 0.01, 0.05, 0.1]
# Parallel to eps_list: entry i holds the result for eps_list[i].
expgrad_error = []
er_expgrad_violation = []

# The same learner instance is handed to every ExponentiatedGradient below.
estimator = LeastSquaresBinaryClassifierLearner()

for eps in eps_list:
    expgrad_X = ExponentiatedGradient(estimator,
                                      constraints=EqualizedOdds(),
                                      eps=eps, nu=1e-6)

    expgrad_X.fit(X, Y, sensitive_features=A)
    expgrad_y = pd.Series(expgrad_X.predict(X), name='expgrad_predicted_y')

    error_expgrad = get_error(Y, expgrad_y)
    expgrad_error.append(error_expgrad)

    er_violation_expgrad = get_er_violation(expgrad_y, A, true_Y)
    er_expgrad_violation.append(er_violation_expgrad)


In [None]:
# Tab-separated summary: one row per epsilon, then the unmitigated baseline.
row_format = '{}\t\t{}\t\t{}'
print('{}\t\t{}\t\t\t{}'.format('Epsilon', 'Max Violation', 'Error'))
for eps, violation, err in zip(eps_list, er_expgrad_violation, expgrad_error):
    print(row_format.format(eps, violation, err))
print(row_format.format('Unmit.', er_violation_unmitigated[0],
                        error_unmitigated[0]))

In [None]:
import matplotlib.pyplot as plt

# Error vs. fairness violation: one point per epsilon for the mitigated
# models, plus a single red marker for the unmitigated baseline.
fig, ax = plt.subplots()
ax.scatter(er_expgrad_violation, expgrad_error, label="expgrad")
ax.plot(er_violation_unmitigated, error_unmitigated, 'ro', label="unmitigated")
ax.set_xlabel('Violation of the fairness constraint')
ax.set_ylabel('Error')
ax.set_title('Adult UCI / Error Rate Difference / Simple Learner')
ax.legend()
plt.show()

In [None]:
# ErrorRate violation is calculated as:
# max(r * E[abs(h(x) - Y)| A = a] - E[abs(h(x)- y)],
#     - E[abs(h(x) - Y)| A = a] + r * E[abs(h(x)- y)])
def get_er_ratio_violation(predict_y, A, Y, ratio):
    """Return the worst error-rate ratio violation over all groups.

    With ``e_a = E[|h(x) - y| | A = a]`` and ``e = E[|h(x) - y|]`` this is
    ``max_a max(ratio * e_a - e, ratio * e - e_a)``.

    Args:
        predict_y: predicted labels h(x) (pandas Series or array-like).
        A: sensitive-feature value for each sample. Grouping uses this
            argument directly, whatever its name and whatever type its
            values have (ints, strings, ...).
        Y: true labels, same length as ``predict_y``.
        ratio: the multiplier ``r`` in the formula above.

    Inputs that are pandas objects are combined by index label, so they are
    expected to share the same index.

    Returns:
        The maximum violation as a float (may be negative).

    Raises:
        ValueError: if there is no group to compare (e.g. empty input).
    """
    label_name = 'error'
    group_name = 'group'
    error = pd.Series(abs(predict_y - Y), name=label_name)
    groups = pd.Series(A, name=group_name)

    # E[abs(h(x) - y)| A = a] for every group a, looked up by group key
    # rather than by integer position.
    error_by_group = (pd.concat([error, groups], axis=1)
                      .groupby(group_name)[label_name]
                      .mean())
    if error_by_group.empty:
        raise ValueError("no sensitive-feature groups to compare")

    # E[abs(h(x) - y)]; skipna=False so a misaligned input (NaN error) is
    # surfaced as NaN instead of being silently dropped.
    overall_error = error.mean(skipna=False)

    # The two one-sided constraints, evaluated for every group at once.
    group_over_overall = (ratio * error_by_group) - overall_error
    overall_over_group = (ratio * overall_error) - error_by_group

    return max(group_over_overall.max(), overall_over_group.max())

In [None]:
# Experiment 2 - Calculating the error-rate *ratio* violation for every
# (ratio, epsilon) pair. Results are stored per ratio in the three dicts
# below; each list is parallel to eps_list.
# NOTE(review): the reduction is constrained with EqualizedOdds(), which is
# not given `ratio`; in this cell `ratio` only affects the reported violation
# (get_er_ratio_violation), not the fitted models - confirm this is intended.
from fairlearn.reductions import EqualizedOdds

ratio_list = [0.8, 0.85, 0.9, 0.95, 1.0]
eps_list = [0.001, 0.005, 0.01, 0.05, 0.1]

ratio_to_violation = {}
ratio_to_error = {}
ratio_to_unmitigated_er_violation = {}

for ratio in ratio_list:
    expgrad_error = []
    er_expgrad_violation = []
    estimator = LeastSquaresBinaryClassifierLearner()

    for eps in eps_list:
        expgrad_X = ExponentiatedGradient(estimator,
                                          constraints=EqualizedOdds(),
                                          eps=eps, nu=1e-6)

        expgrad_X.fit(X, Y, sensitive_features=A)
        expgrad_y = pd.Series(expgrad_X.predict(X), name='expgrad_predicted_y')

        error_expgrad = get_error(Y, expgrad_y)
        expgrad_error.append(error_expgrad)

        er_violation_expgrad = get_er_ratio_violation(expgrad_y, A, true_Y, ratio)
        er_expgrad_violation.append(er_violation_expgrad)

    ratio_to_violation[ratio] = er_expgrad_violation
    ratio_to_error[ratio] = expgrad_error

    # Store the baseline straight into the dict. Assigning it to
    # `er_violation_unmitigated` would replace the one-element list built for
    # Experiment 1 with a scalar and break re-running that experiment's
    # print/plot cells (they index it with [0] and plot it as a sequence).
    ratio_to_unmitigated_er_violation[ratio] = get_er_ratio_violation(
        unmitigated_y, A, true_Y, ratio)

In [None]:
# One tab-separated table per ratio: a row per epsilon, then the unmitigated
# baseline for that ratio.
header = '{}\t\t{}\t\t\t{}'.format('Epsilon', 'Max Violation', 'Error')
row_format = '{}\t\t{}\t\t{}'
for ratio in ratio_list:
    print('Ratio: {}'.format(ratio))
    print(header)
    rows = zip(eps_list, ratio_to_violation[ratio], ratio_to_error[ratio])
    for eps, violation, err in rows:
        print(row_format.format(eps, violation, err))
    print(row_format.format('Unmit.', ratio_to_unmitigated_er_violation[ratio],
                            error_unmitigated[0]))

In [None]:
import matplotlib.pyplot as plt

# One figure per ratio: mitigated models (one point per epsilon) against the
# unmitigated baseline (single red marker).
for ratio in ratio_list:
    fig, ax = plt.subplots()
    ax.scatter(ratio_to_violation[ratio], ratio_to_error[ratio], label="expgrad")
    ax.plot([ratio_to_unmitigated_er_violation[ratio]], error_unmitigated, 'ro', label="unmitigated")
    ax.set_xlabel('Violation of the fairness constraint')
    ax.set_ylabel('Error')
    ax.set_title('Adult UCI / Error Rate Ratio = {} / Simple Learner'.format(ratio))
    ax.legend()
    plt.show()