In [None]:
import numpy as np
import pandas as pd

import shap
shap.initjs()

from fairlearn.reductions import ExponentiatedGradient
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler

In [None]:
# Load the Adult dataset that ships with shap: X holds the features, Y the labels.
X, Y = shap.datasets.adult()
sensitive_attribute = 'Sex'

# Take the sensitive column before get_dummies so A keeps the original column
# values, whatever get_dummies does to the feature frame.
A = X[sensitive_attribute]
X = pd.get_dummies(X)

# Standardised copy of the feature frame, with the column names restored.
# NOTE(review): X_scaled is not referenced by any later cell visible here; the
# learners below are fit on the unscaled X -- confirm that this is intended.
sc = StandardScaler()
X_scaled = sc.fit_transform(X)
X_scaled = pd.DataFrame(X_scaled, columns=X.columns)

# Encode the labels as integers; later cells look the classes up as 0 and 1.
le = LabelEncoder()
Y = le.fit_transform(Y)

# Give X and A a fresh default index so that they line up row-for-row with the
# Series built from predictions and labels in later cells.
X = X.reset_index(drop=True)
A = A.reset_index(drop=True)

In [None]:
class LeastSquaresBinaryClassifierLearner:
    """Binary classifier that thresholds a weighted least-squares fit at 0.5.

    Attributes:
        weights: pd.Series of fitted coefficients, indexed by feature name
            (or by column position when X has no ``columns``); None until
            ``fit`` has been called.
        lsqinfo: the raw tuple returned by ``np.linalg.lstsq`` for the last
            fit; None until ``fit`` has been called.
    """

    def __init__(self):
        self.weights = None
        self.lsqinfo = None

    def fit(self, X, Y, sample_weight=None):
        """Fit the coefficients by weighted least squares.

        Args:
            X: 2-D feature table (DataFrame or array-like), one row per sample.
            Y: 1-D array-like of numeric targets, one per row of X.
            sample_weight: optional 1-D array-like of non-negative weights.
                A length-1 value is broadcast to every row. None means every
                row has weight 1.

        Returns:
            self, so calls can be chained.
        """
        features = np.asarray(X, dtype=float)
        targets = np.asarray(Y, dtype=float)

        if sample_weight is None:
            sqrt_weights = np.ones(features.shape[0])
        else:
            # Convert to a plain ndarray first: a pandas Series would survive
            # np.sqrt as a Series, and Series do not support the 2-D indexing
            # used below.
            sqrt_weights = np.sqrt(np.asarray(sample_weight, dtype=float))

        # Scaling rows and targets by sqrt(w) turns ordinary least squares
        # into the weighted problem.
        weighted_features = features * sqrt_weights[:, np.newaxis]
        weighted_targets = targets * sqrt_weights
        self.lsqinfo = np.linalg.lstsq(weighted_features, weighted_targets,
                                       rcond=-1)

        if hasattr(X, 'columns'):
            feature_names = list(X.columns)
        else:
            feature_names = list(range(features.shape[1]))
        self.weights = pd.Series(self.lsqinfo[0], index=feature_names)
        return self

    def predict(self, X):
        """Return 1 where the linear score exceeds 0.5 and 0 elsewhere.

        The result has whatever container type ``X.dot`` produces for X.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self.weights is None:
            raise RuntimeError('predict() called before fit()')
        pred = X.dot(np.asarray(self.weights))
        return 1 * (pred > 0.5)

In [None]:
def get_error(y, predicted_y):
    """Return the fraction of positions where predicted_y differs from y.

    Both arguments are converted to NumPy arrays and compared by position, so
    lists, arrays and pandas Series (whatever their index labels) are accepted.

    Args:
        y: 1-D array-like of true labels.
        predicted_y: 1-D array-like of predicted labels, same shape as y.

    Returns:
        The misclassification rate as a float in [0, 1].

    Raises:
        ValueError: if the two inputs do not have the same shape.
        ZeroDivisionError: if the inputs are empty.
    """
    actual = np.asarray(y)
    predicted = np.asarray(predicted_y)
    if actual.shape != predicted.shape:
        # Refuse to broadcast: a silent (n, 1) vs (n,) comparison would be wrong.
        raise ValueError(
            'y and predicted_y must have the same shape, got {} and {}'.format(
                actual.shape, predicted.shape))
    n_correct = np.count_nonzero(actual == predicted)
    return 1 - n_correct / actual.size

In [None]:
# Baseline: fit the plain least-squares learner with no fairness constraint.
unmitigated_predictor = LeastSquaresBinaryClassifierLearner()
# Pass one unit weight per row explicitly instead of a length-1 list, which
# only worked because NumPy broadcast it across all rows.
unmitigated_predictor.fit(X, Y, sample_weight=np.ones(len(X)))

# Predictions on the training data, named so they can be selected by column
# name when computing the fairness violation.
unmitigated_y = pd.Series(unmitigated_predictor.predict(X),
                          name='unmitigated_predicted_y')

In [None]:
# Misclassification rate of the unmitigated predictor on the data it was fit
# on, kept in a one-element list so it can be handed straight to plt.plot later.
error_unmitigated = [get_error(Y, unmitigated_y)]

In [None]:
# Equalized-odds violation is calculated as the maximum over (a, y) of
#   abs(E[h(x) | A = a, Y = y] - E[h(x) | Y = y])
def get_eo_violation(predict_y, A, Y, label_name):
    """Return the largest equalized-odds gap of a set of predictions.

    For every observed (true label, sensitive group) pair, the mean prediction
    within that pair is compared with the mean prediction over all rows that
    share the true label; the largest absolute difference is returned.

    Args:
        predict_y: pd.Series of predictions; its name must equal label_name.
        A: pd.Series with the sensitive-feature value of each row.
        Y: pd.Series with the true label of each row.
        label_name: column name of the predictions (i.e. predict_y.name).

    Returns:
        The maximum absolute gap as a float (NaN when there are no rows).
    """
    # concat aligns the three Series on their index, one column per Series.
    frame = pd.concat([predict_y, Y, A], axis=1)
    predictions = frame[label_name]
    # Select the grouping columns by position so the function depends on its
    # arguments only, not on a notebook-level column name.
    y_key = frame.iloc[:, 1]
    a_key = frame.iloc[:, 2]

    # E[h(x) | Y = y], repeated on every row with that label
    rate_given_y = predictions.groupby(y_key).transform('mean')
    # E[h(x) | A = a, Y = y], repeated on every row of that (label, group) pair
    rate_given_y_and_a = predictions.groupby([y_key, a_key]).transform('mean')

    # Every observed pair owns at least one row, so the row-wise maximum
    # equals the maximum over pairs.
    return (rate_given_y_and_a - rate_given_y).abs().max()

In [None]:
# Wrap the encoded labels in a named Series so they can be concatenated with
# the predictions and the sensitive feature inside get_eo_violation.
true_Y = pd.Series(Y, name='true_y')
# Violation of the unmitigated predictor, kept in a one-element list so it can
# be handed straight to plt.plot later.
eo_violation_unmitigated = [get_eo_violation(unmitigated_y, A, true_Y,
                                             'unmitigated_predicted_y')]

In [None]:
# Experiment 1 - error and equalized-odds violation of the mitigated model
# for several values of epsilon
from fairlearn.reductions import EqualizedOdds

# Values passed as `eps` to ExponentiatedGradient; one fit per value.
eps_list = [0.001, 0.005, 0.01, 0.05, 0.1]
# Results per epsilon, in the same order as eps_list.
expgrad_error = []
eo_expgrad_violation = []

# The same estimator object is handed to every ExponentiatedGradient below.
estimator = LeastSquaresBinaryClassifierLearner()

for eps in eps_list:
    expgrad_X = ExponentiatedGradient(estimator,
                                      constraints=EqualizedOdds(),
                                      eps=eps, nu=1e-6)
    
    # Fit and predict on the same data: the error and the violation recorded
    # below are therefore training-set figures.
    # NOTE(review): no random seed is set anywhere in the visible cells; if
    # predict() is randomized in the installed fairlearn version, the numbers
    # will vary between runs -- confirm.
    expgrad_X.fit(X, Y, sensitive_features=A)
    expgrad_y = pd.Series(expgrad_X.predict(X), name='expgrad_predicted_y')
    
    error_expgrad = get_error(Y, expgrad_y)
    expgrad_error.append(error_expgrad)
    
    eo_violation_expgrad = get_eo_violation(expgrad_y, A, true_Y,
                                            'expgrad_predicted_y')
    eo_expgrad_violation.append(eo_violation_expgrad)


In [None]:
# Tabulate the Experiment 1 results: one row per epsilon, then the
# unmitigated baseline as the last row.
header_template = '{}\t\t{}\t\t\t{}'
row_template = '{}\t\t{}\t\t{}'

print(header_template.format('Epsilon', 'Max Violation', 'Error'))
for position, eps_value in enumerate(eps_list):
    print(row_template.format(eps_value,
                              eo_expgrad_violation[position],
                              expgrad_error[position]))
print(row_template.format('Unmit.',
                          eo_violation_unmitigated[0],
                          error_unmitigated[0]))

In [None]:
import matplotlib.pyplot as plt

# Error against fairness violation: one blue point per epsilon from
# Experiment 1, plus the unmitigated baseline as a red dot.
plt.scatter(eo_expgrad_violation, expgrad_error, label="expgrad")
# Both arguments are one-element lists, so this draws a single marker.
plt.plot(eo_violation_unmitigated, error_unmitigated, 'ro', label="unmitigated")
plt.xlabel('Violation of the fairness constraint')
plt.ylabel('Error')
plt.title('Adult UCI / Equalized Odds / Simple Learner')
plt.legend()
plt.show()

In [None]:
# Equalized-odds ratio violation is calculated as the maximum over (a, y) of
#   ratio * E[h(x) | A = a, Y = y] - E[h(x) | Y = y]    and
#   ratio * E[h(x) | Y = y] - E[h(x) | A = a, Y = y]
def get_eo_ratio_violation(predict_y, A, Y, ratio, label_name):
    """Return the largest ratio-form equalized-odds violation.

    For every observed (true label, sensitive group) pair, the mean prediction
    within that pair is compared with the mean prediction over all rows that
    share the true label, using the two one-sided expressions in the comment
    above; the largest value over all pairs and both expressions is returned.
    With ratio == 1 this equals the largest absolute difference.

    Args:
        predict_y: pd.Series of predictions; its name must equal label_name.
        A: pd.Series with the sensitive-feature value of each row.
        Y: pd.Series with the true label of each row.
        ratio: multiplier applied to one side of each comparison.
        label_name: column name of the predictions (i.e. predict_y.name).

    Returns:
        The maximum violation as a float; it can be negative when neither
        expression is positive for any pair.
    """
    # concat aligns the three Series on their index, one column per Series.
    frame = pd.concat([predict_y, Y, A], axis=1)
    predictions = frame[label_name]
    # Select the grouping columns by position so the function depends on its
    # arguments only, not on a notebook-level column name.
    y_key = frame.iloc[:, 1]
    a_key = frame.iloc[:, 2]

    # E[h(x) | Y = y], repeated on every row with that label
    rate_given_y = predictions.groupby(y_key).transform('mean')
    # E[h(x) | A = a, Y = y], repeated on every row of that (label, group) pair
    rate_given_y_and_a = predictions.groupby([y_key, a_key]).transform('mean')

    group_above = (ratio * rate_given_y_and_a) - rate_given_y
    group_below = (ratio * rate_given_y) - rate_given_y_and_a

    # Every observed pair owns at least one row, so the row-wise maxima equal
    # the maxima over pairs.
    return max(group_above.max(), group_below.max())

In [None]:
# Experiment 2 - error and ratio-form equalized-odds violation for every
# combination of ratio and epsilon
from fairlearn.reductions import EqualizedOdds

ratio_list = [0.8, 0.85, 0.9, 0.95, 1.0]
eps_list = [0.001, 0.005, 0.01, 0.05, 0.1]

# Results keyed by ratio. The first two map to lists ordered like eps_list;
# the third maps to a single number per ratio.
ratio_to_violation = {}
ratio_to_error = {}
ratio_to_unmitigated_eo_violation = {}

for ratio in ratio_list:
    # Fresh result lists for this ratio; these rebind the names that
    # Experiment 1 used for its own results.
    expgrad_error = []
    eo_expgrad_violation = []
    estimator = LeastSquaresBinaryClassifierLearner()

    for eps in eps_list:
        # NOTE(review): `ratio` is only used when measuring the violation
        # below; EqualizedOdds() is built without it, so every pass of the
        # outer loop fits with the same constraint configuration. If the
        # constraint was meant to depend on ratio, pass it through whichever
        # parameter the installed fairlearn version exposes -- confirm.
        expgrad_X = ExponentiatedGradient(estimator,
                                          constraints=EqualizedOdds(),
                                          eps=eps, nu=1e-6)

        # Fit and predict on the same data, so these are training-set figures.
        expgrad_X.fit(X, Y, sensitive_features=A)
        expgrad_y = pd.Series(expgrad_X.predict(X), name='expgrad_predicted_y')

        error_expgrad = get_error(Y, expgrad_y)
        expgrad_error.append(error_expgrad)

        eo_violation_expgrad = get_eo_ratio_violation(expgrad_y, A, true_Y, ratio,
                                                      'expgrad_predicted_y')
        eo_expgrad_violation.append(eo_violation_expgrad)
    
    ratio_to_violation[ratio] = eo_expgrad_violation
    ratio_to_error[ratio] = expgrad_error
    
    # NOTE(review): this rebinds eo_violation_unmitigated from the one-element
    # list created earlier to a plain number; the Experiment 1 table cell
    # indexes it with [0], so re-running that cell after this one will fail.
    eo_violation_unmitigated = get_eo_ratio_violation(unmitigated_y, A, true_Y,
                                                      ratio,
                                                      'unmitigated_predicted_y')
    ratio_to_unmitigated_eo_violation[ratio] = eo_violation_unmitigated

In [None]:
# Tabulate the Experiment 2 results: for each ratio, one row per epsilon and
# then the unmitigated baseline as the last row.
header_template = '{}\t\t{}\t\t\t{}'
row_template = '{}\t\t{}\t\t{}'

for ratio in ratio_list:
    print('Ratio: {}'.format(ratio))
    eo_expgrad_violation = ratio_to_violation[ratio]
    expgrad_error = ratio_to_error[ratio]
    print(header_template.format('Epsilon', 'Max Violation', 'Error'))
    for position, eps_value in enumerate(eps_list):
        print(row_template.format(eps_value,
                                  eo_expgrad_violation[position],
                                  expgrad_error[position]))
    print(row_template.format('Unmit.',
                              ratio_to_unmitigated_eo_violation[ratio],
                              error_unmitigated[0]))

In [None]:
import matplotlib.pyplot as plt

# One figure per ratio: error against ratio-form violation for every epsilon,
# plus the unmitigated baseline as a red dot.
for ratio in ratio_list:
    eo_expgrad_violation = ratio_to_violation[ratio]
    expgrad_error = ratio_to_error[ratio]
    
    plt.scatter(eo_expgrad_violation, expgrad_error, label="expgrad")
    # The baseline violation is a single number per ratio, so wrap it in a
    # list to match the one-element error_unmitigated list.
    plt.plot([ratio_to_unmitigated_eo_violation[ratio]], error_unmitigated, 'ro', label="unmitigated")
    plt.xlabel('Violation of the fairness constraint')
    plt.ylabel('Error')
    plt.title('Adult UCI / Equalized Odds Ratio = {} / Simple Learner'.format(ratio))
    plt.legend()
    # show() inside the loop ends the current figure, so each ratio gets its
    # own plot.
    plt.show()