**What does this notebook do?**

An experiment applying the **Demographic Parity Ratio** constraint to the Adult dataset.

Things to note:
1. Input - Adult dataset from shap. Sensitive feature = 'Sex'
2. Classifier - LogisticRegression from the sklearn library.
3. Constraint evaluated - Demographic Parity with ratio = 0.8


In [None]:
import numpy as np
import pandas as pd

import shap
shap.initjs()

from fairlearn.reductions import GridSearch
from fairlearn.reductions import DemographicParity
from sklearn.preprocessing import LabelEncoder,StandardScaler, MinMaxScaler
from sklearn.linear_model import LogisticRegression

In [None]:
# Load the Adult dataset shipped with shap: a feature frame and its labels.
X_raw, Y = shap.datasets.adult()
sensitive_attribute = 'Sex'

# Sensitive feature: kept out of the model inputs, re-indexed from 0.
A = X_raw[sensitive_attribute].reset_index(drop=True)

# Model inputs: every column except the sensitive one, passed through
# pd.get_dummies and re-indexed from 0 so rows line up with A.
X = pd.get_dummies(X_raw.drop(columns=[sensitive_attribute])).reset_index(drop=True)

# Standardised copy of the inputs.
# NOTE(review): X_scaled is not referenced by the cells visible here, which
# fit on X -- confirm whether the scaled frame was meant to be used.
sc = StandardScaler()
X_scaled = pd.DataFrame(sc.fit_transform(X), columns=X.columns)

# Encode the labels as integer class ids.
le = LabelEncoder()
Y = le.fit_transform(Y)

In [None]:
# Baseline model: a plain logistic regression with no fairness constraint.
# NOTE(review): this fits on X; the X_scaled frame built in the data-prep cell
# is not used here -- confirm that is intended.
unmitigated_predictor = LogisticRegression(fit_intercept=True, solver='liblinear')
unmitigated_predictor.fit(X, Y)

In [None]:
def get_error(y, predicted_y):
    """Return the misclassification rate of ``predicted_y`` against ``y``.

    Both inputs are converted to numpy arrays and compared position by
    position, so lists, numpy arrays and pandas Series (regardless of their
    index labels) are all accepted.

    Parameters
    ----------
    y : array-like
        True labels.
    predicted_y : array-like
        Predicted labels, same length as ``y``.

    Returns
    -------
    float
        Fraction of positions where the prediction differs from the truth,
        in the range [0, 1].

    Raises
    ------
    ValueError
        If the inputs have different shapes or are empty.
    """
    actual = np.asarray(y)
    predicted = np.asarray(predicted_y)

    if actual.shape != predicted.shape:
        raise ValueError(
            "y and predicted_y must have the same shape, got {} and {}".format(
                actual.shape, predicted.shape))
    if actual.size == 0:
        raise ValueError("cannot compute an error rate on empty input")

    # Mean of the boolean match mask is the accuracy; its complement the error.
    return 1 - np.mean(actual == predicted)

In [None]:
# Predictions of the baseline model on the data it was fit on, wrapped in a
# named Series so the name can be used as a column label later on.
unmitigated_y = pd.Series(unmitigated_predictor.predict(X)).rename("unmitigated_predicted_y")

# Kept as a one-element list so it can be passed straight to the plotting call.
error_unmitigated = [get_error(Y, unmitigated_y)]
print("The error for unmitigated is:", error_unmitigated)

In [None]:
def get_dp_violation(predict_y, A, ratio, label_name):
    """Return the largest demographic-parity *ratio* constraint value.

    The ratio form of demographic parity requires, for every group ``a``,

        ratio <= E[h(x) | A = a] / E[h(x)] <= 1 / ratio

    which is checked without division as

        1. ratio * E[h(x)]         - E[h(x) | A = a] <= 0
        2. ratio * E[h(x) | A = a] - E[h(x)]         <= 0

    The function evaluates both left-hand sides for every group and returns
    their maximum. A result <= 0 means every constraint holds; a positive
    result is the size of the worst violation.

    Parameters
    ----------
    predict_y : pandas.Series
        Predicted labels (0/1).
    A : pandas.Series or array-like
        Sensitive-feature value for each prediction. A Series is aligned with
        ``predict_y`` on its index.
    ratio : float
        Demographic-parity ratio bound, e.g. 0.8.
    label_name : str
        Unused; retained so existing calls keep working.

    Returns
    -------
    float
        Maximum signed constraint value over all groups and both inequalities.

    Raises
    ------
    ValueError
        If there are no predictions or no sensitive groups to evaluate.
    """
    if len(predict_y) == 0:
        raise ValueError("predict_y must contain at least one prediction")

    # Per-group selection rate E[h(x) | A = a]. Grouping by A itself avoids
    # relying on a global column name, and working on the whole result at once
    # avoids integer lookups that are label-based for integer group keys.
    group_rates = predict_y.groupby(A).mean()
    if group_rates.empty:
        raise ValueError("A must define at least one sensitive group")

    # Overall selection rate E[h(x)]; identical for every group.
    overall_rate = predict_y.mean()

    # Signed left-hand sides of the two inequalities. No abs(): a negative
    # value is a satisfied constraint, not a violation.
    lower_bound_gap = ratio * overall_rate - group_rates
    upper_bound_gap = ratio * group_rates - overall_rate

    return max(lower_bound_gap.max(), upper_bound_gap.max())

In [None]:
from fairlearn.reductions import ExponentiatedGradient
from fairlearn.reductions import GridSearch, DemographicParity
import numpy as np
# Settings for the mitigated runs: one ExponentiatedGradient fit per eps value,
# all under the same demographic-parity ratio.
ratio = 0.8
eps_list = [0.001]

# One entry per eps value, in the same order as eps_list.
expgrad_error = []
dp_expgrad_violation = []

for eps in eps_list:
    # Same base learner as the unmitigated model, wrapped in the reduction.
    expgrad_X = ExponentiatedGradient(
        LogisticRegression(fit_intercept=True, solver='liblinear'),
        constraints=DemographicParity(ratio=ratio),
        eps=eps,
        nu=1e-6,
    )
    expgrad_X.fit(X, Y, sensitive_features=A)

    # Predictions on the data the model was fit on, as a named Series.
    expgrad_y = pd.Series(expgrad_X.predict(X), name="expgrad_predicted_y")

    error_expgrad = get_error(Y, expgrad_y)
    expgrad_error.append(error_expgrad)

    dp_violation_expgrad = get_dp_violation(expgrad_y, A, ratio, "expgrad_predicted_y")
    dp_expgrad_violation.append(dp_violation_expgrad)

# Baseline violation, wrapped in a list so it plots like the mitigated results.
dp_violation_unmitigated = [
    get_dp_violation(unmitigated_y, A, ratio, 'unmitigated_predicted_y')
]
print(
    "The violation for unmitigated is {} and mitigated is {}:".format(
        dp_violation_unmitigated, dp_expgrad_violation
    )
)

In [None]:
import matplotlib.pyplot as plt

# Error against constraint violation: one point per mitigated model and a red
# marker for the unmitigated baseline.
plt.scatter(dp_expgrad_violation, expgrad_error, label="expgrad")
plt.plot(dp_violation_unmitigated, error_unmitigated, 'ro', label="unmitigated")

plt.title('Adult Uci/DP/log.reg')
plt.ylabel('Error')
plt.xlabel('Violation of the fairness constraint')

plt.legend()
plt.show()