# Comparison of two methodologies to achieve Equalized Odds

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_openml
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

from fairlearn.metrics import equalized_odds_difference, MetricFrame
from fairlearn.postprocessing import ThresholdOptimizer
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds

# Seed NumPy's global random generator with a fixed value so that repeated
# runs of the notebook start from the same random state.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [None]:
class ModelMitigationAnalyser:
    """Compare a classifier with two equalized-odds mitigations of it.

    On construction the analyser stores three sets of predictions for ``X``:

    * ``y_pred``       -- the predictions of the classifier as passed in,
    * ``y_pred_eo_to`` -- predictions of a ``ThresholdOptimizer`` wrapped
      around the classifier with the ``'equalized_odds'`` constraint,
    * ``y_pred_eo_eg`` -- predictions of an ``ExponentiatedGradient``
      reduction using an ``EqualizedOdds`` constraint.

    Both mitigators are fitted on, and then predict for, the very same ``X``
    that is passed to the constructor. The accessor methods report accuracy
    (as a ``MetricFrame``) and the equalized odds difference for each of the
    three prediction sets.
    """

    def __init__(self, classifier, X, y_true, sensitive_features):
        """Fit both mitigators and cache all predictions.

        Parameters
        ----------
        classifier
            Estimator whose ``predict`` is called immediately, so it has to
            be usable for prediction when it is passed in.
        X
            Feature matrix used for fitting the mitigators and for predicting.
        y_true
            Ground-truth labels aligned with ``X``.
        sensitive_features
            Group membership per row, used by the mitigators and the metrics.
        """
        self.X = X
        self.y_true = y_true
        self.sensitive_features = sensitive_features

        # Baseline: predictions of the classifier without any mitigation.
        self.y_pred = classifier.predict(X)

        # Mitigation 1: threshold optimisation.
        threshold_optimizer = ThresholdOptimizer(
            estimator=classifier,
            constraints='equalized_odds',
        )
        threshold_optimizer.fit(X, y_true, sensitive_features=sensitive_features)
        self.y_pred_eo_to = threshold_optimizer.predict(
            X, sensitive_features=sensitive_features
        )

        # Mitigation 2: exponentiated gradient reduction.
        exponentiated_gradient = ExponentiatedGradient(classifier, EqualizedOdds())
        exponentiated_gradient.fit(X, y_true, sensitive_features=sensitive_features)
        self.y_pred_eo_eg = exponentiated_gradient.predict(X)

    def _accuracy_frame(self, predictions):
        """Return a ``MetricFrame`` of accuracy for ``predictions``."""
        # NOTE(review): MetricFrame is called with positional arguments here;
        # newer fairlearn releases may expect keywords -- confirm against the
        # installed version.
        return MetricFrame(
            accuracy_score,
            self.y_true,
            predictions,
            sensitive_features=self.sensitive_features,
        )

    def _eo_difference(self, predictions):
        """Return the equalized odds difference for ``predictions``."""
        return equalized_odds_difference(
            self.y_true,
            predictions,
            sensitive_features=self.sensitive_features,
        )

    def get_vanilla_accuracy(self):
        """Accuracy ``MetricFrame`` of the unmitigated classifier."""
        return self._accuracy_frame(self.y_pred)

    def get_vanilla_equalized_odds_difference(self):
        """Equalized odds difference of the unmitigated classifier."""
        return self._eo_difference(self.y_pred)

    def get_threshold_optimisation_accuracy(self):
        """Accuracy ``MetricFrame`` of the threshold-optimised predictions."""
        return self._accuracy_frame(self.y_pred_eo_to)

    def get_threshold_optimisation_equalized_odds_difference(self):
        """Equalized odds difference of the threshold-optimised predictions."""
        return self._eo_difference(self.y_pred_eo_to)

    def get_exponentiated_gradient_accuracy(self):
        """Accuracy ``MetricFrame`` of the exponentiated-gradient predictions."""
        return self._accuracy_frame(self.y_pred_eo_eg)

    def get_exponentiated_gradient_equalized_odds_difference(self):
        """Equalized odds difference of the exponentiated-gradient predictions."""
        return self._eo_difference(self.y_pred_eo_eg)

### Adult dataset

In [None]:
# Download adult data from https://www.openml.org/d/1590
data_adult = fetch_openml(data_id=1590, as_frame=True)
# Preview the first rows of the feature table.
data_adult.data.head()

In [None]:
# Encode the feature table with pandas' dummy/indicator encoding.
X_adult = pd.get_dummies(data_adult.data)
# Binary target: 1 where the label equals '>50K', 0 otherwise.
y_true_adult = (data_adult.target == '>50K') * 1
# Sensitive attribute for the fairness analysis: the 'sex' column.
sensitive_features_adult = data_adult.data['sex']
# Group sizes of the sensitive attribute.
sensitive_features_adult.value_counts()

In [None]:
# Shallow decision tree used as the unmitigated baseline. It is fitted and
# then asked to predict on the same data (no hold-out split).
classifier_adult = DecisionTreeClassifier(
    max_depth=4,
    min_samples_leaf=10,
).fit(X_adult, y_true_adult)
y_pred_adult = classifier_adult.predict(X_adult)

In [None]:
# This might take a few minutes
adult_mitigator_analyser = ModelMitigationAnalyser(
    classifier=classifier_adult,
    X=X_adult,
    y_true=y_true_adult,
    sensitive_features=sensitive_features_adult,
)

### Credit default dataset

In [None]:
# Download credit default data from https://www.openml.org/d/42477
data_credit_default = fetch_openml(data_id=42477, as_frame=True)
# Preview the first rows of the feature table.
data_credit_default.data.head()

In [None]:
# Encode the feature table with pandas' dummy/indicator encoding.
X_credit_default = pd.get_dummies(data_credit_default.data)
# Binary target: 1 where the label equals the string "1", 0 otherwise.
y_true_credit_default = (data_credit_default.target == "1") * 1
# Sensitive attribute: column 'x2' (presumably the client's sex -- verify
# against the dataset description).
sensitive_features_credit_default = data_credit_default.data['x2']
# NOTE(review): this is not the last expression of the cell, so in a notebook
# its result is most likely not rendered -- confirm whether that is intended.
sensitive_features_credit_default.value_counts()
y_true_credit_default

In [None]:
# Shallow decision tree used as the unmitigated baseline. It is fitted and
# then asked to predict on the same data (no hold-out split).
classifier_credit_default = DecisionTreeClassifier(
    max_depth=4,
    min_samples_leaf=10,
).fit(X_credit_default, y_true_credit_default)
y_pred_credit_default = classifier_credit_default.predict(X_credit_default)

In [None]:
# This might take a few minutes
credit_default_mitigator_analyser = ModelMitigationAnalyser(
    classifier=classifier_credit_default,
    X=X_credit_default,
    y_true=y_true_credit_default,
    sensitive_features=sensitive_features_credit_default,
)

### Bank marketing dataset

In [None]:
# Download bank marketing data from https://www.openml.org/d/1461
data_bank_marketing = fetch_openml(data_id=1461, as_frame=True)
# Preview the first rows of the feature table.
data_bank_marketing.data.head()

In [None]:
# Encode the feature table with pandas' dummy/indicator encoding.
X_bank_marketing = pd.get_dummies(data_bank_marketing.data)
# Binary target: 1 where the label equals the string "2", 0 otherwise
# (presumably "2" is the positive outcome -- verify against the dataset).
y_true_bank_marketing = (data_bank_marketing.target == "2") * 1
# Sensitive attribute: column 'V3' (presumably marital status -- verify
# against the dataset description).
sensitive_features_bank_marketing = data_bank_marketing.data['V3']
# Group sizes of the sensitive attribute.
sensitive_features_bank_marketing.value_counts()

In [None]:
# Shallow decision tree used as the unmitigated baseline. It is fitted and
# then asked to predict on the same data (no hold-out split).
classifier_bank_marketing = DecisionTreeClassifier(
    max_depth=4,
    min_samples_leaf=10,
).fit(X_bank_marketing, y_true_bank_marketing)
y_pred_bank_marketing = classifier_bank_marketing.predict(X_bank_marketing)

In [None]:
# This might take a few minutes
bank_marketing_mitigator_analyser = ModelMitigationAnalyser(
    classifier=classifier_bank_marketing,
    X=X_bank_marketing,
    y_true=y_true_bank_marketing,
    sensitive_features=sensitive_features_bank_marketing,
)

### Datasets analysis

In [None]:
def add_value_to_bar(xs, w, ys1, ys2, ys3):
    """Write each bar's value above it on the current matplotlib axes.

    Parameters
    ----------
    xs
        Array of x positions of the first bar series; arithmetic with ``w``
        is applied to it directly.
    w
        Width of a single bar.
    ys1, ys2, ys3
        Heights of the first, second and third bar series.

    The labels of the three series are placed at ``xs - w/2``, ``xs + w/2``
    and ``xs + 3w/2`` respectively, slightly above the bar top, formatted
    with three decimals and rotated by 45 degrees.
    """
    label_size = 8
    lift = 0.001  # small vertical gap between the bar top and its label

    # Each series starts one bar width to the right of the previous one.
    first_positions = xs - (w / 2)
    second_positions = first_positions + w
    third_positions = second_positions + w

    series = (
        (first_positions, ys1),
        (second_positions, ys2),
        (third_positions, ys3),
    )
    for positions, heights in series:
        for position, height in zip(positions, heights):
            plt.text(
                position,
                height + lift,
                "%.3f"%height,
                fontsize=label_size,
                rotation=45,
            )

In [None]:
# Method labels; below only their count is used to position the bar groups.
x = ['vanilla', 'eo_to', 'eo_eg']

# Overall accuracy per dataset, in the order: vanilla, threshold
# optimisation, exponentiated gradient.
adult_accuracy = [
    frame.overall
    for frame in (
        adult_mitigator_analyser.get_vanilla_accuracy(),
        adult_mitigator_analyser.get_threshold_optimisation_accuracy(),
        adult_mitigator_analyser.get_exponentiated_gradient_accuracy(),
    )
]
credit_default_accuracy = [
    frame.overall
    for frame in (
        credit_default_mitigator_analyser.get_vanilla_accuracy(),
        credit_default_mitigator_analyser.get_threshold_optimisation_accuracy(),
        credit_default_mitigator_analyser.get_exponentiated_gradient_accuracy(),
    )
]
bank_marketing_accuracy = [
    frame.overall
    for frame in (
        bank_marketing_mitigator_analyser.get_vanilla_accuracy(),
        bank_marketing_mitigator_analyser.get_threshold_optimisation_accuracy(),
        bank_marketing_mitigator_analyser.get_exponentiated_gradient_accuracy(),
    )
]

width = 0.3
locs = np.arange(1, len(x) + 1)

# One group of three bars (one per dataset) for every method.
plt.title("Accuracy")
plt.bar(
    locs,
    adult_accuracy,
    width=width,
    color="aquamarine",
    hatch="---",
    label='Adult',
)
plt.bar(
    locs + width,
    credit_default_accuracy,
    width=width,
    color="darkorange",
    hatch="|||",
    label='Credit Default',
)
plt.bar(
    locs + 2 * width,
    bank_marketing_accuracy,
    width=width,
    color="lightsteelblue",
    hatch="...",
    label='Bank Marketing',
)
plt.legend()

# Annotate every bar with its numeric value.
add_value_to_bar(
    locs,
    width,
    adult_accuracy,
    credit_default_accuracy,
    bank_marketing_accuracy,
)

# Tick labels sit under the middle bar of each group.
plt.xticks(locs + width, ["vanilla", "thresh. optimisation", "exp. gradient"])
plt.ylim([0.81, 0.925])
plt.xlim([0.5, 6])
plt.show()

In [None]:
# Method labels; below only their count is used to position the bar groups.
x = ['vanilla', 'threshold optimisation', 'eo_eg']

# Equalized odds difference per dataset, in the order: vanilla, threshold
# optimisation, exponentiated gradient.
adult_eo_difference = [
    adult_mitigator_analyser.get_vanilla_equalized_odds_difference(),
    adult_mitigator_analyser.get_threshold_optimisation_equalized_odds_difference(),
    adult_mitigator_analyser.get_exponentiated_gradient_equalized_odds_difference(),
]
credit_default_eo_difference = [
    credit_default_mitigator_analyser.get_vanilla_equalized_odds_difference(),
    credit_default_mitigator_analyser.get_threshold_optimisation_equalized_odds_difference(),
    credit_default_mitigator_analyser.get_exponentiated_gradient_equalized_odds_difference(),
]
bank_marketing_eo_difference = [
    bank_marketing_mitigator_analyser.get_vanilla_equalized_odds_difference(),
    bank_marketing_mitigator_analyser.get_threshold_optimisation_equalized_odds_difference(),
    bank_marketing_mitigator_analyser.get_exponentiated_gradient_equalized_odds_difference(),
]

width = 0.3
locs = np.arange(1, len(x) + 1)

# One group of three bars (one per dataset) for every method.
plt.title("Equalized Odds Difference")
plt.bar(
    locs,
    adult_eo_difference,
    width=width,
    color="aquamarine",
    hatch="---",
    label='Adult',
)
plt.bar(
    locs + width,
    credit_default_eo_difference,
    width=width,
    color="darkorange",
    hatch="|||",
    label='Credit Default',
)
plt.bar(
    locs + 2 * width,
    bank_marketing_eo_difference,
    width=width,
    color="lightsteelblue",
    hatch="...",
    label='Bank Marketing',
)
plt.legend()

# Annotate every bar with its numeric value.
add_value_to_bar(
    locs,
    width,
    adult_eo_difference,
    credit_default_eo_difference,
    bank_marketing_eo_difference,
)

# Tick labels sit under the middle bar of each group.
plt.xticks(locs + width, ["vanilla", "thresh. optimisation", "exp. gradient"])
plt.ylim([0, 0.095])
plt.show()