#### This notebook demonstrates the use of Reweighing pre-processing, Adversarial Debiasing in-processing and Reject Option Classification (ROC) post-processing algorithms for bias mitigation.
- Load imports
- Dataset
    * Load Adult, COMPAS, or German dataset and set privileged and unprivileged groups
    * Divide the dataset into training, validation, and testing partitions
    * Show dataset properties
- Pre-processing: Reweighing.
    * Show difference in mean outcomes for original training data
    * Assign weights with reweighing
    * Show difference in mean outcomes for transformed training data
- In-processing: Adversarial Debiasing.
    * Train model without debiasing, predict, and show metrics
    * Train model with debiasing, predict, and show metrics
- Post-processing: Reject Option Classification (ROC).
    * Show metrics for test set from Adversarial Debiasing without debiasing
    * Fit ROC model
    * Transform labels and show metrics

In [1]:
# Load all necessary packages
import sys
sys.path.append("../")
import numpy as np
import tensorflow as tf
from warnings import warn 

# Avoid deprecation warnings
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


from aif360.datasets import BinaryLabelDataset
from aif360.datasets import AdultDataset, GermanDataset, CompasDataset
from aif360.metrics import ClassificationMetric, BinaryLabelDatasetMetric
from aif360.metrics.utils import compute_boolean_conditioning_vector
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
        import load_preproc_data_adult, load_preproc_data_german, load_preproc_data_compas

from aif360.algorithms.preprocessing.reweighing import Reweighing
from aif360.algorithms.inprocessing.adversarial_debiasing import AdversarialDebiasing
from aif360.algorithms.postprocessing.reject_option_classification\
        import RejectOptionClassification

from common_utils import compute_metrics

from IPython.display import Markdown, display
from ipywidgets import interactive, FloatSlider

#### Load dataset and specify options

In [2]:
## Select the dataset and the protected attribute to analyse.
# dataset_used: one of "adult", "german", "compas".
# protected_attribute_used: 1 picks the first protected attribute listed for the
# chosen dataset below; any other value picks the second one.
dataset_used = "german" # "adult", "german", "compas"
protected_attribute_used = 1 # 1, 2

if dataset_used == "adult":
    if protected_attribute_used == 1:
        privileged_groups = [{'sex': 1}]
        unprivileged_groups = [{'sex': 0}]
        dataset_orig = load_preproc_data_adult(['sex'])
    else:
        privileged_groups = [{'race': 1}]
        unprivileged_groups = [{'race': 0}]
        dataset_orig = load_preproc_data_adult(['race'])

elif dataset_used == "german":
    if protected_attribute_used == 1:
        privileged_groups = [{'sex': 1}]
        unprivileged_groups = [{'sex': 0}]
        dataset_orig = load_preproc_data_german(['sex'])
    else:
        privileged_groups = [{'age': 1}]
        unprivileged_groups = [{'age': 0}]
        dataset_orig = load_preproc_data_german(['age'])

elif dataset_used == "compas":
    if protected_attribute_used == 1:
        # NOTE(review): unlike "adult" and "german", sex == 0 is treated as the
        # privileged group here -- confirm this matches the encoding produced by
        # load_preproc_data_compas.
        privileged_groups = [{'sex': 0}]
        unprivileged_groups = [{'sex': 1}]
        dataset_orig = load_preproc_data_compas(['sex'])
    else:
        privileged_groups = [{'race': 1}]
        unprivileged_groups = [{'race': 0}]
        dataset_orig = load_preproc_data_compas(['race'])

else:
    # Fail here with a clear message instead of a NameError on dataset_orig
    # several cells further down.
    raise ValueError(
        "Unknown dataset_used %r; expected 'adult', 'german' or 'compas'" % (dataset_used,))

#### Split into train, test and validation

In [3]:
# Split the data 70% / 15% / 15% into train / validation / test partitions.
# Seed NumPy's global random state before the shuffled splits so that a
# Restart & Run All reproduces the same partitions (and hence the same metrics).
np.random.seed(0)
dataset_orig_train, dataset_orig_vt = dataset_orig.split([0.7], shuffle=True)
dataset_orig_valid, dataset_orig_test = dataset_orig_vt.split([0.5], shuffle=True)

#### Display properties of the training data

In [4]:
# Summarise the training partition: every Markdown heading is followed by the
# dataset attribute(s) it describes.
train_summary = (
    ("#### Training Dataset shape",
     (dataset_orig_train.features.shape,)),
    ("#### Favorable and unfavorable labels",
     (dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)),
    ("#### Protected attribute names",
     (dataset_orig_train.protected_attribute_names,)),
    ("#### Privileged and unprivileged protected attribute values",
     (dataset_orig_train.privileged_protected_attributes,
      dataset_orig_train.unprivileged_protected_attributes)),
    ("#### Dataset feature names",
     (dataset_orig_train.feature_names,)),
)
for heading, values in train_summary:
    display(Markdown(heading))
    print(*values)

#### Training Dataset shape

(700, 11)


#### Favorable and unfavorable labels

1.0 2.0


#### Protected attribute names

['sex']


#### Privileged and unprivileged protected attribute values

[array([1.])] [array([0.])]


#### Dataset feature names

['age', 'sex', 'credit_history=Delay', 'credit_history=None/Paid', 'credit_history=Other', 'savings=500+', 'savings=<500', 'savings=Unknown/None', 'employment=1-4 years', 'employment=4+ years', 'employment=Unemployed']


## Pre-processing: Reweighing

#### Metric for original training data

In [31]:
# Baseline: label disparity between the two groups in the untouched training data.
metric_orig_train = BinaryLabelDatasetMetric(
    dataset_orig_train,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
display(Markdown("#### Original training dataset"))

# Show three sample instance weights (positions 1, 2 and 3).
orig_weight_sample = tuple(dataset_orig_train.instance_weights[i] for i in (1, 2, 3))
print("Weights = %f , %f, %f, ..." % orig_weight_sample)

orig_mean_difference = metric_orig_train.mean_difference()
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % orig_mean_difference)

#### Original training dataset

Weights = 1.000000 , 1.000000, 1.000000, ...
Difference in mean outcomes between unprivileged and privileged groups = -0.097778


#### Reweighing

In [6]:
# Fit the reweighing transformer on the training partition, then apply it to the
# same partition to obtain the transformed training set.
RW = Reweighing(
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
dataset_transf_train = RW.fit(dataset_orig_train).transform(dataset_orig_train)

#### Metric for reweighted training data

In [29]:
# Same disparity measure as for the original data, now on the reweighed training set.
metric_transf_train = BinaryLabelDatasetMetric(
    dataset_transf_train,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
display(Markdown("#### Transformed training dataset"))

# Show three sample instance weights (positions 1, 2 and 3) after reweighing.
transf_weight_sample = tuple(dataset_transf_train.instance_weights[i] for i in (1, 2, 3))
print("Weights = %8f , %8f, %8f, ..." % transf_weight_sample)

transf_mean_difference = metric_transf_train.mean_difference()
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % transf_mean_difference)

#### Transformed training dataset

Weights = 0.956349 , 0.956349, 1.112245, ...
Difference in mean outcomes between unprivileged and privileged groups = -0.000000


## In-processing: Adversarial Debiasing

### Without debiasing

#### Train without debiasing

In [8]:
# Build the classifier-only model (debias=False); training happens in the next cell.
# The session is created through tf.compat.v1 because the bare tf.Session alias
# no longer exists in TensorFlow 2.x, while tf.compat.v1.Session is available in
# both TF 1.13+ and TF 2.x.
sess = tf.compat.v1.Session()
plain_model_nodebias = AdversarialDebiasing(privileged_groups=privileged_groups,
                                            unprivileged_groups=unprivileged_groups,
                                            scope_name='plain_classifier',
                                            debias=False,
                                            sess=sess)

In [9]:
# Train the model constructed with debias=False on the original training partition.
plain_model_nodebias.fit(dataset_orig_train)

epoch 0; iter: 0; batch classifier loss: 0.705588
epoch 1; iter: 0; batch classifier loss: 0.649443
epoch 2; iter: 0; batch classifier loss: 0.621172
epoch 3; iter: 0; batch classifier loss: 0.604294
epoch 4; iter: 0; batch classifier loss: 0.576895
epoch 5; iter: 0; batch classifier loss: 0.528867
epoch 6; iter: 0; batch classifier loss: 0.644497
epoch 7; iter: 0; batch classifier loss: 0.615819
epoch 8; iter: 0; batch classifier loss: 0.631111
epoch 9; iter: 0; batch classifier loss: 0.547435
epoch 10; iter: 0; batch classifier loss: 0.576678
epoch 11; iter: 0; batch classifier loss: 0.582958
epoch 12; iter: 0; batch classifier loss: 0.552135
epoch 13; iter: 0; batch classifier loss: 0.585561
epoch 14; iter: 0; batch classifier loss: 0.539198
epoch 15; iter: 0; batch classifier loss: 0.623623
epoch 16; iter: 0; batch classifier loss: 0.598153
epoch 17; iter: 0; batch classifier loss: 0.597544
epoch 18; iter: 0; batch classifier loss: 0.556966
epoch 19; iter: 0; batch classifier loss:

<aif360.algorithms.inprocessing.adversarial_debiasing.AdversarialDebiasing at 0x1f4c3e76088>

#### Show metrics

In [10]:
# Apply the plain model to every partition (train / validation / test).
(dataset_nodebiasing_train,
 dataset_nodebiasing_valid,
 dataset_nodebiasing_test) = [
    plain_model_nodebias.predict(partition)
    for partition in (dataset_orig_train, dataset_orig_valid, dataset_orig_test)
]

In [11]:
# Fairness and accuracy of the plain (non-debiased) model's predictions.
display(Markdown("#### Plain model - without debiasing - dataset metrics"))

# Dataset-level metrics are computed from the predicted labels of one dataset.
metric_dataset_nodebiasing_train = BinaryLabelDatasetMetric(
    dataset_nodebiasing_train,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f"
      % metric_dataset_nodebiasing_train.mean_difference())

metric_dataset_nodebiasing_test = BinaryLabelDatasetMetric(
    dataset_nodebiasing_test,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f"
      % metric_dataset_nodebiasing_test.mean_difference())

# Classification metrics compare the true test labels with the predictions.
display(Markdown("#### Plain model - without debiasing - classification metrics"))
classified_metric_nodebiasing_test = ClassificationMetric(
    dataset_orig_test,
    dataset_nodebiasing_test,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
print("Test set: Classification accuracy = %f" % classified_metric_nodebiasing_test.accuracy())

# Balanced accuracy is the mean of the true positive and true negative rates.
TPR = classified_metric_nodebiasing_test.true_positive_rate()
TNR = classified_metric_nodebiasing_test.true_negative_rate()
bal_acc_nodebiasing_test = 0.5 * (TPR + TNR)
print("Test set: Balanced classification accuracy = %f" % bal_acc_nodebiasing_test)

for template, compute in (
    ("Test set: Disparate impact = %f",
     classified_metric_nodebiasing_test.disparate_impact),
    ("Test set: Equal opportunity difference = %f",
     classified_metric_nodebiasing_test.equal_opportunity_difference),
    ("Test set: Average odds difference = %f",
     classified_metric_nodebiasing_test.average_odds_difference),
    ("Test set: Theil_index = %f",
     classified_metric_nodebiasing_test.theil_index),
):
    print(template % compute())

# Attach prediction scores as a single column; the ROC post-processing cells
# further down read them from the validation and test predictions.
for predicted, source in (
    (dataset_nodebiasing_valid, dataset_orig_valid),
    (dataset_nodebiasing_test, dataset_orig_test),
):
    predicted.scores = plain_model_nodebias.predict_proba(source).reshape(-1, 1)


#### Plain model - without debiasing - dataset metrics

Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.400000
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.365854


#### Plain model - without debiasing - classification metrics

Test set: Classification accuracy = 0.720000
Test set: Balanced classification accuracy = 0.509317
Test set: Disparate impact = 0.634146
Test set: Equal opportunity difference = -0.333333
Test set: Average odds difference = -0.416667
Test set: Theil_index = 0.127632


### With debiasing

#### Train with debiasing

In [12]:
# Release the plain model's session and clear its graph before building the
# debiased model. No session is opened here: the next cell creates the one the
# debiased model uses, so opening another would leave an unclosed session behind.
sess.close()
tf.compat.v1.reset_default_graph()

In [13]:
# Build the adversarially debiased model (debias=True); training happens in the next cell.
# The session is created through tf.compat.v1 because the bare tf.Session alias
# no longer exists in TensorFlow 2.x, while tf.compat.v1.Session is available in
# both TF 1.13+ and TF 2.x.
sess = tf.compat.v1.Session()
plain_model_debias = AdversarialDebiasing(privileged_groups=privileged_groups,
                                          unprivileged_groups=unprivileged_groups,
                                          scope_name='plain_classifier',
                                          debias=True,
                                          sess=sess)

In [14]:
# Train the model constructed with debias=True on the original training partition.
plain_model_debias.fit(dataset_orig_train)

epoch 0; iter: 0; batch classifier loss: 0.672484; batch adversarial loss: 0.656056
epoch 1; iter: 0; batch classifier loss: 0.683665; batch adversarial loss: 0.619799
epoch 2; iter: 0; batch classifier loss: 0.645837; batch adversarial loss: 0.644092
epoch 3; iter: 0; batch classifier loss: 0.599088; batch adversarial loss: 0.596543
epoch 4; iter: 0; batch classifier loss: 0.634671; batch adversarial loss: 0.610302
epoch 5; iter: 0; batch classifier loss: 0.544876; batch adversarial loss: 0.607539
epoch 6; iter: 0; batch classifier loss: 0.651467; batch adversarial loss: 0.575780
epoch 7; iter: 0; batch classifier loss: 0.653255; batch adversarial loss: 0.610739
epoch 8; iter: 0; batch classifier loss: 0.579653; batch adversarial loss: 0.616999
epoch 9; iter: 0; batch classifier loss: 0.560469; batch adversarial loss: 0.584139
epoch 10; iter: 0; batch classifier loss: 0.520689; batch adversarial loss: 0.637061
epoch 11; iter: 0; batch classifier loss: 0.537064; batch adversarial loss:

<aif360.algorithms.inprocessing.adversarial_debiasing.AdversarialDebiasing at 0x1f4c6259188>

#### Show metrics

In [15]:
# Apply the debiased model to the train and test partitions.
dataset_debiasing_train, dataset_debiasing_test = [
    plain_model_debias.predict(partition)
    for partition in (dataset_orig_train, dataset_orig_test)
]

In [16]:
# Metrics for the dataset from the model trained with debiasing.
# The headings below name the debiased model so the rendered output is not
# confused with the plain-model results shown earlier.
display(Markdown("#### Model - with debiasing - dataset metrics"))
metric_dataset_debiasing_train = BinaryLabelDatasetMetric(dataset_debiasing_train,
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)

print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_debiasing_train.mean_difference())

metric_dataset_debiasing_test = BinaryLabelDatasetMetric(dataset_debiasing_test,
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)

print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_debiasing_test.mean_difference())

# Classification metrics compare the true test labels with the debiased predictions.
display(Markdown("#### Model - with debiasing - classification metrics"))
classified_metric_debiasing_test = ClassificationMetric(dataset_orig_test,
                                                 dataset_debiasing_test,
                                                 unprivileged_groups=unprivileged_groups,
                                                 privileged_groups=privileged_groups)
print("Test set: Classification accuracy = %f" % classified_metric_debiasing_test.accuracy())
# Balanced accuracy is the mean of the true positive and true negative rates.
TPR = classified_metric_debiasing_test.true_positive_rate()
TNR = classified_metric_debiasing_test.true_negative_rate()
bal_acc_debiasing_test = 0.5*(TPR+TNR)
print("Test set: Balanced classification accuracy = %f" % bal_acc_debiasing_test)
print("Test set: Disparate impact = %f" % classified_metric_debiasing_test.disparate_impact())
print("Test set: Equal opportunity difference = %f" % classified_metric_debiasing_test.equal_opportunity_difference())
print("Test set: Average odds difference = %f" % classified_metric_debiasing_test.average_odds_difference())
print("Test set: Theil_index = %f" % classified_metric_debiasing_test.theil_index())

#### Plain model - without debiasing - dataset metrics

Train set: Difference in mean outcomes between unprivileged and privileged groups = 0.109474
Test set: Difference in mean outcomes between unprivileged and privileged groups = 0.064220


#### Plain model - without debiasing - classification metrics

Test set: Classification accuracy = 0.746667
Test set: Balanced classification accuracy = 0.506832
Test set: Disparate impact = 1.068627
Test set: Equal opportunity difference = 0.060976
Test set: Average odds difference = 0.067525
Test set: Theil_index = 0.085861


## Post-processing: Reject Option Classification

#### Show metrics for Test Set

In [17]:
# Baseline for post-processing: the plain model's raw test predictions.
for heading in ("#### Test set", "##### Raw predictions - No fairness constraints"):
    display(Markdown(heading))

classified_metric_test = ClassificationMetric(
    dataset_orig_test,
    dataset_nodebiasing_test,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
raw_accuracy = classified_metric_test.accuracy()
print("Classification accuracy = %f" % raw_accuracy)

# compute_metrics comes from common_utils; judging by the cell output it also
# prints the metrics it computes.
metric_test_bef = compute_metrics(
    dataset_orig_test, dataset_nodebiasing_test, unprivileged_groups, privileged_groups
)

#### Test set

##### Raw predictions - No fairness constraints

Classification accuracy = 0.720000
Balanced accuracy = 0.5093
Statistical parity difference = -0.3659
Disparate impact = 0.6341
Average odds difference = -0.4167
Equal opportunity difference = -0.3333
Theil index = 0.1276


#### Estimate optimal parameters for the ROC method

In [18]:
# Fairness metrics that may be chosen as metric_name
allowed_metrics = [
    "Statistical parity difference",
    "Average odds difference",
    "Equal opportunity difference",
]

# Metric used (must be one of allowed_metrics)
metric_name = "Statistical parity difference"

# Lower and upper bound on the chosen fairness metric
metric_lb = -0.05
metric_ub = 0.05

# Seed NumPy's global random state before the post-processing steps
np.random.seed(1)

# Reject an unsupported metric name before it reaches the ROC constructor
if not (metric_name in allowed_metrics):
    raise ValueError("Metric name should be one of allowed metrics")

In [19]:
# Configure Reject Option Classification and fit it on the validation partition:
# true labels come from dataset_orig_valid, predictions (with scores) from the
# plain model's validation output.
ROC = RejectOptionClassification(
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
    low_class_thresh=0.01,
    high_class_thresh=0.99,
    num_class_thresh=100,
    num_ROC_margin=50,
    metric_name=metric_name,
    metric_ub=metric_ub,
    metric_lb=metric_lb,
).fit(dataset_orig_valid, dataset_nodebiasing_valid)

In [20]:
# Report the parameters stored on the fitted ROC model.
for template, value in (
    ("Optimal classification threshold (with fairness constraints) = %.4f",
     ROC.classification_threshold),
    ("Optimal ROC margin = %.4f", ROC.ROC_margin),
):
    print(template % value)

Optimal classification threshold (with fairness constraints) = 0.5841
Optimal ROC margin = 0.0085


#### Show predictions from Test Set with ROC

In [21]:
# Post-process the plain model's test predictions with the fitted ROC model,
# then report the same metrics as for the raw predictions.
dataset_transf_test = ROC.predict(dataset_nodebiasing_test)

for heading in ("#### Test set", "##### Transformed predictions - With fairness constraints"):
    display(Markdown(heading))

classified_metric_test = ClassificationMetric(
    dataset_orig_test,
    dataset_transf_test,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
transf_accuracy = classified_metric_test.accuracy()
print("Classification accuracy = %f" % transf_accuracy)

# compute_metrics comes from common_utils; judging by the cell output it also
# prints the metrics it computes.
metric_test_aft = compute_metrics(
    dataset_orig_test, dataset_transf_test, unprivileged_groups, privileged_groups
)

#### Test set

##### Transformed predictions - With fairness constraints

Classification accuracy = 0.593333
Balanced accuracy = 0.5857
Statistical parity difference = 0.0349
Disparate impact = 1.0634
Average odds difference = 0.0505
Equal opportunity difference = 0.0085
Theil index = 0.4063


References:

F. Kamiran and T. Calders, "Data preprocessing techniques for classification without discrimination",
Knowledge and Information Systems, 33(1):1–33, 2012.

B. H. Zhang, B. Lemoine, and M. Mitchell, "Mitigating Unwanted Biases with Adversarial Learning",
AAAI/ACM Conference on Artificial Intelligence, Ethics, and Society, 2018.

F. Kamiran, A. Karim, and X. Zhang,  "Decision theory for discrimination-aware classification",
In IEEE International Conference on Data Mining, pp. 924–929, 2012.