In [25]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from aif360.algorithms.inprocessing import GerryFairClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.base import clone
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
import sklearn

import aif360
from aif360.algorithms.preprocessing.optim_preproc_helpers.distortion_functions\
            import get_distortion_adult, get_distortion_german, get_distortion_compas

from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
        import load_preproc_data_adult, load_preproc_data_german, load_preproc_data_compas

from aif360.datasets import AdultDataset, BankDataset, CompasDataset, GermanDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.algorithms import preprocessing, inprocessing, postprocessing
from aif360.algorithms.preprocessing.optim_preproc import OptimPreproc
from aif360.algorithms.preprocessing.lfr import LFR
from aif360.algorithms.preprocessing.optim_preproc_helpers.opt_tools import OptTools
import copy

from IPython.display import Markdown, display
import warnings
warnings.filterwarnings('ignore')
%load_ext jupyternotify
np.random.seed(1)

The jupyternotify extension is already loaded. To reload it, use:
  %reload_ext jupyternotify


In [34]:
def f1_score(CM, priv=None):
            numer = CM.num_true_positives(privileged=priv)
            denom = CM.num_true_positives(privileged=priv) + 0.5*float(CM.num_false_positives(privileged=priv) + CM.num_false_negatives(privileged=priv))
            return float(numer/denom)

In [27]:
def get_dataset_options(dataset_name):
    optim_options = None
    if dataset_name=="adult":
        optim_options = {
        "distortion_fun": get_distortion_adult,
        "epsilon": 0.05,
        "clist": [0.99, 1.99, 2.99],
        "dlist": [.1, 0.05, 0]
        }
        pro_attr = 'sex'
        return (AdultDataset(), pro_attr, [{'sex': 1}], [{'sex': 0}], optim_options)
    elif dataset_name=="compas":
        optim_options = {
        "distortion_fun": get_distortion_compas,
        "epsilon": 0.05,
        "clist": [0.99, 1.99, 2.99],
        "dlist": [.1, 0.05, 0]
        }
        pro_attr = 'race'
        return (CompasDataset(), pro_attr, [{'race': 1}], [{'race': 0}], optim_options)
    elif dataset_name=="bank":
        pro_attr = 'age'
        return (BankDataset(protected_attribute_names=['age'],
            privileged_classes=[lambda x: x >= 25], 
            features_to_drop=['day_of_week']), pro_attr, [{'age': 1}], [{'age': 0}], None)
    elif dataset_name=="german":
        optim_options = {
            "distortion_fun": get_distortion_german,
            "epsilon": 0.1,
            "clist": [0.99, 1.99, 2.99],
            "dlist": [.1, 0.05, 0]
        }   
        pro_attr = 'age'
        label_map = {1.0: 'Good Credit', 0.0: 'Bad Credit'}
        #g = load_preproc_data_german(['age'])
        g = GermanDataset(metadata={'label_maps': [label_map]})
        g.labels = (2.0 - g.labels).astype('float64')
        g.favorable_label = 1.0
        g.unfavorable_label = 0.0
        #g.metadata['label_maps'] = [label_map]

        # load_preproc_data_german(['age'])
        return (g, pro_attr, [{'age': 1}], [{'age': 0}], optim_options)

In [41]:
def run_DIR(DIR=True, bal=False, dataset_name = "adult"):
    dataset, pro_attr, privileged_groups, unprivileged_groups, optim_options = get_dataset_options(dataset_name)
    preprocessing_algo = preprocessing.DisparateImpactRemover(sensitive_attribute=pro_attr)

    scale_orig = StandardScaler()
    dataset.features = scale_orig.fit_transform(dataset.features)

    dataset_train, dataset_test = dataset.split([0.7], shuffle = True, seed=0)

    model = None
    
    if bal is True:
        model = sklearn.linear_model.LogisticRegression(class_weight='balanced') # solver='liblinear', class_weight='balanced', 
    else:
        model = sklearn.linear_model.LogisticRegression()

    dataset_train_pred = dataset_train.copy(deepcopy=True)
    dataset_test_pred = dataset_test.copy(deepcopy=True)
    
    if DIR is True:
        dataset_train_pred = preprocessing_algo.fit_transform(dataset_train_pred)
        dataset_test_pred = preprocessing_algo.fit_transform(dataset_test_pred)

    model.fit(dataset_train_pred.features, dataset_train_pred.labels)   # .ravel()
    fav_idx = np.where(model.classes_ == dataset_train.favorable_label)[0][0]
    dataset_train_pred.scores = model.predict_proba(dataset_train_pred.features)[:,fav_idx].reshape(-1,1) 
    dataset_train_pred.labels = model.predict(dataset_train_pred.features).reshape(-1,1) 
    dataset_test_pred.scores = model.predict_proba(dataset_test_pred.features)[:,fav_idx].reshape(-1,1) 
    dataset_test_pred.labels = model.predict(dataset_test_pred.features).reshape(-1,1)  

    dataset_test_pred.features = dataset_test.features 
    CM = ClassificationMetric(dataset_test,
                                  dataset_test_pred,
                                  unprivileged_groups=unprivileged_groups,
                                  privileged_groups=privileged_groups)
    return round(CM.accuracy(), 4), round(f1_score(CM), 4)

In [42]:
# original, debiased
for dataset_name in ["adult", "compas", "bank", "german"]:
    print("Dataset: ", dataset_name)
    print(run_DIR(DIR=False, dataset_name = dataset_name),  run_DIR(DIR=True, dataset_name = dataset_name))



Dataset:  adult




(0.8505, 0.6639) (0.848, 0.6882)
Dataset:  compas




(0.6921, 0.7366) (0.6942, 0.7418)
Dataset:  bank




(0.8959, 0.5113) (0.894, 0.4932)
Dataset:  german
(0.7533, 0.8377) (0.7633, 0.8419)


In [43]:
# Class balanced classifier
# original, debiased

for dataset_name in ["adult", "compas", "bank", "german"]:
    print("Dataset: ", dataset_name)
    print(run_DIR(DIR=False, bal=True, dataset_name = dataset_name),  run_DIR(DIR=True, bal=True, dataset_name = dataset_name))



Dataset:  adult




(0.8103, 0.6863) (0.8005, 0.6754)
Dataset:  compas




(0.6759, 0.7056) (0.6823, 0.7132)
Dataset:  bank




(0.8474, 0.5958) (0.8361, 0.5833)
Dataset:  german
(0.7133, 0.7839) (0.7133, 0.785)


<b>Finding: </b>This notebook replicates the fact that DIR can yield higher Accuracy/F1 score than the baseline. This is an interesting finding that needs further investigation for its causes. If we train the classifier that accounts for imbalanced output class distribution, this trend might still persist as seen in the case of COMPAS dataset.