In [1]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin
from aif360.algorithms.preprocessing import DisparateImpactRemover
from aif360.datasets import BinaryLabelDataset

class DisparateImpactRemovalClassifier(BaseEstimator):

    def __init__(self, estimator, repair_level:int=1.0, verbose=False, random_state=None):
        assert isinstance(estimator, ClassifierMixin), "estimator must be a classifier"
        assert repair_level >= 0.0 and repair_level <= 1.0, "repair_level must be in [0, 1]"
        assert isinstance(verbose, bool), "verbose must be a boolean"
        assert isinstance(random_state, int) or random_state is None, "random_state must be an integer or None"
                
        self.estimator = estimator
        self.repair_level = repair_level
        self.verbose = verbose
        self.random_state = random_state
        self.set_params(random_state=random_state)

    def set_params(self, **kwargs):
        try:
            self.estimator.set_params(**kwargs)
        except:
            pass
 
    def fit(self, X, y, sensitive_features):
        self.preprocessor = DisparateImpactRemover(
            repair_level=self.repair_level, sensitive_attribute=0
        )
        X_processed = self.remove_bias(X, sensitive_features)
        self.estimator.fit(X_processed, y)
        return self

    def remove_bias(self, X, sensitive_features):
        """Remove bias from X using the DisparateImpactRemover preprocessor."""
        assert (X[:, 0] == sensitive_features).all(), \
            "The 1st column of X must be the sensitive attribute."
        
        y_dummy = np.zeros(X.shape[0])
        aif360_data = BinaryLabelDataset(
            df=pd.DataFrame(np.hstack([X, y_dummy.reshape(-1, 1)])), 
            label_names=[X.shape[1]],
            protected_attribute_names=[0],
            privileged_protected_attributes = [[1.0]],
            unprivileged_protected_attributes = [[0.0]],
        )
        X_processed = self.preprocessor.fit_transform(aif360_data).features
        return X_processed

    def predict(self, X, sensitive_features):
        X_processed = self.remove_bias(X, sensitive_features)
        return self.estimator.predict(X_processed)

    def predict_proba(self, X, sensitive_features):
        X_processed = self.remove_bias(X, sensitive_features)
        return self.estimator.predict_proba(X_processed)

`load_boston` has been removed from scikit-learn since version 1.2.

The Boston housing prices dataset has an ethical problem: as
investigated in [1], the authors of this dataset engineered a
non-invertible variable "B" assuming that racial self-segregation had a
positive impact on house prices [2]. Furthermore the goal of the
research that led to the creation of this dataset was to study the
impact of air quality but it did not give adequate demonstration of the
validity of this assumption.

The scikit-learn maintainers therefore strongly discourage the use of
this dataset unless the purpose of the code is to study and educate
about ethical issues in data science and machine learning.

In this special case, you can fetch the dataset from the original
source::

    import pandas as pd
    import numpy as np

    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
    data = np.hstack([raw_df.values[::2, :], raw_df

In [2]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin
from aif360.algorithms.preprocessing import LFR
from aif360.datasets import BinaryLabelDataset

class LFRClassifer(BaseEstimator):

    def __init__(self, estimator, privileged_groups, unprivileged_groups, 
        k=10, Ax=0.1, Ay=1.0, Az=2.0, maxiter=1000, maxfun=1000,
        print_interval=100, verbose=False, random_state=None,):
        assert isinstance(estimator, ClassifierMixin), "estimator must be a classifier"
        assert isinstance(verbose, bool), "verbose must be a boolean"
        assert isinstance(random_state, int) or random_state is None, "random_state must be an integer or None"
                
        self.estimator = estimator
        self.privileged_groups = privileged_groups
        self.unprivileged_groups = unprivileged_groups
        self.k = k
        self.Ax = Ax
        self.Ay = Ay
        self.Az = Az
        self.print_interval = print_interval
        self.maxiter = maxiter
        self.maxfun = maxfun            
        self.verbose = verbose
        self.random_state = random_state
        self.set_params(random_state=random_state)
        self.preprocessor = LFR(
            privileged_groups=privileged_groups,
            unprivileged_groups=unprivileged_groups,
            k=k,
            Ax=Ax,
            Ay=Ay,
            Az=Az,
            print_interval=100,
            verbose=self.verbose,
        )
        self.preprocessor_fit_kwargs = {
            'maxiter': maxiter,
            'maxfun': maxfun,
        }

    def set_params(self, **kwargs):
        try:
            self.estimator.set_params(**kwargs)
        except:
            pass
 
    def fit(self, X, y, sensitive_features):
        self.fit_preprocessor(X, y, sensitive_features)
        X_, y_ = self.transform_data(X, sensitive_features, y=y, return_y=True)
        try:
            self.estimator.fit(X_, y_)
        except:
            self.estimator.fit(X_, y)
        return self

    def translate_into_aif360_dataset(self, X, sensitive_features, y=None):
        if y is None:
            y = np.ones(X.shape[0])
        aif360_data = BinaryLabelDataset(
            df=pd.DataFrame(np.hstack([X, y.reshape(-1, 1)])), 
            label_names=[X.shape[1]],
            protected_attribute_names=[0],
            privileged_protected_attributes = [[1.0]],
            unprivileged_protected_attributes = [[0.0]],
        )
        return aif360_data

    def fit_preprocessor(self, X, y, sensitive_features):
        """Remove bias from X using the DisparateImpactRemover preprocessor."""
        assert (X[:, 0] == sensitive_features).all(), \
            "The 1st column of X must be the sensitive attribute."
        assert y is not None, "y must be provided to fit the LFR preprocessor."

        aif360_data = self.translate_into_aif360_dataset(X, sensitive_features, y=y)
        self.preprocessor.fit(aif360_data, **self.preprocessor_fit_kwargs)
        return
    
    def transform_data(self, X, sensitive_features, y=None, return_y=False):
        aif360_data = self.translate_into_aif360_dataset(X, sensitive_features, y=y)
        X_transformed = self.preprocessor.transform(aif360_data).features
        y_transformed = self.preprocessor.transform(aif360_data).labels.ravel()
        if return_y:
            return X_transformed, y_transformed
        else:
            return X_transformed

    def predict(self, X, sensitive_features):
        X_processed = self.transform_data(X, sensitive_features)
        return self.estimator.predict(X_processed)

    def predict_proba(self, X, sensitive_features):
        X_processed = self.transform_data(X, sensitive_features)
        return self.estimator.predict_proba(X_processed)

In [3]:
import sklearn
import shap

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

# Import base classifiers
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
from baselines import AdaFairClassifier
from imbens.ensemble import SMOTEBoostClassifier, SMOTEBaggingClassifier, RUSBoostClassifier, UnderBaggingClassifier, SelfPacedEnsembleClassifier
from fairlearn.postprocessing import ThresholdOptimizer
from fairens import FairAugEnsemble, FairEnsemble

# Import utilities
from data import FairDataset    # This is a custom class that we will use to load the datasets
from eval import evaluate_multi_split, verbose_print
from trainer import Trainer
from utils import seed_generator, dict_info

In [12]:
"""Load Datasets"""

dataset_kwargs = {
    'y_col': 'label',
    'train_size': 0.6,
    'val_size': 0.2,
    'test_size': 0.2,
    'concat_train_val': True,
    'normalize': True,
    'random_state': 42,
}

all_datasets = {
    'compas': ['sex', 'race'],
    'adult': ['gender', 'race'],
    'bank': ['age', 'marital=married'],
    # 'lsa_unfair_gender_race': ['gender', 'race'],
}

"""
Create a dictionary of datasets: dataset_zoo
key: dataset name
value: FairDataset object
"""
dataset_zoo = {}
for dataname, s_attrs in all_datasets.items():
    for s_attr in s_attrs:
        dataset = FairDataset(
            dataname=dataname,
            csv_path=f'./data/{dataname}.csv',
            s_col=s_attr,
            **dataset_kwargs
        )
        dataset_zoo[dataset.fullname] = dataset

        # dataset.describe()
        dataset.brief()

# Print the information of the datasets and models
print(
    f"////// Dataset ZOO //////\n"
    f"{dict_info(dataset_zoo)}\n"
)

dataset_zoo_subset = {
    'compas_sex': dataset_zoo['compas_sex'],
    'compas_race': dataset_zoo['compas_race'],
    'adult_gender': dataset_zoo['adult_gender'],
    'adult_race': dataset_zoo['adult_race'],
    'bank_age': dataset_zoo['bank_age'],
    'bank_marital=married': dataset_zoo['bank_marital=married'],
}

Dataset    : compas (5875, 12) load from ./data/compas.csv
Sens/Res   : sex/label
Split      : train/test = 0.8/0.2, random_state = 42, x_with_s = True
train      | size {0: 929, 1: 3771} | grp_pos_ratio: {0: 0.3617, 1: 0.4916}
test       | size {0: 232, 1: 943} | grp_pos_ratio: {0: 0.3621, 1: 0.491}

Dataset    : compas (5875, 12) load from ./data/compas.csv
Sens/Res   : race/label
Split      : train/test = 0.8/0.2, random_state = 42, x_with_s = True
train      | size {0: 1878, 1: 2822} | grp_pos_ratio: {0: 0.3946, 1: 0.5135}
test       | size {0: 469, 1: 706} | grp_pos_ratio: {0: 0.3945, 1: 0.5127}

Dataset    : adult (45222, 99) load from ./data/adult.csv
Sens/Res   : gender/label
Split      : train/test = 0.8/0.2, random_state = 42, x_with_s = True
train      | size {0: 11756, 1: 24421} | grp_pos_ratio: {0: 0.1136, 1: 0.3125}
test       | size {0: 2939, 1: 6106} | grp_pos_ratio: {0: 0.1136, 1: 0.3125}

Dataset    : adult (45222, 99) load from ./data/adult.csv
Sens/Res   : race/labe

In [13]:
from trainer import Benchmarker
from baselines import ReweightClassifier, ReductionClassifier
from imbens.ensemble import UnderBaggingClassifier

ensemble_kwargs = {
    'n_estimators': 10,
    'random_state': 42,
}
single_ensemble_kwargs = {
    'n_estimators': 1,
    'random_state': 42,
}

base_models = {
    'LR': LogisticRegression(),
    # 'KN': KNeighborsClassifier(),
    'DT': DecisionTreeClassifier(max_depth=None),
    'MLP': MLPClassifier(hidden_layer_sizes=(8), max_iter=50),
    # 'ADA': AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=None), n_estimators=5),
    # 'BAG': BaggingClassifier(estimator=DecisionTreeClassifier(max_depth=None), n_estimators=5),
}

baselines = {
    # 'AdaBoost': (AdaBoostClassifier, {**ensemble_kwargs}),
    # 'Bagging': (BaggingClassifier, {**ensemble_kwargs}),
    # 'RUSBoost': (RUSBoostClassifier, {**ensemble_kwargs}),
    # 'UnderBag': (UnderBaggingClassifier, {**ensemble_kwargs}),
    # 'SMBoost': (SMOTEBoostClassifier, {**ensemble_kwargs}),
    # 'SMBag': (SMOTEBaggingClassifier, {**ensemble_kwargs}),
    # 'Reweight': (ReweightClassifier, {}),
    # 'ReductionDP': (ReductionClassifier, {'constraints': 'DemographicParity'}),
    # 'ReductionEO': (ReductionClassifier, {'constraints': 'EqualizedOdds'}),
    # 'ThresDP': (ThresholdOptimizer, {'constraints': 'demographic_parity'}),
    # 'ThresEO': (ThresholdOptimizer, {'constraints': 'equalized_odds'}),
    # 'AdaFair': (AdaFairClassifier, {'saIndex': 0, 'saValue': 0, 'CSB': 'CSB2', **ensemble_kwargs}),
    # 'DisparateIR': (DisparateImpactRemovalClassifier, {'repair_level': 1.0}),
    # 'LFR': (LFRClassifer, {'privileged_groups': [{'0': 1}], 'unprivileged_groups': [{'0': 0}]}),
    'UnderBag': (UnderBaggingClassifier, {**ensemble_kwargs}),
}

benchmark = Benchmarker(
    base_models=base_models,
    baselines=baselines,
    datasets=dataset_zoo_subset,
    random_state=42,
    dummy_strategy='stratified',
)
benchmark.run(
    n_runs=5, 
    group_by='dataset', 
    exception='raise',
)

Initializing Benchmarker with:
Random seed: 42
Base models: ['LR', 'DT', 'MLP']
Techniques:  ['UnderBag']
Datasets:    ['compas_sex', 'compas_race', 'adult_gender', 'adult_race', 'bank_age', 'bank_marital=married']
# models:    6
# datasets:  6

Running All models ...


Data: compas_sex | Model: Dummy        :   0%|          | 0/5 [00:00<?, ?it/s]

Data: compas_sex | Model: Dummy        : 100%|██████████| 5/5 [00:00<00:00,  9.07it/s, ACC 0.497±0.021 | BACC 0.495±0.022 | DP 0.010±0.006 | EO 0.044±0.043 | SI 0.000±0.000 | AdvG {0: 4, 1: 1}]
Data: compas_sex | Model: LR           : 100%|██████████| 5/5 [00:00<00:00,  8.56it/s, ACC 0.696±0.006 | BACC 0.690±0.006 | DP 0.336±0.016 | EO 0.382±0.039 | SI 0.210±0.013 | AdvG {1: 5}]
Data: compas_sex | Model: LR_UnderBag  : 100%|██████████| 5/5 [00:01<00:00,  4.27it/s, ACC 0.688±0.011 | BACC 0.688±0.011 | DP 0.377±0.034 | EO 0.371±0.057 | SI 0.248±0.013 | AdvG {1: 5}]
Data: compas_sex | Model: DT           : 100%|██████████| 5/5 [00:00<00:00,  8.04it/s, ACC 0.607±0.013 | BACC 0.603±0.013 | DP 0.085±0.037 | EO 0.090±0.035 | SI 0.238±0.020 | AdvG {1: 5}]
Data: compas_sex | Model: DT_UnderBag  : 100%|██████████| 5/5 [00:01<00:00,  3.65it/s, ACC 0.629±0.015 | BACC 0.625±0.015 | DP 0.156±0.021 | EO 0.140±0.014 | SI 0.245±0.026 | AdvG {1: 5}]
Data: compas_sex | Model: MLP          : 100%|████████

Results on Data: compas_sex
Model: Dummy        | ACC 0.497±0.021            | BACC 0.495±0.022            | DP 0.010±0.006            | EO 0.044±0.043            | SI 0.000±0.000           
Model: LR           | ACC 0.696±0.006            | BACC 0.690±0.006            | DP 0.336±0.016            | EO 0.382±0.039            | SI 0.210±0.013           
Model: LR_UnderBag  | ACC 0.688±0.011 (-3.93%)   | BACC 0.688±0.011 (-1.02%)   | DP 0.377±0.034 (+12.25%)  | EO 0.371±0.057 (-3.03%)   | SI 0.248±0.013 (+17.99%)  | FURG -11.55   | FUTR -3.66   
Model: DT           | ACC 0.607±0.013            | BACC 0.603±0.013            | DP 0.085±0.037            | EO 0.090±0.035            | SI 0.238±0.020           
Model: DT_UnderBag  | ACC 0.629±0.015 (+19.60%)  | BACC 0.625±0.015 (+20.13%)  | DP 0.156±0.021 (+84.60%)  | EO 0.140±0.014 (+54.57%)  | SI 0.245±0.026 (+2.86%)   | FURG -27.48   | FUTR -47.34  
Model: MLP          | ACC 0.686±0.014            | BACC 0.680±0.014            | DP 0.361±0.0

Data: compas_race | Model: Dummy        : 100%|██████████| 5/5 [00:00<00:00,  9.04it/s, ACC 0.491±0.015 | BACC 0.489±0.016 | DP 0.014±0.012 | EO 0.035±0.023 | SI 0.000±0.000 | AdvG {1: 3, 0: 2}]
Data: compas_race | Model: LR           : 100%|██████████| 5/5 [00:00<00:00,  8.40it/s, ACC 0.685±0.011 | BACC 0.680±0.011 | DP 0.310±0.028 | EO 0.337±0.036 | SI 0.058±0.013 | AdvG {1: 5}]
Data: compas_race | Model: LR_UnderBag  : 100%|██████████| 5/5 [00:01<00:00,  4.36it/s, ACC 0.673±0.010 | BACC 0.674±0.010 | DP 0.323±0.030 | EO 0.317±0.045 | SI 0.055±0.016 | AdvG {1: 5}]
Data: compas_race | Model: DT           : 100%|██████████| 5/5 [00:00<00:00,  8.02it/s, ACC 0.614±0.009 | BACC 0.610±0.009 | DP 0.116±0.029 | EO 0.131±0.047 | SI 0.224±0.023 | AdvG {1: 5}]
Data: compas_race | Model: DT_UnderBag  : 100%|██████████| 5/5 [00:01<00:00,  3.75it/s, ACC 0.626±0.012 | BACC 0.623±0.013 | DP 0.166±0.027 | EO 0.185±0.043 | SI 0.238±0.015 | AdvG {1: 5}]
Data: compas_race | Model: MLP          : 100%|██

Results on Data: compas_race
Model: Dummy        | ACC 0.491±0.015            | BACC 0.489±0.016            | DP 0.014±0.012            | EO 0.035±0.023            | SI 0.000±0.000           
Model: LR           | ACC 0.685±0.011            | BACC 0.680±0.011            | DP 0.310±0.028            | EO 0.337±0.036            | SI 0.058±0.013           
Model: LR_UnderBag  | ACC 0.673±0.010 (-6.41%)   | BACC 0.674±0.010 (-3.52%)   | DP 0.323±0.030 (+4.40%)   | EO 0.317±0.045 (-6.13%)   | SI 0.055±0.016 (-5.25%)   | FURG -2.64    | FUTR 0.47    
Model: DT           | ACC 0.614±0.009            | BACC 0.610±0.009            | DP 0.116±0.029            | EO 0.131±0.047            | SI 0.224±0.023           
Model: DT_UnderBag  | ACC 0.626±0.012 (+9.85%)   | BACC 0.623±0.013 (+10.12%)  | DP 0.166±0.027 (+43.37%)  | EO 0.185±0.043 (+41.32%)  | SI 0.238±0.015 (+6.07%)   | FURG -20.28   | FUTR -30.26  
Model: MLP          | ACC 0.674±0.014            | BACC 0.667±0.014            | DP 0.335±0.

Data: adult_gender | Model: Dummy        : 100%|██████████| 5/5 [00:01<00:00,  2.89it/s, ACC 0.624±0.004 | BACC 0.496±0.004 | DP 0.007±0.004 | EO 0.024±0.014 | SI 0.000±0.000 | AdvG {0.0: 3, 1.0: 2}]
Data: adult_gender | Model: LR           : 100%|██████████| 5/5 [00:03<00:00,  1.39it/s, ACC 0.847±0.002 | BACC 0.765±0.003 | DP 0.189±0.011 | EO 0.130±0.038 | SI 0.083±0.007 | AdvG {1.0: 5}]
Data: adult_gender | Model: LR_UnderBag  : 100%|██████████| 5/5 [00:13<00:00,  2.71s/it, ACC 0.801±0.004 | BACC 0.815±0.003 | DP 0.337±0.012 | EO 0.232±0.011 | SI 0.112±0.009 | AdvG {1.0: 5}]
Data: adult_gender | Model: DT           : 100%|██████████| 5/5 [00:03<00:00,  1.60it/s, ACC 0.818±0.004 | BACC 0.749±0.006 | DP 0.179±0.010 | EO 0.088±0.006 | SI 0.051±0.007 | AdvG {1.0: 5}]
Data: adult_gender | Model: DT_UnderBag  : 100%|██████████| 5/5 [00:13<00:00,  2.74s/it, ACC 0.823±0.003 | BACC 0.801±0.007 | DP 0.267±0.012 | EO 0.160±0.009 | SI 0.042±0.006 | AdvG {1.0: 5}]
Data: adult_gender | Model: MLP 

Results on Data: adult_gender
Model: Dummy        | ACC 0.624±0.004            | BACC 0.496±0.004            | DP 0.007±0.004            | EO 0.024±0.014            | SI 0.000±0.000           
Model: LR           | ACC 0.847±0.002            | BACC 0.765±0.003            | DP 0.189±0.011            | EO 0.130±0.038            | SI 0.083±0.007           
Model: LR_UnderBag  | ACC 0.801±0.004 (-20.64%)  | BACC 0.815±0.003 (+18.58%)  | DP 0.337±0.012 (+77.90%)  | EO 0.232±0.011 (+78.26%)  | SI 0.112±0.009 (+35.83%)  | FURG -65.02   | FUTR -62.41  
Model: DT           | ACC 0.818±0.004            | BACC 0.749±0.006            | DP 0.179±0.010            | EO 0.088±0.006            | SI 0.051±0.007           
Model: DT_UnderBag  | ACC 0.823±0.003 (+2.79%)   | BACC 0.801±0.007 (+20.46%)  | DP 0.267±0.012 (+48.98%)  | EO 0.160±0.009 (+80.70%)  | SI 0.042±0.006 (-17.28%)  | FURG -25.84   | FUTR -37.47  
Model: MLP          | ACC 0.847±0.002            | BACC 0.771±0.005            | DP 0.192±0

Data: adult_race | Model: Dummy        : 100%|██████████| 5/5 [00:02<00:00,  2.50it/s, ACC 0.627±0.006 | BACC 0.501±0.005 | DP 0.012±0.004 | EO 0.031±0.015 | SI 0.000±0.000 | AdvG {1.0: 3, 0.0: 2}]
Data: adult_race | Model: LR           : 100%|██████████| 5/5 [00:04<00:00,  1.21it/s, ACC 0.848±0.002 | BACC 0.764±0.004 | DP 0.102±0.004 | EO 0.086±0.031 | SI 0.018±0.003 | AdvG {1.0: 5}]
Data: adult_race | Model: LR_UnderBag  : 100%|██████████| 5/5 [00:14<00:00,  2.86s/it, ACC 0.803±0.003 | BACC 0.816±0.003 | DP 0.179±0.011 | EO 0.123±0.015 | SI 0.024±0.004 | AdvG {1.0: 5}]
Data: adult_race | Model: DT           : 100%|██████████| 5/5 [00:03<00:00,  1.60it/s, ACC 0.815±0.002 | BACC 0.748±0.005 | DP 0.084±0.012 | EO 0.039±0.012 | SI 0.068±0.006 | AdvG {1.0: 5}]
Data: adult_race | Model: DT_UnderBag  : 100%|██████████| 5/5 [00:13<00:00,  2.75s/it, ACC 0.823±0.004 | BACC 0.801±0.004 | DP 0.147±0.007 | EO 0.092±0.009 | SI 0.065±0.004 | AdvG {1.0: 5}]
Data: adult_race | Model: MLP          : 1

Results on Data: adult_race
Model: Dummy        | ACC 0.627±0.006            | BACC 0.501±0.005            | DP 0.012±0.004            | EO 0.031±0.015            | SI 0.000±0.000           
Model: LR           | ACC 0.848±0.002            | BACC 0.764±0.004            | DP 0.102±0.004            | EO 0.086±0.031            | SI 0.018±0.003           
Model: LR_UnderBag  | ACC 0.803±0.003 (-20.17%)  | BACC 0.816±0.003 (+19.83%)  | DP 0.179±0.011 (+75.78%)  | EO 0.123±0.015 (+43.44%)  | SI 0.024±0.004 (+33.25%)  | FURG -50.99   | FUTR -50.82  
Model: DT           | ACC 0.815±0.002            | BACC 0.748±0.005            | DP 0.084±0.012            | EO 0.039±0.012            | SI 0.068±0.006           
Model: DT_UnderBag  | ACC 0.823±0.004 (+4.35%)   | BACC 0.801±0.004 (+21.51%)  | DP 0.147±0.007 (+74.73%)  | EO 0.092±0.009 (+134.77%) | SI 0.065±0.004 (-4.21%)   | FURG -55.50   | FUTR -68.43  
Model: MLP          | ACC 0.848±0.002            | BACC 0.760±0.006            | DP 0.099±0.0

Data: bank_age | Model: Dummy        : 100%|██████████| 5/5 [00:01<00:00,  2.63it/s, ACC 0.777±0.003 | BACC 0.499±0.006 | DP 0.016±0.016 | EO 0.052±0.037 | SI 0.000±0.000 | AdvG {0: 5}]
Data: bank_age | Model: LR           : 100%|██████████| 5/5 [00:03<00:00,  1.48it/s, ACC 0.897±0.003 | BACC 0.678±0.005 | DP 0.103±0.033 | EO 0.144±0.106 | SI 0.011±0.003 | AdvG {1: 5}]
Data: bank_age | Model: LR_UnderBag  : 100%|██████████| 5/5 [00:08<00:00,  1.69s/it, ACC 0.850±0.005 | BACC 0.849±0.007 | DP 0.245±0.028 | EO 0.191±0.036 | SI 0.023±0.007 | AdvG {1: 5}]
Data: bank_age | Model: DT           : 100%|██████████| 5/5 [00:03<00:00,  1.55it/s, ACC 0.878±0.004 | BACC 0.725±0.005 | DP 0.099±0.028 | EO 0.115±0.067 | SI 0.024±0.003 | AdvG {1: 5}]
Data: bank_age | Model: DT_UnderBag  : 100%|██████████| 5/5 [00:08<00:00,  1.68s/it, ACC 0.873±0.003 | BACC 0.857±0.007 | DP 0.148±0.041 | EO 0.091±0.056 | SI 0.017±0.004 | AdvG {1: 5}]
Data: bank_age | Model: MLP          : 100%|██████████| 5/5 [00:18<00:

Results on Data: bank_age
Model: Dummy        | ACC 0.777±0.003            | BACC 0.499±0.006            | DP 0.016±0.016            | EO 0.052±0.037            | SI 0.000±0.000           
Model: LR           | ACC 0.897±0.003            | BACC 0.678±0.005            | DP 0.103±0.033            | EO 0.144±0.106            | SI 0.011±0.003           
Model: LR_UnderBag  | ACC 0.850±0.005 (-38.77%)  | BACC 0.849±0.007 (+95.54%)  | DP 0.245±0.028 (+137.95%) | EO 0.191±0.036 (+32.52%)  | SI 0.023±0.007 (+109.73%) | FURG -65.02   | FUTR -93.40  
Model: DT           | ACC 0.878±0.004            | BACC 0.725±0.005            | DP 0.099±0.028            | EO 0.115±0.067            | SI 0.024±0.003           
Model: DT_UnderBag  | ACC 0.873±0.003 (-5.14%)   | BACC 0.857±0.007 (+58.65%)  | DP 0.148±0.041 (+49.24%)  | EO 0.091±0.056 (-20.69%)  | SI 0.017±0.004 (-26.82%)  | FURG 26.18    | FUTR -0.58   
Model: MLP          | ACC 0.900±0.003            | BACC 0.708±0.014            | DP 0.113±0.030

Data: bank_marital=married | Model: Dummy        : 100%|██████████| 5/5 [00:01<00:00,  4.72it/s, ACC 0.778±0.003 | BACC 0.501±0.002 | DP 0.004±0.003 | EO 0.021±0.021 | SI 0.000±0.000 | AdvG {0: 3, 1: 2}]
Data: bank_marital=married | Model: LR           : 100%|██████████| 5/5 [00:01<00:00,  2.80it/s, ACC 0.900±0.001 | BACC 0.688±0.006 | DP 0.021±0.006 | EO 0.037±0.031 | SI 0.001±0.001 | AdvG {1: 5}]
Data: bank_marital=married | Model: LR_UnderBag  : 100%|██████████| 5/5 [00:04<00:00,  1.09it/s, ACC 0.852±0.003 | BACC 0.855±0.004 | DP 0.056±0.007 | EO 0.051±0.016 | SI 0.003±0.002 | AdvG {1: 5}]
Data: bank_marital=married | Model: DT           : 100%|██████████| 5/5 [00:01<00:00,  2.73it/s, ACC 0.877±0.002 | BACC 0.731±0.002 | DP 0.027±0.010 | EO 0.033±0.023 | SI 0.018±0.006 | AdvG {1: 5}]
Data: bank_marital=married | Model: DT_UnderBag  : 100%|██████████| 5/5 [00:05<00:00,  1.04s/it, ACC 0.873±0.002 | BACC 0.865±0.009 | DP 0.036±0.011 | EO 0.023±0.011 | SI 0.012±0.002 | AdvG {1: 5}]
Data

Results on Data: bank_marital=married
Model: Dummy        | ACC 0.778±0.003            | BACC 0.501±0.002            | DP 0.004±0.003            | EO 0.021±0.021            | SI 0.000±0.000           
Model: LR           | ACC 0.900±0.001            | BACC 0.688±0.006            | DP 0.021±0.006            | EO 0.037±0.031            | SI 0.001±0.001           
Model: LR_UnderBag  | ACC 0.852±0.003 (-38.88%)  | BACC 0.855±0.004 (+89.21%)  | DP 0.056±0.007 (+163.81%) | EO 0.051±0.016 (+39.71%)  | SI 0.003±0.002 (+254.17%) | FURG -127.40  | FUTR -152.56 
Model: DT           | ACC 0.877±0.002            | BACC 0.731±0.002            | DP 0.027±0.010            | EO 0.033±0.023            | SI 0.018±0.006           
Model: DT_UnderBag  | ACC 0.873±0.002 (-4.02%)   | BACC 0.865±0.009 (+58.23%)  | DP 0.036±0.011 (+32.85%)  | EO 0.023±0.011 (-29.45%)  | SI 0.012±0.002 (-31.84%)  | FURG 36.59    | FUTR 9.48    
Model: MLP          | ACC 0.903±0.002            | BACC 0.721±0.023            | DP




In [6]:
from aif360.sklearn.datasets import fetch_adult
from aif360.datasets import AdultDataset, BankDataset, BinaryLabelDataset

# fetch_adult()
# ad = AdultDataset()

`load_boston` has been removed from scikit-learn since version 1.2.

The Boston housing prices dataset has an ethical problem: as
investigated in [1], the authors of this dataset engineered a
non-invertible variable "B" assuming that racial self-segregation had a
positive impact on house prices [2]. Furthermore the goal of the
research that led to the creation of this dataset was to study the
impact of air quality but it did not give adequate demonstration of the
validity of this assumption.

The scikit-learn maintainers therefore strongly discourage the use of
this dataset unless the purpose of the code is to study and educate
about ethical issues in data science and machine learning.

In this special case, you can fetch the dataset from the original
source::

    import pandas as pd
    import numpy as np

    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
    data = np.hstack([raw_df.values[::2, :], raw_df

In [7]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from matplotlib import pyplot as plt

import sys
sys.path.append("../")
import warnings

import numpy as np
from tqdm import tqdm

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC as SVM
from sklearn.preprocessing import MinMaxScaler

from aif360.algorithms.preprocessing import DisparateImpactRemover
from aif360.datasets import AdultDataset
from aif360.metrics import BinaryLabelDatasetMetric


In [8]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

# Import base classifiers
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
from baselines import AdaFairClassifier
from imbens.ensemble import SMOTEBoostClassifier, SMOTEBaggingClassifier, RUSBoostClassifier, UnderBaggingClassifier, SelfPacedEnsembleClassifier
from fairlearn.postprocessing import ThresholdOptimizer
from fairens import FairAugEnsemble, FairEnsemble

# import aif360 
import aif360

# Import utilities
from data import FairDataset    # This is a custom class that we will use to load the datasets
from eval import evaluate_multi_split, verbose_print
from trainer import Trainer
from utils import seed_generator, dict_info

In [9]:
from eval import flip_s_in_X, evaluate

dataset_kwargs = {
    'y_col': 'label',
    'train_size': 0.6,
    'val_size': 0.2,
    'test_size': 0.2,
    'concat_train_val': True,
    'normalize': True,
    'random_state': 42,
}

dataname = 'adult'
s_attr = 'gender'
data = FairDataset(
    dataname=dataname,
    csv_path=f'./data/{dataname}.csv',
    s_col=s_attr,
    x_with_s=True,
    **dataset_kwargs
)

data.describe()

(
    (X_train, y_train, s_train),
    (X_val, y_val, s_val),
    (X_test, y_test, s_test),
) = data.train_val_test_split(x_with_s=True)

print(X_train.shape, X_val.shape, X_test.shape)

classes = np.unique(y_train)
n_feat = X_train.shape[1]
n_class = len(classes)

clf = MLPClassifier(hidden_layer_sizes=(32), max_iter=500)

clf.fit(X_train, y_train)

print(evaluate(clf, X_train, y_train, s_train))
print(evaluate(clf, X_test, y_test, s_test))

Dataset    : adult (45222, 99) load from ./data/adult.csv
Sens/Res   : gender/label
Split      : train/test = 0.8/0.2, random_state = 42, x_with_s = True
train data [#samples 36177 #features 98]:
+-----+-------+-------+------------+
|     |   y=0 |   y=1 |   pos_rate |
| s=0 | 10421 |  1335 |     0.1136 |
+-----+-------+-------+------------+
| s=1 | 16790 |  7631 |     0.3125 |
+-----+-------+-------+------------+
test data [#samples 9045 #features 98]:
+-----+-------+-------+------------+
|     |   y=0 |   y=1 |   pos_rate |
| s=0 |  2605 |   334 |     0.1136 |
+-----+-------+-------+------------+
| s=1 |  4198 |  1908 |     0.3125 |
+-----+-------+-------+------------+

(27132, 98) (9045, 98) (9045, 98)
{'acc': 0.8670573492554917, 'bacc': 0.7777528775851861, 'dp': 0.15729116477670407, 'eo': 0.04937490164507953, 'si': 0.0911469851098334, 'acc_grp': {0.0: 0.937, 1.0: 0.833}, 'pos_rate_grp': {0.0: 0.077, 1.0: 0.234}, 'g_adv': 1.0, 'acc_cls': {0.0: 0.955, 1.0: 0.601}}
{'acc': 0.844776119

In [10]:
data.df

Unnamed: 0,gender,capital-gain,race,age,education-num,capital-loss,hours-per-week,workclass=Federal-gov,workclass=Local-gov,workclass=Private,...,native-country=Puerto-Rico,native-country=Scotland,native-country=South,native-country=Taiwan,native-country=Thailand,native-country=Trinadad&Tobago,native-country=United-States,native-country=Vietnam,native-country=Yugoslavia,label
0,1.0,0.021740,1.0,0.301370,0.800000,0.0,0.397959,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,1.0,0.000000,1.0,0.452055,0.800000,0.0,0.122449,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,1.0,0.000000,1.0,0.287671,0.533333,0.0,0.397959,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,1.0,0.000000,0.0,0.493151,0.400000,0.0,0.397959,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,0.0,0.000000,0.0,0.150685,0.800000,0.0,0.397959,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45217,1.0,0.000000,1.0,0.219178,0.800000,0.0,0.397959,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
45218,0.0,0.000000,1.0,0.301370,0.800000,0.0,0.357143,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
45219,1.0,0.000000,1.0,0.287671,0.800000,0.0,0.500000,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
45220,1.0,0.054551,0.0,0.369863,0.800000,0.0,0.397959,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [11]:
from aif360.datasets import BinaryLabelDataset

df_train = pd.DataFrame(np.hstack([X_train, y_train.reshape(-1, 1)]))
df_test = pd.DataFrame(np.hstack([X_test, y_test.reshape(-1, 1)]))

data_train = BinaryLabelDataset(
    df=pd.DataFrame(np.hstack([X_train, y_train.reshape(-1, 1)])), 
    label_names=[df_train.columns[-1]],
    protected_attribute_names=[0],
    privileged_protected_attributes = [[1.0]],
    unprivileged_protected_attributes = [[0.0]],
)
bias_remover = DisparateImpactRemover(repair_level=1.0)
debiased_data_train = bias_remover.fit_transform(data_train)

print((train_repd.features != X_train).sum() / X_train.size)

NameError: name 'train_repd' is not defined

In [None]:
# validate whether the 1st column of X is the sensitive attribute
(train_repd.features[:, 0] == s_train).all()

In [None]:
from sklearn.base import BaseEstimator, ClassifierMixin
from aif360.algorithms.preprocessing import DisparateImpactRemover

class DisparateImpactRemovalClassifier(BaseEstimator):

    def __init__(self, estimator, repair_level:int=1.0, verbose=False, random_state=None):
        assert isinstance(estimator, ClassifierMixin), "estimator must be a classifier"
        assert repair_level >= 0.0 and repair_level <= 1.0, "repair_level must be in [0, 1]"
        assert isinstance(verbose, bool), "verbose must be a boolean"
        assert isinstance(random_state, int) or random_state is None, "random_state must be an integer or None"
                
        self.estimator = estimator
        self.repair_level = repair_level
        self.verbose = verbose
        self.random_state = random_state
        self.set_params(random_state=random_state)

    def set_params(self, **kwargs):
        try:
            self.estimator.set_params(**kwargs)
        except:
            pass
 
    def fit(self, X, y, sensitive_features):
        self.preprocessor = DisparateImpactRemover(
            repair_level=self.repair_level, sensitive_attribute=0
        )
        X_processed = self.remove_bias(X, sensitive_features)
        self.estimator.fit(X_processed, y)
        return self

    def remove_bias(self, X, sensitive_features):
        """Remove bias from X using the DisparateImpactRemover preprocessor."""
        assert (X[:, 0] == sensitive_features).all(), \
            "The 1st column of X must be the sensitive attribute."
        
        y_dummy = np.zeros(X.shape[0])
        aif360_data = BinaryLabelDataset(
            df=pd.DataFrame(np.hstack([X, y_dummy.reshape(-1, 1)])), 
            label_names=[X.shape[1]],
            protected_attribute_names=[0],
            privileged_protected_attributes = [[1.0]],
            unprivileged_protected_attributes = [[0.0]],
        )
        X_processed = self.preprocessor.fit_transform(aif360_data).features
        return X_processed

    def predict(self, X, sensitive_features):
        X_processed = self.remove_bias(X, sensitive_features)
        return self.estimator.predict(X_processed)

    def predict_proba(self, X, sensitive_features):
        X_processed = self.remove_bias(X, sensitive_features)
        return self.estimator.predict_proba(X_processed)

In [None]:
base_clf = LogisticRegression()
clf = DisparateImpactRemovalClassifier(base_clf, repair_level=1.0)

clf.fit(X_train, y_train, s_train)

In [None]:
from aif360.datasets import BinaryLabelDataset

# transform into aif360 dataset
df_train = pd.DataFrame(X_train, columns=data.df.columns[:-1])
df_train['label'] = y_train
df_test = pd.DataFrame(X_test, columns=data.df.columns[:-1])
df_test['label'] = y_test


data_train = BinaryLabelDataset(
    df=df_train, label_names=['label'], protected_attribute_names=['gender'],
    privileged_protected_attributes = [[1.0]],
    unprivileged_protected_attributes = [[0.0]],
    )
index = data_train.feature_names.index('gender')
data_test = BinaryLabelDataset(df=df_test, label_names=['label'], protected_attribute_names=['gender'])

di = DisparateImpactRemover(repair_level=1.0,sensitive_attribute=0)
train_repd = di.fit_transform(data_train)
test_repd = di.fit_transform(data_test)

X_tr = np.delete(train_repd.features, index, axis=1)
X_te = np.delete(test_repd.features, index, axis=1)
y_tr = train_repd.labels.ravel()

X_tr.shape, X_te.shape, y_tr.shape

(train_repd.features != X_train).sum() / X_train.size

In [None]:
train_repd.features

In [None]:
# clf = MLPClassifier(hidden_layer_sizes=(32), max_iter=500)
clf = LogisticRegression(class_weight='balanced', solver='liblinear')

clf.fit(X_tr, y_train)

print(evaluate(clf, X_tr, y_train, s_train))
print(evaluate(clf, X_te, y_test, s_test))

clf.fit(X_train, y_train)

print(evaluate(clf, X_train, y_train, s_train))
print(evaluate(clf, X_test, y_test, s_test))

In [None]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, MetaEstimatorMixin, clone
from sklearn.utils.validation import has_fit_parameter

from aif360.sklearn.utils import check_inputs, check_groups

class ReweightClassifier(BaseEstimator):
    def __init__(self, estimator, random_state=None):
        self.estimator = estimator
        self.random_state = random_state
        self.set_params(random_state=random_state)

    def set_params(self, **kwargs):
        try:
            self.estimator.set_params(**kwargs)
        except:
            pass

    def fit(self, X_train, y_train):
        preprocessor = Reweighing(prot_attr=0)
        _, sample_weight = preprocessor.fit_transform(pd.DataFrame(X_train), y_train)
        self.estimator.fit(X_train, y_train, sample_weight=sample_weight)
        return self

    def predict(self, X_test):
        return self.estimator.predict(X_test)

    def predict_proba(self, X_test):
        return self.estimator.predict_proba(X_test)
