# **Training Model for Binary Classification Problem with Bias Mitigators**

In [1]:
# sys path
import sys
sys.path.append('../../')

In [2]:
# Imports
from holisticai.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

# Settings
np.random.seed(0)
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Dataset
data = load_adult()

# Dataframe
df = pd.concat([data["data"], data["target"]], axis=1)
protected_variables = ["sex", "race"]
output_variable = ["class"]

# Simple preprocessing
y = df[output_variable].replace({">50K": 1, "<=50K": 0})
X = pd.get_dummies(df.drop(protected_variables + output_variable, axis=1))
group = ["sex"]
group_a = df[group] == "Female"
group_b = df[group] == "Male"
data_ = [X, y, group_a, group_b]

# Train test split
dataset = train_test_split(*data_, test_size=0.2, shuffle=True)
train_data = dataset[::2]
test_data = dataset[1::2]

In [4]:
# the dataframe
data['frame']

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,class
0,25.0,Private,226802.0,11th,7.0,Never-married,Machine-op-inspct,Own-child,Black,Male,0.0,0.0,40.0,United-States,<=50K
1,38.0,Private,89814.0,HS-grad,9.0,Married-civ-spouse,Farming-fishing,Husband,White,Male,0.0,0.0,50.0,United-States,<=50K
2,28.0,Local-gov,336951.0,Assoc-acdm,12.0,Married-civ-spouse,Protective-serv,Husband,White,Male,0.0,0.0,40.0,United-States,>50K
3,44.0,Private,160323.0,Some-college,10.0,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688.0,0.0,40.0,United-States,>50K
4,18.0,,103497.0,Some-college,10.0,Never-married,,Own-child,White,Female,0.0,0.0,30.0,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,27.0,Private,257302.0,Assoc-acdm,12.0,Married-civ-spouse,Tech-support,Wife,White,Female,0.0,0.0,38.0,United-States,<=50K
48838,40.0,Private,154374.0,HS-grad,9.0,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0.0,0.0,40.0,United-States,>50K
48839,58.0,Private,151910.0,HS-grad,9.0,Widowed,Adm-clerical,Unmarried,White,Female,0.0,0.0,40.0,United-States,<=50K
48840,22.0,Private,201490.0,HS-grad,9.0,Never-married,Adm-clerical,Own-child,White,Male,0.0,0.0,20.0,United-States,<=50K


In [4]:
# efficacy metrics from sklearn
from sklearn import metrics

# dictionnary of metrics
metrics_dict={
        "Accuracy": metrics.accuracy_score,
        "Balanced accuracy": metrics.balanced_accuracy_score,
        "Precision": metrics.precision_score,
        "Recall": metrics.recall_score,
        "F1-Score": metrics.f1_score}

# efficacy metrics dataframe helper tool
def metrics_dataframe(y_pred, y_true, metrics_dict=metrics_dict):
    metric_list = [[pf, fn(y_true, y_pred)] for pf, fn in metrics_dict.items()]
    return pd.DataFrame(metric_list, columns=["Metric", "Value"]).set_index("Metric")

## Baseline

In [5]:
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression()),
    ])

X, y, group_a, group_b = train_data
pipeline.fit(X, y)

X, y, group_a, group_b = test_data
y_pred = pipeline.predict(X)
df_baseline = classification_bias_metrics(group_a, 
                            group_b, 
                            y_pred, 
                            y, metric_type='both')

df_eff_baseline = metrics_dataframe(y, y_pred)

## Utils

In [6]:
def fit_and_evaluate_pipeline(pipeline, data_cls=None):
    
    X, y, group_a, group_b = train_data
    fit_params = {
        'bm__group_a': group_a,
        'bm__group_b': group_b
    }
    pipeline.fit(X, y, **fit_params)
    
    X, y, group_a, group_b = test_data
    predict_params = {
        'bm__group_a': group_a,
        'bm__group_b': group_b,
    }
    y_pred = pipeline.predict(X, **predict_params)
    
    df = classification_bias_metrics(group_a, 
                                group_b, 
                                y_pred, 
                                y, metric_type='both')
    df_eff = metrics_dataframe(y, y_pred)
    return df,df_eff

def format_result_colum(name,df):
    return df.rename(columns={'Value':name}).iloc[:,0]

def show_bias_result_table(configurations, df_baseline):
    table = pd.concat([df_baseline.iloc[:,0]] + [format_result_colum(name,config['result']['bias']) 
            for name,config in configurations.items()] + [df_baseline.iloc[:,1]],axis=1)
    return table.rename(columns={'Value':'Baseline'})

def show_efficacy_result_table(configurations, df_baseline):
    table = pd.concat([df_baseline.iloc[:,0]] + [format_result_colum(name,config['result']['efficacy']) 
            for name,config in configurations.items()],axis=1)
    return table.rename(columns={'Value':'Baseline'})

In [8]:
from collections import defaultdict
configurations = defaultdict(dict)

from holisticai.bias.mitigation import MLDebiaser
configurations['MLDebiaser']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression()),
    ('bm_postprocessing', MLDebiaser(sgd_steps=10_000,
                       full_gradient_epochs=500, 
                       max_iter=20, verbose=True)),
    ])

for config_name,config in configurations.items():
    bias_metrics,eff_metrics = fit_and_evaluate_pipeline(config['pipeline'])
    config['result'] = {'bias':bias_metrics, 'efficacy':eff_metrics}

[elapsed time: 00:00:00 | iter:2/2]


In [9]:
show_bias_result_table(configurations, df_baseline)

Unnamed: 0_level_0,Baseline,MLDebiaser,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Statistical Parity,-0.178353,-0.081532,0
Disparate Impact,0.309067,0.653059,1
Four Fifths Rule,0.309067,0.653059,1
Cohen D,-0.457755,-0.201936,0
Equality of Opportunity Difference,-0.059581,0.206436,0
False Positive Rate Difference,-0.082695,-0.01242,0
Average Odds Difference,-0.071138,0.097008,0
Accuracy Difference,0.122388,0.100479,0


## Pre-processing

In [10]:
from collections import defaultdict
configurations = defaultdict(dict)

from holisticai.bias.mitigation import Reweighing
configurations['Reweighing']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('bm_preprocessing', Reweighing()),
    ('classifier', LogisticRegression()),
    ])

from holisticai.bias.mitigation import LearningFairRepresentation
configurations['Learning Fair Representation']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('bm_preprocessing', LearningFairRepresentation(k=10, Ax=0.1, Ay=1.0, Az=2.0, verbose=1, print_interval=250)),
    ('classifier', LogisticRegression()),
    ])

### Run Configurations

In [11]:
for config_name,config in configurations.items():
    bias_metrics,eff_metrics = fit_and_evaluate_pipeline(config['pipeline'])
    config['result'] = {'bias':bias_metrics, 'efficacy':eff_metrics}

step: 250	loss: 1.2084	L_x: 2.5817	L_y: 0.9418	L_z: 0.0042
step: 500	loss: 1.2084	L_x: 2.5817	L_y: 0.9418	L_z: 0.0042
step: 750	loss: 1.2084	L_x: 2.5817	L_y: 0.9418	L_z: 0.0042
step: 1000	loss: 0.9300	L_x: 2.5817	L_y: 0.6636	L_z: 0.0041
step: 1250	loss: 0.9300	L_x: 2.5817	L_y: 0.6636	L_z: 0.0041
step: 1500	loss: 0.9300	L_x: 2.5817	L_y: 0.6636	L_z: 0.0041
step: 1750	loss: 0.9300	L_x: 2.5817	L_y: 0.6636	L_z: 0.0041
step: 2000	loss: 0.8228	L_x: 2.5804	L_y: 0.5571	L_z: 0.0038
step: 2250	loss: 0.8228	L_x: 2.5804	L_y: 0.5571	L_z: 0.0038
step: 2500	loss: 0.8228	L_x: 2.5804	L_y: 0.5571	L_z: 0.0038
step: 2750	loss: 0.8228	L_x: 2.5804	L_y: 0.5571	L_z: 0.0038
step: 3000	loss: 0.8150	L_x: 2.5785	L_y: 0.5502	L_z: 0.0035
step: 3250	loss: 0.8150	L_x: 2.5785	L_y: 0.5502	L_z: 0.0035
step: 3500	loss: 0.8150	L_x: 2.5785	L_y: 0.5502	L_z: 0.0035
step: 3750	loss: 0.8150	L_x: 2.5785	L_y: 0.5502	L_z: 0.0035
step: 4000	loss: 0.8133	L_x: 2.5777	L_y: 0.5488	L_z: 0.0033
step: 4250	loss: 0.8133	L_x: 2.5777	L_y: 0.

In [12]:
show_bias_result_table(configurations, df_baseline)

Unnamed: 0_level_0,Baseline,Reweighing,Learning Fair Representation,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Statistical Parity,-0.178353,-0.096208,-0.034261,0
Disparate Impact,0.309067,0.553714,0.528271,1
Four Fifths Rule,0.309067,0.553714,0.528271,1
Cohen D,-0.457755,-0.250423,-0.143359,0
Equality of Opportunity Difference,-0.059581,0.165055,0.021812,0
False Positive Rate Difference,-0.082695,-0.030316,-0.021325,0
Average Odds Difference,-0.071138,0.06737,0.000243,0
Accuracy Difference,0.122388,0.121556,0.180189,0


In [13]:
show_efficacy_result_table(configurations, df_eff_baseline)

Unnamed: 0_level_0,Baseline,Reweighing,Learning Fair Representation
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Accuracy,0.850241,0.843484,0.765585
Balanced accuracy,0.804536,0.799103,0.664535
Precision,0.600681,0.555556,0.139634
Recall,0.728822,0.72905,0.549414
F1-Score,0.658576,0.630587,0.222675


## Post Processing

In [14]:
from collections import defaultdict
configurations = defaultdict(dict)

from holisticai.bias.mitigation import EqualizedOdds
configurations['Equalized Odds']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression()),
    ('bm_postprocessing', EqualizedOdds()),
    ])

from holisticai.bias.mitigation import CalibratedEqualizedOdds
configurations['Calibrated Equalized Odds']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression()),
    ('bm_postprocessing', CalibratedEqualizedOdds()),
    ])

from holisticai.bias.mitigation import RejectOptionClassification
configurations['Reject Option Classification']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression()),
    ('bm_postprocessing', RejectOptionClassification(metric_name="Statistical parity difference", verbose=1)),
    ])

### Run Configurations

In [15]:
for config_name,config in configurations.items():
    bias_metrics,eff_metrics = fit_and_evaluate_pipeline(config['pipeline'])
    config['result'] = {'bias':bias_metrics, 'efficacy':eff_metrics}

Progress: 100/100

In [16]:
show_bias_result_table(configurations, df_baseline)

Unnamed: 0_level_0,Baseline,Equalized Odds,Calibrated Equalized Odds,Reject Option Classification,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Statistical Parity,-0.178353,-0.095813,-0.108962,-0.059864,0
Disparate Impact,0.309067,0.587965,0.422695,0.845638,1
Four Fifths Rule,0.309067,0.587965,0.422695,0.845638,1
Cohen D,-0.457755,-0.24093,-0.306568,-0.124368,0
Equality of Opportunity Difference,-0.059581,0.020744,0.109945,0.148833,0
False Positive Rate Difference,-0.082695,-0.010227,-0.057379,0.038114,0
Average Odds Difference,-0.071138,0.005258,0.026283,0.093473,0
Accuracy Difference,0.122388,0.080495,0.156621,-0.016622,0


In [17]:
show_efficacy_result_table(configurations, df_eff_baseline)

Unnamed: 0_level_0,Baseline,Equalized Odds,Calibrated Equalized Odds,Reject Option Classification
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Accuracy,0.850241,0.82598,0.827516,0.779609
Balanced accuracy,0.804536,0.765959,0.784805,0.726811
Precision,0.600681,0.554704,0.457642,0.806301
Recall,0.728822,0.665815,0.723419,0.527283
F1-Score,0.658576,0.605202,0.560626,0.637603


## Inprocessing

In [18]:
from collections import defaultdict
configurations = defaultdict(dict)

from holisticai.bias.mitigation import GridSearchReduction

model = LogisticRegression()
inprocessing_model = GridSearchReduction(constraints="DemographicParity", grid_size=20, verbose=1).transform_estimator(model)

configurations['GridSearch Reduction']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('bm_inprocessing', inprocessing_model),
    ])

from holisticai.bias.mitigation import ExponentiatedGradientReduction

model = LogisticRegression()
inprocessing_model = ExponentiatedGradientReduction(constraints="DemographicParity", verbose=1).transform_estimator(model)

configurations['ExponentiatedGradient Reduction']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('bm_inprocessing', inprocessing_model),
    ])

### Run Configurations

In [19]:
for config_name,config in configurations.items():
    bias_metrics,eff_metrics = fit_and_evaluate_pipeline(config['pipeline'])
    config['result'] = {'bias':bias_metrics, 'efficacy':eff_metrics}

steps: 5	Best gap:6.41039

In [20]:
show_bias_result_table(configurations, df_baseline)

Unnamed: 0_level_0,Baseline,GridSearch Reduction,ExponentiatedGradient Reduction,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Statistical Parity,-0.178353,0.014422,-0.009015,0
Disparate Impact,0.309067,1.097528,0.943353,1
Four Fifths Rule,0.309067,0.911139,0.943353,1
Cohen D,-0.457755,0.0401,-0.024839,0
Equality of Opportunity Difference,-0.059581,0.374346,0.326614,0
False Positive Rate Difference,-0.082695,0.046518,0.029895,0
Average Odds Difference,-0.071138,0.210432,0.178254,0
Accuracy Difference,0.122388,0.11033,0.1141,0


In [21]:
show_efficacy_result_table(configurations, df_eff_baseline)

Unnamed: 0_level_0,Baseline,GridSearch Reduction,ExponentiatedGradient Reduction
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Accuracy,0.850241,0.824445,0.828642
Balanced accuracy,0.804536,0.778548,0.784904
Precision,0.600681,0.452533,0.468284
Recall,0.728822,0.712466,0.721311
F1-Score,0.658576,0.553502,0.567888
