# Reweighing

This tutorial demostrates how apply Reweighing Pre-processing algorithm for bias mitigation. 

In [1]:
# sys path
import sys
sys.path.append('../../../')

# Imports
from utils import load_preprocessed_adult
from holisticai.bias.metrics import classification_bias_metrics
from holisticai.pipeline import Pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from holisticai.bias.metrics import statistical_parity

import numpy as np
import pandas as pd

# Settings
np.random.seed(0)
import warnings
warnings.filterwarnings("ignore")

Load preprocessed adult dataset (details in preprocessing_dataset.ipynb)
- protected attributes: ["sex" , "race"] (attributes removed from original dataset)
- sensible attribute: "sex" (groups: "Male" and "Female")

In [2]:
train_data , test_data = load_preprocessed_adult()

Define statistical_parity functions using sample weights

In [3]:
def sample_weight_mean_difference(group_a, group_b, y, sample_weight=None):
    if sample_weight is None:
        sample_weight = np.ones_like(y)
        
    group_a = group_a.squeeze()
    group_b = group_b.squeeze()
    y = y.values.squeeze()
    return np.abs(sample_weight[group_a & y==1].sum() - sample_weight[group_b & y==1].sum())

In [4]:
x, y, group_a, group_b = train_data

sample_weight = np.ones(len(y))

sample_weight_mean_difference(group_a, group_b, y)

6534

In [5]:
from holisticai.bias.mitigation import Reweighing

x, y, group_a, group_b = train_data

bm = Reweighing()
bm.fit(y, group_a, group_b)
sample_weight = bm.estimator_params['sample_weight']

sample_weight_mean_difference(group_a, group_b, y, sample_weight)

3168.273

# Baseline

In [6]:
x, y, group_a, group_b = train_data

scaler = StandardScaler()
xt = scaler.fit_transform(x)

model = LogisticRegression()
model.fit(xt,y)

x, y, group_a, group_b = test_data
xt = scaler.transform(x)

y_pred = model.predict(xt)

df_bl = classification_bias_metrics(group_a, group_b, y_pred, y, metric_type='both')

# Using Reweighing Bias Mitigation

In [7]:
from holisticai.bias.mitigation import Reweighing

x, y, group_a, group_b = train_data

scaler = StandardScaler()
xt = scaler.fit_transform(x)

bm = Reweighing()
bm.fit(y, group_a, group_b)
sample_weight = bm.estimator_params['sample_weight']

model = LogisticRegression()
model.fit(xt, y, sample_weight=sample_weight)

x, y, group_a, group_b = train_data
xt = scaler.transform(x)

y_pred = model.predict(xt)

df_rw = classification_bias_metrics(group_a, group_b, y_pred, y, metric_type='both')

# Comparing Results

In [8]:
table = pd.concat([df_bl, df_rw], axis=1).iloc[:,[0,2,3]]
table.columns = ['Baseline', 'Reweighing', 'Reference']
table

Unnamed: 0_level_0,Baseline,Reweighing,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Statistical Parity,-0.178353,-0.092602,0
Disparate Impact,0.309067,0.56555,1
Four Fifths Rule,0.309067,0.56555,1
Cohen D,-0.457755,-0.241252,0
Equality of Opportunity Difference,-0.059581,0.115541,0
False Positive Rate Difference,-0.082695,-0.013498,0
Average Odds Difference,-0.071138,0.051021,0
Accuracy Difference,0.122388,0.099645,0
