## Packages Installation

First, install the `holisticai` package if you haven't already:
```bash
!pip install holisticai[all]
```
Then, import the necessary libraries.

# Setup Logging

In [17]:
import logging
import sys
import warnings

warnings.filterwarnings("ignore")

logger = logging.getLogger()
logger.setLevel(logging.INFO)
console_handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

In [18]:
import pandas as pd
from holisticai.bias.metrics import classification_bias_metrics
from holisticai.datasets import load_dataset
from holisticai.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

## Dataset loading

In [19]:
dataset = load_dataset('adult', protected_attribute='sex')
train_test = dataset.train_test_split(test_size=0.2, random_state=42)

train = train_test['train']
test = train_test['test']

dataset

## Training a Model

In [20]:
# Define inprocessing model
model = LogisticRegression()

# Standardize data and fit model
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model.fit(train['X'], train['y'])

# Standardize data and predict
X_test = scaler.transform(test['X'])
y_pred = model.predict(X_test)

# Evaluate bias metrics
metrics = {}
metrics['model'] = classification_bias_metrics(test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both')
metrics['model']

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.386984,0
Disparate Impact,2.845278,1
Four Fifths Rule,0.35146,1
Cohen D,0.831806,0
2SD Rule,34.481,0
Equality of Opportunity Difference,0.214329,0
False Positive Rate Difference,0.324442,0
Average Odds Difference,0.269386,0
Accuracy Difference,-0.196806,0


Now that we have a clean dataset we can start defining the training and testing sets.

# 1. Adversarial Debiasing

### Traditional Implementation

In [21]:
# Define inprocessing model
from holisticai.bias.mitigation import AdversarialDebiasing

mitigator = AdversarialDebiasing(features_dim=train['X'].shape[1], batch_size=512, hidden_size=64, 
                                 adversary_loss_weight=3, verbose=1, use_debias=True, seed=42).transform_estimator()
mitigator

In [22]:
# Standardize data and fit model
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
mitigator.fit(X_train, train['y'], train['group_a'], train['group_b'])
X_test = scaler.transform(test['X'])
y_pred = mitigator.predict(X_test)

# Evaluate bias metrics
metrics['Adversarial'] = classification_bias_metrics(test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both')
metrics['Adversarial']

2024-11-06 08:51:55,003 - holisticai.bias.mitigation.inprocessing.adversarial_debiasing.transformer - INFO - Step 1/710: Classifier Loss = 0.669206, Adversarial Loss = 0.760739
2024-11-06 08:51:55,003 - holisticai.bias.mitigation.inprocessing.adversarial_debiasing.transformer - INFO - Step 1/710: Classifier Loss = 0.669206, Adversarial Loss = 0.760739
2024-11-06 08:52:01,116 - holisticai.bias.mitigation.inprocessing.adversarial_debiasing.transformer - INFO - Step 101/710: Classifier Loss = 0.380915, Adversarial Loss = 0.621280
2024-11-06 08:52:01,116 - holisticai.bias.mitigation.inprocessing.adversarial_debiasing.transformer - INFO - Step 101/710: Classifier Loss = 0.380915, Adversarial Loss = 0.621280
2024-11-06 08:52:07,447 - holisticai.bias.mitigation.inprocessing.adversarial_debiasing.transformer - INFO - Step 201/710: Classifier Loss = 0.365841, Adversarial Loss = 0.620069
2024-11-06 08:52:07,447 - holisticai.bias.mitigation.inprocessing.adversarial_debiasing.transformer - INFO - 

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.015605,0
Disparate Impact,1.10912,1
Four Fifths Rule,0.901616,1
Cohen D,0.043291,0
2SD Rule,1.925148,0
Equality of Opportunity Difference,-0.263542,0
False Positive Rate Difference,-0.03395,0
Average Odds Difference,-0.148746,0
Accuracy Difference,-0.101258,0


# 2. Exponentiated Gradient

### Traditional Implementation

In [23]:
# Define inprocessing model
from holisticai.bias.mitigation import ExponentiatedGradientReduction

model = LogisticRegression()
mitigator = ExponentiatedGradientReduction(constraints="DemographicParity").transform_estimator(model)

# Standardize data and fit model
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
mitigator.fit(X_train, train['y'], train['group_a'], train['group_b'])

# Standardize data and predict
X_test = scaler.transform(test['X'])
y_pred = mitigator.predict(X_test)

# Evaluate bias metrics
metrics['Exponentiated'] = classification_bias_metrics(test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both')
metrics['Exponentiated']

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,-0.003649,0
Disparate Impact,0.976761,1
Four Fifths Rule,0.976761,1
Cohen D,-0.010095,0
2SD Rule,-0.449018,0
Equality of Opportunity Difference,-0.318418,0
False Positive Rate Difference,-0.046256,0
Average Odds Difference,-0.182337,0
Accuracy Difference,-0.099424,0


# 3. Grid Search Reduction

### Traditional Implementation

In [24]:
# Define inprocessing model
from holisticai.bias.mitigation import GridSearchReduction

model = LogisticRegression()
mitigator = GridSearchReduction(constraints="EqualizedOdds", grid_size=20).transform_estimator(model)

# Standardize data and fit model
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
mitigator.fit(X_train, train['y'], train['group_a'], train['group_b'])
X_test = scaler.transform(test['X'])
y_pred = mitigator.predict(X_test)

# Evaluate bias metrics
metrics['GridSearch'] = classification_bias_metrics(test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both')
metrics['GridSearch']

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,-0.089742,0
Disparate Impact,0.771503,1
Four Fifths Rule,0.771503,1
Cohen D,-0.191323,0
2SD Rule,-8.476006,0
Equality of Opportunity Difference,-0.771488,0
False Positive Rate Difference,0.064087,0
Average Odds Difference,-0.3537,0
Accuracy Difference,-0.242953,0


# 4. Meta Fair Classifier

### Traditional Implementation

In [25]:
# Define inprocessing model
from holisticai.bias.mitigation import MetaFairClassifier

model = LogisticRegression()
mitigator = MetaFairClassifier(constraint="StatisticalRate", verbose=1, seed=42).transform_estimator(_)

# Standardize data and fit model
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
mitigator.fit(X_train, train['y'], train['group_a'], train['group_b'])

# Standardize data and predict
X_test = scaler.transform(test['X'])
y_pred = mitigator.predict(X_test)

# Evaluate bias metrics
metrics['MetaClassifier'] = classification_bias_metrics(test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both')
metrics['MetaClassifier']

[elapsed time: 00:00:02 | iter:8/8 | accuracy:0.6892 | gamma:0.6733]


Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,-0.078948,0
Disparate Impact,0.850056,1
Four Fifths Rule,0.850056,1
Cohen D,-0.158559,0
2SD Rule,-7.03326,0
Equality of Opportunity Difference,-0.06791,0
False Positive Rate Difference,-0.194712,0
Average Odds Difference,-0.131311,0
Accuracy Difference,0.184129,0


### Pipeline Implementation

# 5. Prejudice Remover

### Traditional Implementation

In [26]:
# Define inprocessing model
from holisticai.bias.mitigation import PrejudiceRemover

model = LogisticRegression()
mitigator = PrejudiceRemover(maxiter=100, fit_intercept=True, verbose=1, print_interval=1).transform_estimator(model)

# Standardize data and fit model
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
mitigator.fit(X_train, train['y'], train['group_a'], train['group_b'])

# Standardize data and predict
X_test = scaler.transform(test['X'])
y_pred = mitigator.predict(X_test, test['group_a'], test['group_b'])

# Evaluate bias metrics
metrics['PrejudiceRemover'] = classification_bias_metrics(test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both')
metrics['PrejudiceRemover']

[elapsed time: 00:00:01 | iter:5/100 | loss:13018.0391]
[elapsed time: 00:00:07 | Best Loss : 13018.0391]


Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.123365,0
Disparate Impact,2.226515,1
Four Fifths Rule,0.449132,1
Cohen D,0.321904,0
2SD Rule,14.158636,0
Equality of Opportunity Difference,-0.001545,0
False Positive Rate Difference,0.036702,0
Average Odds Difference,0.017578,0
Accuracy Difference,-0.103027,0


In [27]:
from holisticai.utils import concatenate_metrics

concatenate_metrics(metrics)

Unnamed: 0_level_0,model,Adversarial,Exponentiated,GridSearch,MetaClassifier,PrejudiceRemover,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Statistical Parity,0.386984,0.015605,-0.003649,-0.089742,-0.078948,0.123365,0
Disparate Impact,2.845278,1.10912,0.976761,0.771503,0.850056,2.226515,1
Four Fifths Rule,0.35146,0.901616,0.976761,0.771503,0.850056,0.449132,1
Cohen D,0.831806,0.043291,-0.010095,-0.191323,-0.158559,0.321904,0
2SD Rule,34.481,1.925148,-0.449018,-8.476006,-7.03326,14.158636,0
Equality of Opportunity Difference,0.214329,-0.263542,-0.318418,-0.771488,-0.06791,-0.001545,0
False Positive Rate Difference,0.324442,-0.03395,-0.046256,0.064087,-0.194712,0.036702,0
Average Odds Difference,0.269386,-0.148746,-0.182337,-0.3537,-0.131311,0.017578,0
Accuracy Difference,-0.196806,-0.101258,-0.099424,-0.242953,0.184129,-0.103027,0
