## Package Installation and Imports

First, install the `holisticai` package if you haven't already:
```bash
pip install "holisticai[all]"
```
Then, import the necessary libraries.

In [1]:
import warnings

import pandas as pd
from holisticai.bias.metrics import classification_bias_metrics
from holisticai.datasets import load_dataset
from holisticai.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Suppress every warning category so the cell outputs below stay readable.
# NOTE(review): this also hides any deprecation or convergence warnings the
# libraries may raise — confirm that a blanket "ignore" is intended.
warnings.filterwarnings("ignore")

## Dataset loading

In [2]:
# Load the Adult dataset, using `sex` as the protected attribute.
dataset = load_dataset('adult', protected_attribute='sex')

# Split with test_size=0.2 and a fixed random_state so the split is reproducible.
train_test = dataset.train_test_split(test_size=0.2, random_state=42)
train, test = train_test['train'], train_test['test']

# Show the dataset object as the cell output.
dataset

# 1. Calibrated Equalized Odds

### Traditional Implementation

In [3]:
# Post-processing mitigator and base classifier
from holisticai.bias.mitigation import CalibratedEqualizedOdds

mitigator = CalibratedEqualizedOdds(cost_constraint="fnr")
model = LogisticRegression()

# Scale the training features and train the classifier on them
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model.fit(X_train, train['y'])

# Fit the mitigator on the classifier's training-set probabilities
y_proba_train = model.predict_proba(X_train)
mitigator.fit(
    train['y'],
    y_proba_train,
    group_a=train['group_a'],
    group_b=train['group_b'],
)

# Score the test set, then let the mitigator adjust the predictions
X_test = scaler.transform(test['X'])
y_proba_test = model.predict_proba(X_test)
# NOTE(review): the first positional argument passed to transform() is the
# ground-truth test labels — confirm against the CalibratedEqualizedOdds.transform
# signature that model predictions are not expected here instead.
mitigated = mitigator.transform(test['y'], y_proba_test, test['group_a'], test['group_b'])
y_pred = mitigated["y_pred"]

# Bias metrics computed on the mitigated test predictions
metrics = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred,
    test['y'],
    metric_type='both',
)
metrics

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.109207,0
Disparate Impact,2.406222,1
Four Fifths Rule,0.415589,1
Cohen D,0.307662,0
2SD Rule,13.545237,0
Equality of Opportunity Difference,-0.06592,0
False Positive Rate Difference,0.05058,0
Average Odds Difference,-0.00767,0
Accuracy Difference,-0.144302,0


### Pipeline Implementation

In [4]:
# Bias-mitigation post-processor and base estimator
mitigator = CalibratedEqualizedOdds(cost_constraint="fnr")
model = LogisticRegression()

# Chain scaling, classification and post-processing into a single pipeline
steps = [
    ('scalar', StandardScaler()),
    ("estimator", model),
    ("bm_posprocessing", mitigator),
]
pipeline = Pipeline(steps=steps)

# The group_a/group_b data is passed through `bm__`-prefixed keyword arguments
pipeline.fit(
    train['X'],
    train['y'],
    bm__group_a=train['group_a'],
    bm__group_b=train['group_b'],
)

# Predict on the test split, again supplying the test groups
y_pred_pipeline = pipeline.predict(
    test['X'],
    bm__group_a=test['group_a'],
    bm__group_b=test['group_b'],
)

# Bias metrics for the pipeline's predictions
metrics_pipeline = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred_pipeline,
    test['y'],
    metric_type='both',
)
metrics_pipeline

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.109207,0
Disparate Impact,2.406222,1
Four Fifths Rule,0.415589,1
Cohen D,0.307662,0
2SD Rule,13.545237,0
Equality of Opportunity Difference,-0.06592,0
False Positive Rate Difference,0.05058,0
Average Odds Difference,-0.00767,0
Accuracy Difference,-0.144302,0


### Comparison

In [5]:
# Side by side: the traditional run's values next to the full pipeline metrics table
comparison_frames = [metrics['Value'], metrics_pipeline]
pd.concat(comparison_frames, axis=1, keys=['Traditional', 'Pipeline'])

Unnamed: 0_level_0,Traditional,Pipeline,Pipeline
Unnamed: 0_level_1,Value,Value,Reference
Metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Statistical Parity,0.109207,0.109207,0
Disparate Impact,2.406222,2.406222,1
Four Fifths Rule,0.415589,0.415589,1
Cohen D,0.307662,0.307662,0
2SD Rule,13.545237,13.545237,0
Equality of Opportunity Difference,-0.06592,-0.06592,0
False Positive Rate Difference,0.05058,0.05058,0
Average Odds Difference,-0.00767,-0.00767,0
Accuracy Difference,-0.144302,-0.144302,0


# 2. Equalized Odds

### Traditional Implementation

In [6]:
# Define postprocessing model
from holisticai.bias.mitigation import EqualizedOdds

mitigator = EqualizedOdds(solver='highs', seed=42)
# Create a fresh classifier for this section. Without this line the cell would
# re-fit whatever `model` object an earlier cell left in the kernel, so it would
# only run correctly when the cells are executed in order.
model = LogisticRegression()

# Standardize data and fit model
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model.fit(X_train, train['y'])
# The mitigator is fitted on hard label predictions here (predict, not predict_proba)
y_pred = model.predict(X_train)

# Fit postprocessing model on the training labels and training predictions
mitigator.fit(train['y'], y_pred, group_a=train['group_a'], group_b=train['group_b'])

# Predict on the test split and mitigate bias in those predictions
X_test = scaler.transform(test['X'])
y_pred = model.predict(X_test)
y_pred = mitigator.transform(y_pred, test['group_a'], test['group_b'])["y_pred"]

# Evaluate bias metrics on the mitigated test predictions
metrics = classification_bias_metrics(test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both')
metrics

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.090176,0
Disparate Impact,1.623133,1
Four Fifths Rule,0.616093,1
Cohen D,0.224294,0
2SD Rule,9.922042,0
Equality of Opportunity Difference,0.036712,0
False Positive Rate Difference,-0.005762,0
Average Odds Difference,0.015475,0
Accuracy Difference,-0.05682,0


### Pipeline Implementation

In [7]:
# Bias-mitigation post-processor and base estimator
mitigator = EqualizedOdds(solver='highs', seed=42)
model = LogisticRegression()

# Chain scaling, classification and post-processing into a single pipeline
steps = [
    ('scalar', StandardScaler()),
    ("estimator", model),
    ("bm_posprocessing", mitigator),
]
pipeline = Pipeline(steps=steps)

# The group_a/group_b data is passed through `bm__`-prefixed keyword arguments
pipeline.fit(
    train['X'],
    train['y'],
    bm__group_a=train['group_a'],
    bm__group_b=train['group_b'],
)

# Predict on the test split, again supplying the test groups
y_pred_pipeline = pipeline.predict(
    test['X'],
    bm__group_a=test['group_a'],
    bm__group_b=test['group_b'],
)

# Bias metrics for the pipeline's predictions
metrics_pipeline = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred_pipeline,
    test['y'],
    metric_type='both',
)
metrics_pipeline

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.090176,0
Disparate Impact,1.623133,1
Four Fifths Rule,0.616093,1
Cohen D,0.224294,0
2SD Rule,9.922042,0
Equality of Opportunity Difference,0.036712,0
False Positive Rate Difference,-0.005762,0
Average Odds Difference,0.015475,0
Accuracy Difference,-0.05682,0


### Comparison

In [8]:
# Side by side: the traditional run's values next to the full pipeline metrics table
comparison_frames = [metrics['Value'], metrics_pipeline]
pd.concat(comparison_frames, axis=1, keys=['Traditional', 'Pipeline'])

Unnamed: 0_level_0,Traditional,Pipeline,Pipeline
Unnamed: 0_level_1,Value,Value,Reference
Metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Statistical Parity,0.090176,0.090176,0
Disparate Impact,1.623133,1.623133,1
Four Fifths Rule,0.616093,0.616093,1
Cohen D,0.224294,0.224294,0
2SD Rule,9.922042,9.922042,0
Equality of Opportunity Difference,0.036712,0.036712,0
False Positive Rate Difference,-0.005762,-0.005762,0
Average Odds Difference,0.015475,0.015475,0
Accuracy Difference,-0.05682,-0.05682,0


# 3. LP Debiaser

### Traditional Implementation

In [9]:
# Post-processing mitigator and base classifier
from holisticai.bias.mitigation import LPDebiaserBinary

mitigator = LPDebiaserBinary()
model = LogisticRegression()

# Scale the training features and train the classifier on them
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model.fit(X_train, train['y'])

# Fit the mitigator on the classifier's training-set probabilities
y_proba_train = model.predict_proba(X_train)
mitigator.fit(
    y=train['y'],
    y_proba=y_proba_train,
    group_a=train['group_a'],
    group_b=train['group_b'],
)

# Score the test set, then let the mitigator adjust the predictions
X_test = scaler.transform(test['X'])
y_proba_test = model.predict_proba(X_test)
mitigated = mitigator.transform(
    y_proba=y_proba_test,
    group_a=test['group_a'],
    group_b=test['group_b'],
)
y_pred = mitigated["y_pred"]

# Bias metrics computed on the mitigated test predictions
metrics = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred,
    test['y'],
    metric_type='both',
)
metrics

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.111227,0
Disparate Impact,1.383391,1
Four Fifths Rule,0.722862,1
Cohen D,0.232339,0
2SD Rule,10.273823,0
Equality of Opportunity Difference,0.024032,0
False Positive Rate Difference,-0.003132,0
Average Odds Difference,0.01045,0
Accuracy Difference,0.011012,0


### Pipeline Implementation

In [10]:
# Bias-mitigation post-processor and base estimator
mitigator = LPDebiaserBinary()
model = LogisticRegression()

# Chain scaling, classification and post-processing into a single pipeline
steps = [
    ('scalar', StandardScaler()),
    ("estimator", model),
    ("bm_posprocessing", mitigator),
]
pipeline = Pipeline(steps=steps)

# The group_a/group_b data is passed through `bm__`-prefixed keyword arguments
pipeline.fit(
    train['X'],
    train['y'],
    bm__group_a=train['group_a'],
    bm__group_b=train['group_b'],
)

# Predict on the test split, again supplying the test groups
y_pred_pipeline = pipeline.predict(
    test['X'],
    bm__group_a=test['group_a'],
    bm__group_b=test['group_b'],
)

# Bias metrics for the pipeline's predictions
metrics_pipeline = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred_pipeline,
    test['y'],
    metric_type='both',
)
metrics_pipeline

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.111227,0
Disparate Impact,1.383391,1
Four Fifths Rule,0.722862,1
Cohen D,0.232339,0
2SD Rule,10.273823,0
Equality of Opportunity Difference,0.006014,0
False Positive Rate Difference,-0.000816,0
Average Odds Difference,0.002599,0
Accuracy Difference,0.006906,0


### Comparison

In [11]:
# Side by side: the traditional run's values next to the full pipeline metrics table
comparison_frames = [metrics['Value'], metrics_pipeline]
pd.concat(comparison_frames, axis=1, keys=['Traditional', 'Pipeline'])

Unnamed: 0_level_0,Traditional,Pipeline,Pipeline
Unnamed: 0_level_1,Value,Value,Reference
Metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Statistical Parity,0.111227,0.111227,0
Disparate Impact,1.383391,1.383391,1
Four Fifths Rule,0.722862,0.722862,1
Cohen D,0.232339,0.232339,0
2SD Rule,10.273823,10.273823,0
Equality of Opportunity Difference,0.024032,0.006014,0
False Positive Rate Difference,-0.003132,-0.000816,0
Average Odds Difference,0.01045,0.002599,0
Accuracy Difference,0.011012,0.006906,0


# 4. ML Debiaser

### Traditional Implementation

In [12]:
# Post-processing mitigator and base classifier
from holisticai.bias.mitigation import MLDebiaser

mitigator = MLDebiaser()
model = LogisticRegression()

# Scale the training features and train the classifier on them
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model.fit(X_train, train['y'])

# Fit the mitigator on the classifier's training-set probabilities
# (this mitigator's fit call receives no ground-truth labels)
y_proba_train = model.predict_proba(X_train)
mitigator.fit(
    y_proba_train,
    group_a=train['group_a'],
    group_b=train['group_b'],
)

# Score the test set, then let the mitigator adjust the predictions
X_test = scaler.transform(test['X'])
y_proba_test = model.predict_proba(X_test)
mitigated = mitigator.transform(y_proba_test, test['group_a'], test['group_b'])
y_pred = mitigated["y_pred"]

# Bias metrics computed on the mitigated test predictions
metrics = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred,
    test['y'],
    metric_type='both',
)
metrics

[elapsed time: 00:00:01 | iter:2/2]


Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.084875,0
Disparate Impact,1.552537,1
Four Fifths Rule,0.644107,1
Cohen D,0.208987,0
2SD Rule,9.251539,0
Equality of Opportunity Difference,-0.161117,0
False Positive Rate Difference,0.008152,0
Average Odds Difference,-0.076483,0
Accuracy Difference,-0.0884,0


### Pipeline Implementation

In [13]:
# Bias-mitigation post-processor and base estimator
mitigator = MLDebiaser()
model = LogisticRegression()

# Chain scaling, classification and post-processing into a single pipeline
steps = [
    ('scalar', StandardScaler()),
    ("estimator", model),
    ("bm_posprocessing", mitigator),
]
pipeline = Pipeline(steps=steps)

# The group_a/group_b data is passed through `bm__`-prefixed keyword arguments
pipeline.fit(
    train['X'],
    train['y'],
    bm__group_a=train['group_a'],
    bm__group_b=train['group_b'],
)

# Predict on the test split, again supplying the test groups
y_pred_pipeline = pipeline.predict(
    test['X'],
    bm__group_a=test['group_a'],
    bm__group_b=test['group_b'],
)

# Bias metrics for the pipeline's predictions
metrics_pipeline = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred_pipeline,
    test['y'],
    metric_type='both',
)
metrics_pipeline

[elapsed time: 00:00:00 | iter:2/2]


Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.084875,0
Disparate Impact,1.552537,1
Four Fifths Rule,0.644107,1
Cohen D,0.208987,0
2SD Rule,9.251539,0
Equality of Opportunity Difference,-0.161117,0
False Positive Rate Difference,0.008152,0
Average Odds Difference,-0.076483,0
Accuracy Difference,-0.0884,0


### Comparison

In [14]:
# Side by side: the traditional run's values next to the full pipeline metrics table
comparison_frames = [metrics['Value'], metrics_pipeline]
pd.concat(comparison_frames, axis=1, keys=['Traditional', 'Pipeline'])

Unnamed: 0_level_0,Traditional,Pipeline,Pipeline
Unnamed: 0_level_1,Value,Value,Reference
Metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Statistical Parity,0.084875,0.084875,0
Disparate Impact,1.552537,1.552537,1
Four Fifths Rule,0.644107,0.644107,1
Cohen D,0.208987,0.208987,0
2SD Rule,9.251539,9.251539,0
Equality of Opportunity Difference,-0.161117,-0.161117,0
False Positive Rate Difference,0.008152,0.008152,0
Average Odds Difference,-0.076483,-0.076483,0
Accuracy Difference,-0.0884,-0.0884,0


# 5. Reject Option

### Traditional Implementation

In [15]:
# Post-processing mitigator and base classifier
from holisticai.bias.mitigation import RejectOptionClassification

mitigator = RejectOptionClassification(metric_name="Statistical parity difference")
model = LogisticRegression()

# Scale the training features and train the classifier on them
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
model.fit(X_train, train['y'])

# Fit the mitigator on the classifier's training-set probabilities
y_proba_train = model.predict_proba(X_train)
mitigator.fit(
    train['y'],
    y_proba_train,
    group_a=train['group_a'],
    group_b=train['group_b'],
)

# Score the test set: transform() is given both hard labels and probabilities
X_test = scaler.transform(test['X'])
y_label_test = model.predict(X_test)
y_proba = model.predict_proba(X_test)
mitigated = mitigator.transform(y_label_test, y_proba, test['group_a'], test['group_b'])
y_pred = mitigated["y_pred"]

# Bias metrics computed on the mitigated test predictions
metrics = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred,
    test['y'],
    metric_type='both',
)
metrics

### Pipeline Implementation

In [None]:
# Bias-mitigation post-processor and base estimator
mitigator = RejectOptionClassification(metric_name="Statistical parity difference")
model = LogisticRegression()

# Chain scaling, classification and post-processing into a single pipeline
steps = [
    ('scalar', StandardScaler()),
    ("estimator", model),
    ("bm_posprocessing", mitigator),
]
pipeline = Pipeline(steps=steps)

# The group_a/group_b data is passed through `bm__`-prefixed keyword arguments
pipeline.fit(
    train['X'],
    train['y'],
    bm__group_a=train['group_a'],
    bm__group_b=train['group_b'],
)

# Predict on the test split, again supplying the test groups
y_pred_pipeline = pipeline.predict(
    test['X'],
    bm__group_a=test['group_a'],
    bm__group_b=test['group_b'],
)

# Bias metrics for the pipeline's predictions
metrics_pipeline = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred_pipeline,
    test['y'],
    metric_type='both',
)
metrics_pipeline

### Comparison

In [None]:
# Side by side: the traditional run's values next to the full pipeline metrics table
comparison_frames = [metrics['Value'], metrics_pipeline]
pd.concat(comparison_frames, axis=1, keys=['Traditional', 'Pipeline'])