## Package Installation

First, install the `holisticai` package if you haven't already:
```bash
pip install "holisticai[all]"
```
Then, import the necessary libraries.

In [1]:
import warnings

import pandas as pd
from holisticai.bias.metrics import classification_bias_metrics
from holisticai.datasets import load_dataset
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Silence every Python warning for the remainder of the session so the cell
# outputs below stay compact.
# NOTE(review): this blanket filter also hides any warnings the libraries used
# in later cells might emit — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

## Dataset loading

In [2]:
# Load the 'adult' dataset with 'sex' as the protected attribute.
dataset = load_dataset('adult', protected_attribute='sex')

# Hold out 20% of the data for evaluation; random_state is pinned, presumably
# so that the split is repeatable across runs.
train_test = dataset.train_test_split(test_size=0.2, random_state=42)
train_data, test_data = train_test['train'], train_test['test']

# Show the dataset object as the cell output.
dataset

# 1. Correlation Remover

### Traditional Implementation

In [3]:
# Define the pre-processing bias mitigator (it transforms the features before
# the classifier sees them) and the downstream classifier
from holisticai.bias.mitigation import CorrelationRemover

mitigator = CorrelationRemover()
model = LogisticRegression()

# Group-membership vectors: one set for fitting on the training split, a
# separate one for transforming the test split
fit_params = {"group_a": train_data['group_a'], "group_b": train_data['group_b']}
transform_params = {"group_a": test_data['group_a'], "group_b": test_data['group_b']}

# Standardize the training features, apply the mitigator, then fit the classifier
scaler = StandardScaler()
X_train = scaler.fit_transform(train_data['X'])
X_train_pre = mitigator.fit_transform(X_train, **fit_params)
model.fit(X_train_pre, train_data['y'])

# Reuse the already-fitted scaler and mitigator on the test features
X_test = scaler.transform(test_data['X'])
X_test_pre = mitigator.transform(X_test, **transform_params)

y_pred = model.predict(X_test_pre)

# Evaluate bias metrics
metrics = classification_bias_metrics(test_data['group_a'], test_data['group_b'], y_pred, test_data['y'], metric_type='both')
metrics

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.113132,0
Disparate Impact,2.026974,1
Four Fifths Rule,0.493346,1
Cohen D,0.293019,0
2SD Rule,12.912671,0
Equality of Opportunity Difference,-0.049182,0
False Positive Rate Difference,0.029078,0
Average Odds Difference,-0.010052,0
Accuracy Difference,-0.101191,0


### Pipeline Implementation

In [4]:
from holisticai.pipeline import Pipeline

# Instantiate the pre-processing bias mitigator and the classifier
mitigator = CorrelationRemover()
model = LogisticRegression()

# Chain scaling -> bias mitigation -> estimator into a single pipeline
pipeline = Pipeline(
    steps=[
        ('scalar', StandardScaler()),
        ("bm_preprocessing", mitigator),
        ("estimator", model),
    ]
)

# Group vectors are supplied as 'bm__'-prefixed keyword arguments
# (presumably routed by the pipeline to the bias-mitigation step)
fit_params = {
    'bm__group_a': train_data['group_a'],
    'bm__group_b': train_data['group_b'],
}
pipeline.fit(train_data['X'], train_data['y'], **fit_params)

predict_params = {
    'bm__group_a': test_data['group_a'],
    'bm__group_b': test_data['group_b'],
}
y_pred_pipeline = pipeline.predict(test_data['X'], **predict_params)

# Evaluate bias metrics for pipeline model
metrics_pipeline = classification_bias_metrics(
    test_data['group_a'],
    test_data['group_b'],
    y_pred_pipeline,
    test_data['y'],
    metric_type='both',
)
metrics_pipeline

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.113132,0
Disparate Impact,2.026974,1
Four Fifths Rule,0.493346,1
Cohen D,0.293019,0
2SD Rule,12.912671,0
Equality of Opportunity Difference,-0.049182,0
False Positive Rate Difference,0.029078,0
Average Odds Difference,-0.010052,0
Accuracy Difference,-0.101191,0


### Comparison

In [5]:
# Put the traditional metric values next to the pipeline metrics table,
# labelling the two column groups 'Traditional' and 'Pipeline'.
pd.concat(
    [metrics['Value'], metrics_pipeline],
    axis=1,
    keys=['Traditional', 'Pipeline'],
)

Unnamed: 0_level_0,Traditional,Pipeline,Pipeline
Unnamed: 0_level_1,Value,Value,Reference
Metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Statistical Parity,0.113132,0.113132,0
Disparate Impact,2.026974,2.026974,1
Four Fifths Rule,0.493346,0.493346,1
Cohen D,0.293019,0.293019,0
2SD Rule,12.912671,12.912671,0
Equality of Opportunity Difference,-0.049182,-0.049182,0
False Positive Rate Difference,0.029078,0.029078,0
Average Odds Difference,-0.010052,-0.010052,0
Accuracy Difference,-0.101191,-0.101191,0


# 2. Disparate Impact Remover

### Traditional Implementation

In [6]:
# Define the pre-processing bias mitigator (it transforms the features before
# the classifier sees them) and the downstream classifier
from holisticai.bias.mitigation import DisparateImpactRemover

mitigator = DisparateImpactRemover()
model = LogisticRegression()

# Group-membership vectors: one set for fitting on the training split, a
# separate one for transforming the test split
fit_params = {"group_a": train_data['group_a'], "group_b": train_data['group_b']}
transform_params = {"group_a": test_data['group_a'], "group_b": test_data['group_b']}

# Standardize the training features, apply the mitigator, then fit the classifier
scaler = StandardScaler()
X_train = scaler.fit_transform(train_data['X'])
X_train_pre = mitigator.fit_transform(X_train, **fit_params)
model.fit(X_train_pre, train_data['y'])

# Reuse the already-fitted scaler and mitigator on the test features
X_test = scaler.transform(test_data['X'])
X_test_pre = mitigator.transform(X_test, **transform_params)

y_pred = model.predict(X_test_pre)

# Evaluate bias metrics
# NOTE(review): the metrics recorded for this cell sit further from their
# reference values than those recorded in the other sections of this notebook
# — worth double-checking the mitigator configuration.
metrics = classification_bias_metrics(test_data['group_a'], test_data['group_b'], y_pred, test_data['y'], metric_type='both')
metrics

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.450658,0
Disparate Impact,7.521151,1
Four Fifths Rule,0.132958,1
Cohen D,1.034583,0
2SD Rule,41.423844,0
Equality of Opportunity Difference,0.41483,0
False Positive Rate Difference,0.340255,0
Average Odds Difference,0.377543,0
Accuracy Difference,-0.207011,0


### Pipeline Implementation

In [7]:
from holisticai.pipeline import Pipeline

# Instantiate the pre-processing bias mitigator and the classifier
mitigator = DisparateImpactRemover()
model = LogisticRegression()

# Chain scaling -> bias mitigation -> estimator into a single pipeline
pipeline = Pipeline(
    steps=[
        ('scalar', StandardScaler()),
        ("bm_preprocessing", mitigator),
        ("estimator", model),
    ]
)

# Group vectors are supplied as 'bm__'-prefixed keyword arguments
# (presumably routed by the pipeline to the bias-mitigation step)
fit_params = {
    'bm__group_a': train_data['group_a'],
    'bm__group_b': train_data['group_b'],
}
pipeline.fit(train_data['X'], train_data['y'], **fit_params)

predict_params = {
    'bm__group_a': test_data['group_a'],
    'bm__group_b': test_data['group_b'],
}
y_pred_pipeline = pipeline.predict(test_data['X'], **predict_params)

# Evaluate bias metrics for pipeline model
metrics_pipeline = classification_bias_metrics(
    test_data['group_a'],
    test_data['group_b'],
    y_pred_pipeline,
    test_data['y'],
    metric_type='both',
)
metrics_pipeline

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.450658,0
Disparate Impact,7.521151,1
Four Fifths Rule,0.132958,1
Cohen D,1.034583,0
2SD Rule,41.423844,0
Equality of Opportunity Difference,0.41483,0
False Positive Rate Difference,0.340255,0
Average Odds Difference,0.377543,0
Accuracy Difference,-0.207011,0


### Comparison

In [8]:
# Put the traditional metric values next to the pipeline metrics table,
# labelling the two column groups 'Traditional' and 'Pipeline'.
pd.concat(
    [metrics['Value'], metrics_pipeline],
    axis=1,
    keys=['Traditional', 'Pipeline'],
)

Unnamed: 0_level_0,Traditional,Pipeline,Pipeline
Unnamed: 0_level_1,Value,Value,Reference
Metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Statistical Parity,0.450658,0.450658,0
Disparate Impact,7.521151,7.521151,1
Four Fifths Rule,0.132958,0.132958,1
Cohen D,1.034583,1.034583,0
2SD Rule,41.423844,41.423844,0
Equality of Opportunity Difference,0.41483,0.41483,0
False Positive Rate Difference,0.340255,0.340255,0
Average Odds Difference,0.377543,0.377543,0
Accuracy Difference,-0.207011,-0.207011,0


# 3. Learning Fair Representations

### Traditional Implementation

In [9]:
# Define the pre-processing bias mitigator (it transforms the features before
# the classifier sees them) and the downstream classifier
from holisticai.bias.mitigation import LearningFairRepresentation

# NOTE(review): the recorded run of this cell reports NaN for Disparate Impact,
# Four Fifths Rule, Cohen D and 2SD Rule — possibly because k=1 collapses the
# learned representation so the classifier predicts a single class.
# TODO confirm and consider a larger k.
mitigator = LearningFairRepresentation(k=1, Ax=0.2, Ay=2.0, Az=4.0, verbose=1, maxiter=100, maxfun=100)
model = LogisticRegression()

# Group-membership vectors: one set for fitting on the training split, a
# separate one for transforming the test split
fit_params = {"group_a": train_data['group_a'], "group_b": train_data['group_b']}
transform_params = {"group_a": test_data['group_a'], "group_b": test_data['group_b']}

# Standardize the training features, fit the mitigator (this one also receives
# the labels), then fit the classifier on the transformed features
scaler = StandardScaler()
X_train = scaler.fit_transform(train_data['X'])
X_train_pre = mitigator.fit_transform(X_train, train_data['y'], **fit_params)
model.fit(X_train_pre, train_data['y'])

# Reuse the already-fitted scaler and mitigator on the test features
X_test = scaler.transform(test_data['X'])
X_test_pre = mitigator.transform(X_test, **transform_params)

y_pred = model.predict(X_test_pre)

# Evaluate bias metrics
metrics = classification_bias_metrics(test_data['group_a'], test_data['group_b'], y_pred, test_data['y'], metric_type='both')
metrics

Optimization Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Optimization Progress: 252it [00:09, 33.63it/s, loss: 1.824 L_x: 2.694 L_y: 0.642 L_z: 0.000]                          

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.0,0
Disparate Impact,,1
Four Fifths Rule,,1
Cohen D,,0
2SD Rule,,0
Equality of Opportunity Difference,0.0,0
False Positive Rate Difference,0.0,0
Average Odds Difference,0.0,0
Accuracy Difference,-0.191532,0


### Pipeline Implementation

In [10]:
from holisticai.pipeline import Pipeline

# Instantiate the pre-processing bias mitigator and the classifier
# NOTE(review): the recorded output of this cell contains NaN for several
# metrics — possibly related to k=1; TODO confirm.
mitigator = LearningFairRepresentation(
    k=1,
    Ax=0.2,
    Ay=2.0,
    Az=4.0,
    verbose=1,
    maxiter=100,
    maxfun=100,
)
model = LogisticRegression()

# Chain scaling -> bias mitigation -> estimator into a single pipeline
pipeline = Pipeline(
    steps=[
        ('scalar', StandardScaler()),
        ("bm_preprocessing", mitigator),
        ("estimator", model),
    ]
)

# Group vectors are supplied as 'bm__'-prefixed keyword arguments
# (presumably routed by the pipeline to the bias-mitigation step)
fit_params = {
    'bm__group_a': train_data['group_a'],
    'bm__group_b': train_data['group_b'],
}
pipeline.fit(train_data['X'], train_data['y'], **fit_params)

predict_params = {
    'bm__group_a': test_data['group_a'],
    'bm__group_b': test_data['group_b'],
}
y_pred_pipeline = pipeline.predict(test_data['X'], **predict_params)

# Evaluate bias metrics for pipeline model
metrics_pipeline = classification_bias_metrics(
    test_data['group_a'],
    test_data['group_b'],
    y_pred_pipeline,
    test_data['y'],
    metric_type='both',
)
metrics_pipeline



Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.0,0
Disparate Impact,,1
Four Fifths Rule,,1
Cohen D,,0
2SD Rule,,0
Equality of Opportunity Difference,0.0,0
False Positive Rate Difference,0.0,0
Average Odds Difference,0.0,0
Accuracy Difference,-0.191532,0


### Comparison

In [11]:
# Put the traditional metric values next to the pipeline metrics table,
# labelling the two column groups 'Traditional' and 'Pipeline'.
pd.concat(
    [metrics['Value'], metrics_pipeline],
    axis=1,
    keys=['Traditional', 'Pipeline'],
)

Unnamed: 0_level_0,Traditional,Pipeline,Pipeline
Unnamed: 0_level_1,Value,Value,Reference
Metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Statistical Parity,0.0,0.0,0
Disparate Impact,,,1
Four Fifths Rule,,,1
Cohen D,,,0
2SD Rule,,,0
Equality of Opportunity Difference,0.0,0.0,0
False Positive Rate Difference,0.0,0.0,0
Average Odds Difference,0.0,0.0,0
Accuracy Difference,-0.191532,-0.191532,0


# 4. Reweighing

### Traditional Implementation

In [12]:
# Define the pre-processing bias mitigator and the downstream classifier
from holisticai.bias.mitigation import Reweighing

mitigator = Reweighing()
model = LogisticRegression()

# Standardize the training features
scaler = StandardScaler()
X_train = scaler.fit_transform(train_data['X'])

# Fit the mitigator on the labels and group membership, then read the
# per-sample weights it exposes through `estimator_params`
fit_params = {"group_a": train_data['group_a'], "group_b": train_data['group_b']}
mitigator.fit(train_data['y'], **fit_params)
sw = mitigator.estimator_params["sample_weight"]

# Train the classifier with those weights; pass them by keyword so the call
# does not depend on the positional order of `fit`'s optional arguments
model.fit(X_train, train_data['y'], sample_weight=sw)

# Mitigator transform and model predict
transform_params = {"group_a": test_data['group_a'], "group_b": test_data['group_b']}
X_test = scaler.transform(test_data['X'])
X_pre = mitigator.transform(X_test, **transform_params)
y_pred = model.predict(X_pre)

# Evaluate bias metrics
metrics = classification_bias_metrics(test_data['group_a'], test_data['group_b'], y_pred, test_data['y'], metric_type='both')
metrics

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.09611,0
Disparate Impact,1.789128,1
Four Fifths Rule,0.558932,1
Cohen D,0.248225,0
2SD Rule,10.967284,0
Equality of Opportunity Difference,-0.109324,0
False Positive Rate Difference,0.018751,0
Average Odds Difference,-0.045286,0
Accuracy Difference,-0.101354,0


### Pipeline Implementation

In [13]:
from holisticai.pipeline import Pipeline

# Instantiate the pre-processing bias mitigator and the classifier
mitigator = Reweighing()
model = LogisticRegression()

# Chain scaling -> bias mitigation -> estimator into a single pipeline
pipeline = Pipeline(
    steps=[
        ('scalar', StandardScaler()),
        ("bm_preprocessing", mitigator),
        ("estimator", model),
    ]
)

# Group vectors are supplied as 'bm__'-prefixed keyword arguments
# (presumably routed by the pipeline to the bias-mitigation step)
fit_params = {
    'bm__group_a': train_data['group_a'],
    'bm__group_b': train_data['group_b'],
}
pipeline.fit(train_data['X'], train_data['y'], **fit_params)

predict_params = {
    'bm__group_a': test_data['group_a'],
    'bm__group_b': test_data['group_b'],
}
y_pred_pipeline = pipeline.predict(test_data['X'], **predict_params)

# Evaluate bias metrics for pipeline model
metrics_pipeline = classification_bias_metrics(
    test_data['group_a'],
    test_data['group_b'],
    y_pred_pipeline,
    test_data['y'],
    metric_type='both',
)
metrics_pipeline

Optimization Progress: 252it [00:20, 33.63it/s, loss: 1.824 L_x: 2.694 L_y: 0.642 L_z: 0.000]

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.09611,0
Disparate Impact,1.789128,1
Four Fifths Rule,0.558932,1
Cohen D,0.248225,0
2SD Rule,10.967284,0
Equality of Opportunity Difference,-0.109324,0
False Positive Rate Difference,0.018751,0
Average Odds Difference,-0.045286,0
Accuracy Difference,-0.101354,0


### Comparison

In [14]:
# Put the traditional metric values next to the pipeline metrics table,
# labelling the two column groups 'Traditional' and 'Pipeline'.
pd.concat(
    [metrics['Value'], metrics_pipeline],
    axis=1,
    keys=['Traditional', 'Pipeline'],
)

Unnamed: 0_level_0,Traditional,Pipeline,Pipeline
Unnamed: 0_level_1,Value,Value,Reference
Metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Statistical Parity,0.09611,0.09611,0
Disparate Impact,1.789128,1.789128,1
Four Fifths Rule,0.558932,0.558932,1
Cohen D,0.248225,0.248225,0
2SD Rule,10.967284,10.967284,0
Equality of Opportunity Difference,-0.109324,-0.109324,0
False Positive Rate Difference,0.018751,0.018751,0
Average Odds Difference,-0.045286,-0.045286,0
Accuracy Difference,-0.101354,-0.101354,0


