## Packages Installation

First, install the `holisticai` package if you haven't already:
```bash
!pip install holisticai[all]
```
Then, import the necessary libraries.

In [2]:
import pandas as pd
from holisticai.bias.metrics import classification_bias_metrics
from holisticai.datasets import load_dataset
from holisticai.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

## Dataset loading

In [3]:
# Load the 'adult' dataset, using 'sex' as the protected attribute
dataset = load_dataset('adult', protected_attribute='sex')

# Split with test_size=0.2 and a fixed random_state so the split is repeatable
train_test = dataset.train_test_split(test_size=0.2, random_state=42)
train, test = train_test['train'], train_test['test']

# Display the loaded dataset
dataset

# 1. Correlation Remover

### Traditional Implementation

In [4]:
# Define the preprocessing mitigator (it transforms the features before the estimator is fit)
from holisticai.bias.mitigation import CorrelationRemover

mitigator = CorrelationRemover()
mitigator

In [5]:
# Scale the features; the scaler is fitted on the training split only
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])

# Fit the mitigator on the scaled training features and transform them
X_train_pre = mitigator.fit_transform(
    X_train,
    group_a=train['group_a'],
    group_b=train['group_b'],
)

# Train the classifier on the mitigated training features
model = LogisticRegression()
model.fit(X_train_pre, train['y'])

# Reuse the fitted scaler and mitigator on the test split, then predict
X_test = scaler.transform(test['X'])
X_test_pre = mitigator.transform(
    X_test,
    group_a=test['group_a'],
    group_b=test['group_b'],
)
y_pred = model.predict(X_test_pre)

# Compute the bias metrics for the test predictions
metrics = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred,
    test['y'],
    metric_type='both',
)
metrics

An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.


Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.088938,0
Disparate Impact,1.7323,1
Four Fifths Rule,0.577267,1
Cohen D,0.232029,0
2SD Rule,10.260307,0
Equality of Opportunity Difference,-0.099203,0
False Positive Rate Difference,0.011345,0
Average Odds Difference,-0.043929,0
Accuracy Difference,-0.097202,0


### Pipeline Implementation

In [6]:
# Define the preprocessing mitigator (a new instance, used as the pipeline's "bm_preprocessing" step)
mitigator = CorrelationRemover()
mitigator

In [7]:
# Estimator used as the final pipeline step
model = LogisticRegression()

# Chain scaling, the bias mitigator and the estimator into one pipeline
pipeline = Pipeline(
    steps=[
        ('scalar', StandardScaler()),
        ("bm_preprocessing", mitigator),
        ("estimator", model),
    ]
)

# Group membership vectors are supplied as bm__-prefixed keyword arguments
pipeline.fit(
    train['X'],
    train['y'],
    bm__group_a=train['group_a'],
    bm__group_b=train['group_b'],
)
pipeline

In [9]:
# Predict on the test split through the fitted pipeline
y_pred_pipeline = pipeline.predict(
    test['X'],
    bm__group_a=test['group_a'],
    bm__group_b=test['group_b'],
)

# Compute the bias metrics for the pipeline predictions
metrics_pipeline = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred_pipeline,
    test['y'],
    metric_type='both',
)
metrics_pipeline

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.088938,0
Disparate Impact,1.7323,1
Four Fifths Rule,0.577267,1
Cohen D,0.232029,0
2SD Rule,10.260307,0
Equality of Opportunity Difference,-0.099203,0
False Positive Rate Difference,0.011345,0
Average Odds Difference,-0.043929,0
Accuracy Difference,-0.097202,0


### Comparison

In [10]:
# Traditional metric values next to the full pipeline metrics table
pd.concat(
    [metrics['Value'], metrics_pipeline],
    axis=1,
    keys=['Traditional', 'Pipeline'],
)

Unnamed: 0_level_0,Traditional,Pipeline,Pipeline
Unnamed: 0_level_1,Value,Value,Reference
Metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Statistical Parity,0.088938,0.088938,0
Disparate Impact,1.7323,1.7323,1
Four Fifths Rule,0.577267,0.577267,1
Cohen D,0.232029,0.232029,0
2SD Rule,10.260307,10.260307,0
Equality of Opportunity Difference,-0.099203,-0.099203,0
False Positive Rate Difference,0.011345,0.011345,0
Average Odds Difference,-0.043929,-0.043929,0
Accuracy Difference,-0.097202,-0.097202,0


# 2. Disparate Impact Remover

### Traditional Implementation

In [11]:
# Define the preprocessing mitigator (it transforms the features before the estimator is fit)
from holisticai.bias.mitigation import DisparateImpactRemover
mitigator = DisparateImpactRemover()
mitigator

In [12]:
# Scale the features; the scaler is fitted on the training split only
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])

# Fit the mitigator on the scaled training features and transform them
X_train_pre = mitigator.fit_transform(
    X_train,
    group_a=train['group_a'],
    group_b=train['group_b'],
)

# Train the classifier on the mitigated training features
model = LogisticRegression()
model.fit(X_train_pre, train['y'])

# Reuse the fitted scaler and mitigator on the test split, then predict
X_test = scaler.transform(test['X'])
X_test_pre = mitigator.transform(
    X_test,
    group_a=test['group_a'],
    group_b=test['group_b'],
)
y_pred = model.predict(X_test_pre)

# Compute the bias metrics for the test predictions
metrics = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred,
    test['y'],
    metric_type='both',
)
metrics

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.453053,0
Disparate Impact,7.791141,1
Four Fifths Rule,0.128351,1
Cohen D,1.041918,0
2SD Rule,41.661359,0
Equality of Opportunity Difference,0.431984,0
False Positive Rate Difference,0.340087,0
Average Odds Difference,0.386036,0
Accuracy Difference,-0.204678,0


### Pipeline Implementation

In [13]:
# Preprocessing mitigator and the estimator that follows it
mitigator = DisparateImpactRemover()
model = LogisticRegression()

# Chain scaling, the bias mitigator and the estimator into one pipeline
pipeline = Pipeline(
    steps=[
        ('scalar', StandardScaler()),
        ("bm_preprocessing", mitigator),
        ("estimator", model),
    ]
)

# Group membership vectors are supplied as bm__-prefixed keyword arguments
pipeline.fit(
    train['X'],
    train['y'],
    bm__group_a=train['group_a'],
    bm__group_b=train['group_b'],
)

# Predict on the test split through the fitted pipeline
y_pred_pipeline = pipeline.predict(
    test['X'],
    bm__group_a=test['group_a'],
    bm__group_b=test['group_b'],
)

# Compute the bias metrics for the pipeline predictions
metrics_pipeline = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred_pipeline,
    test['y'],
    metric_type='both',
)
metrics_pipeline

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.453053,0
Disparate Impact,7.791141,1
Four Fifths Rule,0.128351,1
Cohen D,1.041918,0
2SD Rule,41.661359,0
Equality of Opportunity Difference,0.431984,0
False Positive Rate Difference,0.340087,0
Average Odds Difference,0.386036,0
Accuracy Difference,-0.204678,0


### Comparison

In [14]:
# Traditional metric values next to the full pipeline metrics table
pd.concat(
    [metrics['Value'], metrics_pipeline],
    axis=1,
    keys=['Traditional', 'Pipeline'],
)

Unnamed: 0_level_0,Traditional,Pipeline,Pipeline
Unnamed: 0_level_1,Value,Value,Reference
Metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Statistical Parity,0.453053,0.453053,0
Disparate Impact,7.791141,7.791141,1
Four Fifths Rule,0.128351,0.128351,1
Cohen D,1.041918,1.041918,0
2SD Rule,41.661359,41.661359,0
Equality of Opportunity Difference,0.431984,0.431984,0
False Positive Rate Difference,0.340087,0.340087,0
Average Odds Difference,0.386036,0.386036,0
Accuracy Difference,-0.204678,-0.204678,0


# 3. Learning Fair Representations

### Traditional Implementation

In [15]:
# Define the preprocessing mitigator (it transforms the features before the estimator is fit).
# A fixed seed is passed so repeated runs use the same seed value.
from holisticai.bias.mitigation import LearningFairRepresentation

mitigator = LearningFairRepresentation(k=10, Ax=0.2, Ay=2.0, Az=4.0, verbose=1, maxiter=100, seed=100)
mitigator

In [16]:
# Scale the features; the scaler is fitted on the training split only
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])

# Fit the mitigator on the scaled training features (this mitigator is also
# given the training labels) and transform them
X_train_pre = mitigator.fit_transform(
    X_train,
    train['y'],
    group_a=train['group_a'],
    group_b=train['group_b'],
)

# Train the classifier on the mitigated training features
model = LogisticRegression()
model.fit(X_train_pre, train['y'])

# Reuse the fitted scaler and mitigator on the test split, then predict
X_test = scaler.transform(test['X'])
X_test_pre = mitigator.transform(
    X_test,
    group_a=test['group_a'],
    group_b=test['group_b'],
)
y_pred = model.predict(X_test_pre)

# Compute the bias metrics for the test predictions
metrics = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred,
    test['y'],
    metric_type='both',
)
metrics

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Statistical Parity,0.071144,0
Disparate Impact,3.63231,1
Four Fifths Rule,0.275307,1
Cohen D,0.271978,0
2SD Rule,12.000883,0
Equality of Opportunity Difference,0.100202,0
False Positive Rate Difference,0.041541,0
Average Odds Difference,0.070872,0
Accuracy Difference,-0.170401,0


### Pipeline Implementation

In [17]:
# Preprocessing mitigator (same hyperparameters as the traditional run) and estimator
mitigator = LearningFairRepresentation(
    k=10,
    Ax=0.2,
    Ay=2.0,
    Az=4.0,
    verbose=1,
    maxiter=100,
    seed=100,
)
model = LogisticRegression()

# Chain scaling, the bias mitigator and the estimator into one pipeline
pipeline = Pipeline(
    steps=[
        ('scalar', StandardScaler()),
        ("bm_preprocessing", mitigator),
        ("estimator", model),
    ]
)

# Group membership vectors are supplied as bm__-prefixed keyword arguments
pipeline.fit(
    train['X'],
    train['y'],
    bm__group_a=train['group_a'],
    bm__group_b=train['group_b'],
)

# Predict on the test split through the fitted pipeline
y_pred_pipeline = pipeline.predict(
    test['X'],
    bm__group_a=test['group_a'],
    bm__group_b=test['group_b'],
)

# Compute the bias metrics for the pipeline predictions
metrics_pipeline = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred_pipeline,
    test['y'],
    metric_type='both',
)
metrics_pipeline

### Comparison

In [None]:
# Traditional metric values next to the full pipeline metrics table
pd.concat(
    [metrics['Value'], metrics_pipeline],
    axis=1,
    keys=['Traditional', 'Pipeline'],
)

# 4. Reweighing

### Traditional Implementation

In [None]:
# Define the preprocessing mitigator; in the next cell it is fitted on the labels
# and group vectors and exposes sample weights for the estimator
from holisticai.bias.mitigation import Reweighing
mitigator = Reweighing()
mitigator

In [None]:
# Scale the features; the scaler is fitted on the training split only
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])

# Fit the mitigator on the training labels and group vectors, then read the
# sample weights it stores for the downstream estimator
mitigator.fit(train['y'], group_a=train['group_a'], group_b=train['group_b'])
sw = mitigator.estimator_params["sample_weight"]

# Train the classifier once, passing the weights explicitly by keyword
# (the original cell instantiated the model twice and passed the weights positionally)
model = LogisticRegression()
model.fit(X_train, train['y'], sample_weight=sw)

# Mitigator transform and model predict
X_test = scaler.transform(test['X'])
X_pre = mitigator.transform(X_test, group_a=test['group_a'], group_b=test['group_b'])
y_pred = model.predict(X_pre)

# Evaluate bias metrics
metrics = classification_bias_metrics(test['group_a'], test['group_b'], y_pred, test['y'], metric_type='both')
metrics

### Pipeline Implementation

In [None]:
# Preprocessing mitigator and the estimator that follows it
mitigator = Reweighing()
model = LogisticRegression()

# Chain scaling, the bias mitigator and the estimator into one pipeline
pipeline = Pipeline(
    steps=[
        ('scalar', StandardScaler()),
        ("bm_preprocessing", mitigator),
        ("estimator", model),
    ]
)

# Group membership vectors are supplied as bm__-prefixed keyword arguments
pipeline.fit(
    train['X'],
    train['y'],
    bm__group_a=train['group_a'],
    bm__group_b=train['group_b'],
)

# Predict on the test split through the fitted pipeline
y_pred_pipeline = pipeline.predict(
    test['X'],
    bm__group_a=test['group_a'],
    bm__group_b=test['group_b'],
)

# Compute the bias metrics for the pipeline predictions
metrics_pipeline = classification_bias_metrics(
    test['group_a'],
    test['group_b'],
    y_pred_pipeline,
    test['y'],
    metric_type='both',
)
metrics_pipeline

### Comparison

In [None]:
# Traditional metric values next to the full pipeline metrics table
pd.concat(
    [metrics['Value'], metrics_pipeline],
    axis=1,
    keys=['Traditional', 'Pipeline'],
)