## Package Installation

First, install the `holisticai` package if you haven't already:
```bash
pip install "holisticai[all]"
```
Then, import the necessary libraries.

In [1]:
import warnings

import pandas as pd
import numpy as np
from holisticai.bias.metrics import multiclass_bias_metrics
from holisticai.datasets import load_dataset
from holisticai.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Silence every warning category from this point on (presumably to keep the
# tutorial output uncluttered). Note that this hides all warnings raised by
# the cells below, not only cosmetic ones.
warnings.filterwarnings("ignore")

## Data Loading 

In [2]:
# Load the 'us_crime_multiclass' dataset and split it into train and test
# parts (test_size=0.2, fixed random_state so the split can be repeated).
dataset = load_dataset('us_crime_multiclass')
splits = dataset.train_test_split(test_size=0.2, random_state=42)
train, test = splits['train'], splits['test']

# Show the dataset object as the cell output.
dataset

# 1. LP Debiaser Multiclass

### Traditional Implementation

In [3]:
from holisticai.bias.mitigation import LPDebiaserMulticlass

# Post-processing debiaser and base classifier, wired together by hand.
mitigator = LPDebiaserMulticlass(constraint="EqualizedOpportunity")
model = LogisticRegression()

# Standardise the features: the scaler is fitted on the training split only
# and then reused, unchanged, on the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
X_test = scaler.transform(test['X'])

# Train the classifier, then fit the debiaser on the true training labels
# together with the classifier's own training-set predictions.
model.fit(X_train, train['y'])
train_pred = model.predict(X_train)
mitigator.fit(train['y'], train_pred, group_a=train['group_a'], group_b=train['group_b'])

# Predict on the test split and pass the raw predictions through the debiaser;
# the adjusted labels are read from the 'y_pred' entry of the result.
test_pred = model.predict(X_test)
mitigated = mitigator.transform(test_pred, group_a=test['group_a'], group_b=test['group_b'])
y_pred = mitigated['y_pred']

# Bias metrics of the mitigated predictions on the test split.
metrics = multiclass_bias_metrics(test['p_attr'], y_pred, test['y'], metric_type='both')
metrics

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Max Multiclass Statistical Parity,0.567852,0
Mean Multiclass Statistical Parity,0.567852,0
Max Multiclass Equality of Opportunity,0.385976,0
Max Multiclass Average Odds,0.351575,0
Max Multiclass True Positive Difference,0.265071,0
Mean Multiclass Equality of Opportunity,0.385976,0
Mean Multiclass Average Odds,0.351575,0
Mean Multiclass True Positive Difference,0.265071,0


### Pipeline Implementation

In [4]:
# Same debiaser and classifier as in the manual version, this time chained
# inside a single holisticai Pipeline.
mitigator = LPDebiaserMulticlass(constraint="EqualizedOpportunity")
model = LogisticRegression()

steps = [
    ('scalar', StandardScaler()),
    ("model", model),
    ("bm_postprocessing", mitigator),
]
pipeline = Pipeline(steps=steps)

# Group membership is handed over through `bm__`-prefixed keyword arguments
# (presumably routed to the `bm_postprocessing` step -- confirm in the
# holisticai Pipeline docs).
train_groups = {'bm__group_a': train['group_a'], 'bm__group_b': train['group_b']}
pipeline.fit(train['X'], train['y'], **train_groups)

# Predict on the test split, again supplying the group membership.
test_groups = {'bm__group_a': test['group_a'], 'bm__group_b': test['group_b']}
y_pred = pipeline.predict(test['X'], **test_groups)

# Bias metrics of the pipeline predictions on the test split.
metrics_pipeline = multiclass_bias_metrics(test['p_attr'], y_pred, test['y'], metric_type='both')
metrics_pipeline

Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Max Multiclass Statistical Parity,0.567852,0
Mean Multiclass Statistical Parity,0.567852,0
Max Multiclass Equality of Opportunity,0.385976,0
Max Multiclass Average Odds,0.351575,0
Max Multiclass True Positive Difference,0.265071,0
Mean Multiclass Equality of Opportunity,0.385976,0
Mean Multiclass Average Odds,0.351575,0
Mean Multiclass True Positive Difference,0.265071,0


### Comparison

In [5]:
# Side-by-side view: the 'Value' column of the manual run next to the full
# metrics table (values and reference) of the pipeline run.
comparison_parts = [metrics['Value'], metrics_pipeline]
pd.concat(comparison_parts, axis=1, keys=['Traditional', 'Pipeline'])

Unnamed: 0_level_0,Traditional,Pipeline,Pipeline
Unnamed: 0_level_1,Value,Value,Reference
Metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Max Multiclass Statistical Parity,0.567852,0.567852,0
Mean Multiclass Statistical Parity,0.567852,0.567852,0
Max Multiclass Equality of Opportunity,0.385976,0.385976,0
Max Multiclass Average Odds,0.351575,0.351575,0
Max Multiclass True Positive Difference,0.265071,0.265071,0
Mean Multiclass Equality of Opportunity,0.385976,0.385976,0
Mean Multiclass Average Odds,0.351575,0.351575,0
Mean Multiclass True Positive Difference,0.265071,0.265071,0


# 2. ML Debiaser Multiclass

### Traditional Implementation

In [6]:
from holisticai.bias.mitigation import MLDebiaser

# Post-processing debiaser and base classifier, wired together by hand.
mitigator = MLDebiaser()
model = LogisticRegression()

# Standardise the features: the scaler is fitted on the training split only
# and then reused, unchanged, on the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(train['X'])
X_test = scaler.transform(test['X'])

# Train the classifier, then fit the debiaser on the classifier's
# training-set class probabilities (predict_proba, not hard labels).
model.fit(X_train, train['y'])
train_proba = model.predict_proba(X_train)
mitigator.fit(train_proba, group_a=train['group_a'], group_b=train['group_b'])

# Compute test-set probabilities and pass them through the debiaser; the
# adjusted predictions are read from the 'y_pred' entry of the result.
test_proba = model.predict_proba(X_test)
mitigated = mitigator.transform(test_proba, group_a=test['group_a'], group_b=test['group_b'])
y_pred = mitigated['y_pred']

# Bias metrics of the mitigated predictions on the test split.
metrics = multiclass_bias_metrics(test['p_attr'], y_pred, test['y'], metric_type='both')
metrics

[elapsed time: 00:00:04 | iter:5/5 | primal_residual::4.2072 | dual_residual::0.0312]


Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Max Multiclass Statistical Parity,0.420158,0
Mean Multiclass Statistical Parity,0.420158,0
Max Multiclass Equality of Opportunity,0.188144,0
Max Multiclass Average Odds,0.177132,0
Max Multiclass True Positive Difference,0.144376,0
Mean Multiclass Equality of Opportunity,0.188144,0
Mean Multiclass Average Odds,0.177132,0
Mean Multiclass True Positive Difference,0.144376,0


### Pipeline Implementation

In [7]:
# Same debiaser and classifier as in the manual version, this time chained
# inside a single holisticai Pipeline.
mitigator = MLDebiaser()
model = LogisticRegression()

steps = [
    ('scalar', StandardScaler()),
    ("model", model),
    ("bm_postprocessing", mitigator),
]
pipeline = Pipeline(steps=steps)

# Group membership is handed over through `bm__`-prefixed keyword arguments
# (presumably routed to the `bm_postprocessing` step -- confirm in the
# holisticai Pipeline docs).
train_groups = {'bm__group_a': train['group_a'], 'bm__group_b': train['group_b']}
pipeline.fit(train['X'], train['y'], **train_groups)

# Predict on the test split, again supplying the group membership.
test_groups = {'bm__group_a': test['group_a'], 'bm__group_b': test['group_b']}
y_pred = pipeline.predict(test['X'], **test_groups)

# Bias metrics of the pipeline predictions on the test split.
metrics_pipeline = multiclass_bias_metrics(test['p_attr'], y_pred, test['y'], metric_type='both')
metrics_pipeline

[elapsed time: 00:00:04 | iter:5/5 | primal_residual::4.2072 | dual_residual::0.0312]


Unnamed: 0_level_0,Value,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Max Multiclass Statistical Parity,0.420158,0
Mean Multiclass Statistical Parity,0.420158,0
Max Multiclass Equality of Opportunity,0.188144,0
Max Multiclass Average Odds,0.177132,0
Max Multiclass True Positive Difference,0.144376,0
Mean Multiclass Equality of Opportunity,0.188144,0
Mean Multiclass Average Odds,0.177132,0
Mean Multiclass True Positive Difference,0.144376,0


### Comparison

In [8]:
# Side-by-side view: the 'Value' column of the manual run next to the full
# metrics table (values and reference) of the pipeline run.
comparison_parts = [metrics['Value'], metrics_pipeline]
pd.concat(comparison_parts, axis=1, keys=['Traditional', 'Pipeline'])

Unnamed: 0_level_0,Traditional,Pipeline,Pipeline
Unnamed: 0_level_1,Value,Value,Reference
Metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Max Multiclass Statistical Parity,0.420158,0.420158,0
Mean Multiclass Statistical Parity,0.420158,0.420158,0
Max Multiclass Equality of Opportunity,0.188144,0.188144,0
Max Multiclass Average Odds,0.177132,0.177132,0
Max Multiclass True Positive Difference,0.144376,0.144376,0
Mean Multiclass Equality of Opportunity,0.188144,0.188144,0
Mean Multiclass Average Odds,0.177132,0.177132,0
Mean Multiclass True Positive Difference,0.144376,0.144376,0
