# **Mitigating Bias in Multiclass Classification**


In [None]:
# Put the directory two levels above this notebook at the front of the
# import search path, so modules located there take precedence on import.
import sys

sys.path.insert(0, '../../')

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from holisticai.bias.metrics import multiclass_bias_metrics
from holisticai.pipeline import Pipeline
from holisticai.utils.transformers.bias import SensitiveGroups
from tests.testing_utils._tests_data_utils import load_preprocessed_us_crime
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

## Data Preprocessing

In [None]:
# Encoder reused by every section below to turn the two group columns into
# the single protected-attribute input passed to the metrics function.
sensgroup = SensitiveGroups()

# Load the US crime data as a 5-class problem; each split unpacks into
# (X, y, group_a, group_b).
train_data, test_data = load_preprocessed_us_crime(nb_classes=5)
_, _, group_a, group_b = train_data

## Baseline

In [None]:
# Baseline: scale the features and fit a plain logistic regression, with no
# bias-mitigation step in the pipeline.
X, y, group_a, group_b = train_data
pipeline = Pipeline(
    steps=[("scalar", StandardScaler()), ("model", LogisticRegression())]
)
pipeline.fit(X, y)

# From here on X, y, group_a and group_b refer to the test split.
X, y, group_a, group_b = test_data
y_pred = pipeline.predict(X)

# Fit the group encoder on the test groups (stacked along axis 1) and encode
# them; later sections reuse the fitted encoder through `transform`.
p_attr = sensgroup.fit_transform(
    np.stack([group_a, group_b], axis=1),
    convert_numeric=True,
)

df = multiclass_bias_metrics(p_attr, y_pred, y, metric_type="both")

# Keep copies: `y_pred` and `df` are overwritten by every later section.
y_baseline = y_pred.copy()
df_baseline = df.copy()
df_baseline

## ML Debiaser

In [None]:
from holisticai.bias.mitigation import MLDebiaser

# Seed NumPy's global RNG before building and fitting the pipeline
# (presumably so any NumPy-based randomness in the debiaser is repeatable
# -- TODO confirm).
np.random.seed(10)

# Same scaler + logistic regression as the baseline, followed by MLDebiaser
# registered as the "bm_postprocessing" step.
pipeline = Pipeline(
    steps=[
        ("scalar", StandardScaler()),
        ("model", LogisticRegression()),
        (
            "bm_postprocessing",
            MLDebiaser(sgd_steps=10_000, full_gradient_epochs=500, max_iter=5),
        ),
    ]
)

# Fit on the training split; the group columns are passed through the
# "bm__"-prefixed keyword arguments.
X, y, group_a, group_b = train_data
fit_params = {"bm__group_a": group_a, "bm__group_b": group_b}
pipeline.fit(X, y, **fit_params)

# Predict on the test split, again supplying the group columns.
X, y, group_a, group_b = test_data
predict_params = {"bm__group_a": group_a, "bm__group_b": group_b}
y_pred = pipeline.predict(X, **predict_params)

# Encode the test groups with the already-fitted encoder.
p_attr = sensgroup.transform(
    np.stack([group_a, group_b], axis=1),
    convert_numeric=True,
)

df = multiclass_bias_metrics(p_attr, y_pred, y, metric_type="both")

# Keep copies for the comparison table built further down.
y_mldebiaser = y_pred.copy()
df_mldebiaser = df.copy()
df_mldebiaser

## Reweighing

In [None]:
from holisticai.bias.mitigation import Reweighing

# Seed NumPy's global RNG before building and fitting the pipeline.
np.random.seed(10)

# Reweighing is registered as the "bm_preprocessing" step, between the
# scaler and the classifier.
pipeline = Pipeline(
    steps=[
        ("scalar", StandardScaler()),
        ("bm_preprocessing", Reweighing()),
        ("model", LogisticRegression()),
    ]
)

# Fit on the training split; the group columns are passed through the
# "bm__"-prefixed keyword arguments.
X, y, group_a, group_b = train_data
fit_params = {"bm__group_a": group_a, "bm__group_b": group_b}
pipeline.fit(X, y, **fit_params)

# Predict on the test split, again supplying the group columns.
X, y, group_a, group_b = test_data
predict_params = {"bm__group_a": group_a, "bm__group_b": group_b}
y_pred = pipeline.predict(X, **predict_params)

# Encode the test groups with the already-fitted encoder.
p_attr = sensgroup.transform(
    np.stack([group_a, group_b], axis=1),
    convert_numeric=True,
)

df = multiclass_bias_metrics(p_attr, y_pred, y, metric_type="both")

# Keep copies for the comparison table built further down.
y_rw = y_pred.copy()
df_rw = df.copy()
df_rw

In [None]:
# Correlation Remover section: same layout as the Reweighing pipeline, with
# CorrelationRemover as the "bm_preprocessing" step.
from holisticai.bias.mitigation import CorrelationRemover

pipeline = Pipeline(
    steps=[
        ("scalar", StandardScaler()),
        ("bm_preprocessing", CorrelationRemover()),
        ("model", LogisticRegression()),
    ]
)

# Fit on the training split; the group columns are passed through the
# "bm__"-prefixed keyword arguments.
X, y, group_a, group_b = train_data
fit_params = {"bm__group_a": group_a, "bm__group_b": group_b}
pipeline.fit(X, y, **fit_params)

# Predict on the test split, again supplying the group columns.
X, y, group_a, group_b = test_data
predict_params = {"bm__group_a": group_a, "bm__group_b": group_b}
y_pred = pipeline.predict(X, **predict_params)

# Encode the test groups with the already-fitted encoder.
p_attr = sensgroup.transform(
    np.stack([group_a, group_b], axis=1),
    convert_numeric=True,
)

df = multiclass_bias_metrics(p_attr, y_pred, y, metric_type="both")

# Keep copies for the comparison table built in the next cell.
y_cr = y_pred.copy()
df_cr = df.copy()
df_cr

In [None]:
# Side-by-side comparison of the four runs. The frames are concatenated
# column-wise and positions 0, 2, 4, 6 plus 7 are kept; this assumes each
# metrics frame contributes a value column followed by a reference column,
# so only the last reference column is retained -- TODO confirm layout.
result = pd.concat(
    [df_baseline, df_rw, df_cr, df_mldebiaser],
    axis=1,
).iloc[:, [0, 2, 4, 6, 7]]
result.columns = [
    'Baseline',
    'Reweighing',
    'Correlation Remover',
    'ML Debiaser',
    'Reference',
]
result

### Fair Score Classifier

In [None]:
from holisticai.bias.mitigation import FairScoreClassifier

In [None]:
# Switch back to the training split for the Fair Score Classifier.
X, y, group_a, group_b = train_data

# One-hot encode the labels, then drop down to the underlying array.
y = pd.get_dummies(y)
y = y.values

In [None]:
# Configure the Fair Score Classifier: objective code "ba", no constraints,
# and a time limit of 200 (units not visible here -- TODO confirm).
objectives = "ba"
constraints = {}
model = FairScoreClassifier(
    objectives,
    constraints,
    time_limit=200,
)

# Train directly on the one-hot labels, passing the group columns positionally.
model.fit(X, y, group_a, group_b)

In [None]:
# Score the test split with the trained Fair Score Classifier; the group
# columns are passed positionally, as in `fit`.
X, y, group_a, group_b = test_data
ypred = model.predict(X, group_a, group_b)

# Re-fit the group encoder on the test groups and encode them for the metrics.
p_attr = sensgroup.fit_transform(
    np.stack([group_a, group_b], axis=1),
    convert_numeric=True,
)

In [None]:
# Bias metrics for the standalone Fair Score Classifier predictions
# (`ypred`), displayed as the cell output.
multiclass_bias_metrics(p_attr, ypred, y, metric_type="both")

### Fair Score Classifier - pipeline

In [None]:
# Same classifier settings as the standalone run: objective code "ba" and
# an empty constraints mapping.
objectives = "ba"
constraints = {}

# Scaler followed by the Fair Score Classifier, registered as the
# "bm_inprocessing" step so it also serves as the final estimator.
pipeline = Pipeline(
    steps=[
        ("scalar", StandardScaler()),
        (
            "bm_inprocessing",
            FairScoreClassifier(objectives, constraints, time_limit=200),
        ),
    ]
)

In [None]:
# Training split, with the labels one-hot encoded as in the standalone run.
X, y, group_a, group_b = train_data
y = pd.get_dummies(y)
y = y.values

# The group columns are passed through the "bm__"-prefixed keyword arguments.
fit_params = {"bm__group_a": group_a, "bm__group_b": group_b}
pipeline.fit(X, y, **fit_params)

In [None]:
# Predict on the test split with the fitted Fair Score Classifier pipeline;
# the group columns are passed through the "bm__"-prefixed keyword arguments.
X, y, group_a, group_b = test_data
predict_params = {
    "bm__group_a": group_a,
    "bm__group_b": group_b,
}
y_pred = pipeline.predict(X, **predict_params)

# Encode the test groups here, as every other section does right after
# predicting. Without this, the metrics cell that follows would depend on a
# `p_attr` left behind by the standalone Fair Score Classifier cells and
# would fail (or use stale data) if those cells were skipped.
p_attr = sensgroup.fit_transform(
    np.stack([group_a, group_b], axis=1),
    convert_numeric=True,
)

In [None]:
# Bias metrics for the pipeline-based Fair Score Classifier predictions
# (`y_pred`), displayed as the cell output.
multiclass_bias_metrics(p_attr, y_pred, y, metric_type="both")