In [39]:
# Healthcare CSV file: diabetes.csv
# Goal: Analyze bias in healthcare data and mitigate it using DALEX python package


In [40]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Import dataset
dataset = pd.read_csv('diabetes.csv')


In [41]:
print(dataset)


     Unnamed: 0  Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   
0             0          6.0      148           72.0           35.0     30.5  \
1             1          1.0       85           66.0           29.0     30.5   
2             2          8.0      183           64.0           23.0     30.5   
3             3          1.0       89           66.0           23.0     94.0   
4             4          0.0      137           40.0           35.0    168.0   
..          ...          ...      ...            ...            ...      ...   
758         763         10.0      101           76.0           23.0    180.0   
759         764          2.0      122           70.0           27.0     30.5   
760         765          5.0      121           72.0           23.0    112.0   
761         766          1.0      126           60.0           23.0     30.5   
762         767          1.0       93           70.0           31.0     30.5   

      BMI  DiabetesPedigreeFunction   A

# Models

We will make 3 predictive models


In [42]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# classifiers
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

X_train, X_test, y_train, y_test = train_test_split(dataset.drop(
    'Outcome', axis=1), dataset['Outcome'], test_size=0.2, random_state=100)
categorical_features = ['AgeCategory']
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

numerical_features = ['Pregnancies', 'Glucose', 'BloodPressure',
                      'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
numerical_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Decision Tree
dt = Pipeline(steps=[('preprocessor', preprocessor),
                     ('classifier', DecisionTreeClassifier(max_depth=7, random_state=100))])

dt.fit(X_train, y_train)

# Random Forest
rf = Pipeline(steps=[('preprocessor', preprocessor),
                     ('classifier', RandomForestClassifier(n_estimators=200, max_depth=7, random_state=100))])

rf.fit(X_train, y_train)

# Logistic Regression
lr = Pipeline(steps=[('preprocessor', preprocessor),
                     ('classifier', LogisticRegression())])

lr.fit(X_train, y_train)


# Explainers

dalex let us create an explainer for each model


In [43]:
import dalex as dx

exp_tree = dx.Explainer(dt, X_test, y_test, label='Decision Tree')
exp_forest = dx.Explainer(rf, X_test, y_test, label='Random Forest')
exp_logreg = dx.Explainer(lr, X_test, y_test, label='Logistic Regression')


Preparation of a new explainer is initiated

  -> data              : 153 rows 10 cols
  -> target variable   : Parameter 'y' was a pandas.Series. Converted to a numpy.ndarray.
  -> target variable   : 153 values
  -> model_class       : sklearn.tree._classes.DecisionTreeClassifier (default)
  -> label             : Decision Tree
  -> predict function  : <function yhat_proba_default at 0x7f600062d5a0> will be used (default)
  -> predict function  : Accepts only pandas.DataFrame, numpy.ndarray causes problems.
  -> predicted values  : min = 0.0, mean = 0.399, max = 1.0
  -> model type        : classification will be used (default)
  -> residual function : difference between y and yhat (default)
  -> residuals         : min = -1.0, mean = -0.0131, max = 1.0
  -> model_info        : package sklearn

A new explainer has been created!
Preparation of a new explainer is initiated

  -> data              : 153 rows 10 cols
  -> target variable   : Parameter 'y' was a pandas.Series. Converted t

# Model Performance Measures


In [44]:
pd.concat([exp.model_performance().result for exp in [
          exp_logreg, exp_tree, exp_forest]])


Unnamed: 0,recall,precision,f1,accuracy,auc
Logistic Regression,0.59322,0.7,0.642202,0.745098,0.825279
Decision Tree,0.711864,0.677419,0.694215,0.75817,0.765056
Random Forest,0.694915,0.759259,0.725664,0.797386,0.846376


# Variable Importance

Measure the importance of each variable in the model using the variable importance function of the explainer


In [45]:
exp_tree.model_parts().plot(
    objects=[exp_forest.model_parts(), exp_logreg.model_parts()])


# Fairness: Age

We will focus on the age variable. We compute the Fairness using the model_fairness method


In [46]:
protected = np.where(X_test.Age >50, 'old', 'young')
privileged = 'young'

mf_tree = exp_tree.model_fairness(protected=protected,
                                  privileged=privileged)
mf_forest = exp_forest.model_fairness(protected=protected,
                                      privileged=privileged)
mf_logreg = exp_logreg.model_fairness(protected=protected,
                                      privileged=privileged)


## Now Check for Bias


In [47]:
mf_tree.fairness_check()


Bias detected in 3 metrics: TPR, FPR, STP

Conclusion: your model is not fair because 2 or more criteria exceeded acceptable limits set by epsilon.

Ratios of metrics, based on 'young'. Parameter 'epsilon' was set to 0.8 and therefore metrics should be within (0.8, 1.25)
         TPR       ACC       PPV    FPR       STP
old  1.27551  1.010568  1.040119  1.665  1.539267


In [48]:
mf_forest.fairness_check()


Bias detected in 3 metrics: TPR, FPR, STP

Conclusion: your model is not fair because 2 or more criteria exceeded acceptable limits set by epsilon.

Ratios of metrics, based on 'young'. Parameter 'epsilon' was set to 0.8 and therefore metrics should be within (0.8, 1.25)
          TPR       ACC       PPV       FPR       STP
old  1.545595  1.207959  1.212824  0.787234  1.598187


In [49]:
mf_logreg.fairness_check()


Bias detected in 3 metrics: TPR, FPR, STP

Conclusion: your model is not fair because 2 or more criteria exceeded acceptable limits set by epsilon.

Ratios of metrics, based on 'young'. Parameter 'epsilon' was set to 0.8 and therefore metrics should be within (0.8, 1.25)
          TPR       ACC       PPV       FPR       STP
old  1.890359  1.121088  1.050578  2.361702  2.254355


### Visualize the result


In [50]:
mf_tree.plot(objects=[mf_forest, mf_logreg])


In [51]:
mf_tree.plot(objects=[mf_logreg, mf_forest], type='stacked')

The DecisionTree seems to have least parity loss

In [52]:
mf_tree.plot(objects=[mf_logreg, mf_forest],
             type="performance_and_fairness",
             fairness_metric="FPR",
             performance_metric="accuracy")

# Mitigation

In [53]:
from dalex.fairness import resample
from copy import copy

In [54]:
# copying: we consider only the decision tree classifier
clf_u = copy(dt)
clf_p = copy(dt)

## Resampling

In [55]:
# resample
indices_uniform = resample(protected, y_test, verbose=False)
indices_preferential = resample(protected,
                                y_test,
                                type='preferential',  # different type
                                probs=exp_tree.y_hat,  # requires probabilities
                                verbose=False)


clf_u.fit(X_test.iloc[indices_uniform, :], y_test.iloc[indices_uniform])
clf_p.fit(X_test.iloc[indices_preferential, :], y_test.iloc[indices_preferential])


In [56]:
# Now check if fairness is ok
exp3 = dx.Explainer(clf_u, X_test, y_test, verbose = False)
exp4 = dx.Explainer(clf_p, X_test, y_test, verbose = False)

mf_u = exp3.model_fairness(protected=protected, privileged=privileged, label="uniform")
mf_p = exp4.model_fairness(protected=protected, privileged=privileged, label="preferential")

mf_u.plot([mf_p])



In [57]:
mf_u.fairness_check()
mf_p.fairness_check()

No bias was detected!

Conclusion: your model is fair in terms of checked fairness criteria.

Ratios of metrics, based on 'young'. Parameter 'epsilon' was set to 0.8 and therefore metrics should be within (0.8, 1.25)
          TPR       ACC       PPV  FPR      STP
old  0.892857  0.969104  1.060445  NaN  1.05641

Take into consideration that NaN's are present, consider checking 'metric_scores' plot to see the difference
No bias was detected!

Conclusion: your model is fair in terms of checked fairness criteria.

Ratios of metrics, based on 'young'. Parameter 'epsilon' was set to 0.8 and therefore metrics should be within (0.8, 1.25)
          TPR       ACC       PPV  FPR      STP
old  0.892857  0.969104  1.060445  NaN  1.05641

Take into consideration that NaN's are present, consider checking 'metric_scores' plot to see the difference
