In [37]:
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

import dalex as dx
import numpy as np
import pandas as pd
import sklearn

from copy import copy
import re
import plotly.express as px

In [38]:
print('loading data ...')

loading data ...


In [39]:
df, target = dx.datasets.load_german(), "risk"

In [40]:
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=[target]),
    df[target],
    test_size=0.3,
    random_state=42
)

X = df.drop(columns='risk')
y = df.risk

In [41]:
print('loading models ...')

loading models ...


In [42]:
def get_model_bias(modeltype, biastype):

    #BIAS PART
    regexp_both = re.compile(r'^sex[mf]+ [0-9]{2}[\+-]$')
    regexp_sex = re.compile(r'^sex[mf]$')
    regexp_age = re.compile(r'^[0-9]{2}[\+-]$')
    
    # If both age and sex
    if regexp_both.search(biastype):
        protected = X.sex + '_' + np.where(X.age < int(biastype[5:7]), 'young', 'old')
        mtype = 2
        if biastype[0:4]=="sexm":
            if biastype[7] == "+":
                privileged = "male_old"
            else:
                privileged = "male_young"
        else:
            if biastype[7] == "+":
                privileged = "female_old"
            else:
                privileged = "female_young"
                
    # If age only
    elif regexp_age.search(biastype):
        protected = np.where(X.age < int(biastype[0:2]), 'young', 'old')
        mtype = 1
        if biastype[2] == "+":
            privileged = "old"
        else:
            privileged = "young"
        
    # If sex only
    elif regexp_sex.search(biastype):
        protected = X.sex
        mtype = 0
        if biastype=="sexm":
            privileged = "male"
        else:
            privileged = "female"
        
    else:
        print("ERROR in bias definition ! ")
        return None
        
    # MODEL PART
    model = "m_"+modeltype
    if mtype==2: 
        model_rm = "m_"+modeltype+"_a_s"
    if mtype==1: 
        model_rm = "m_"+modeltype+"_a"
    if mtype==0: 
        model_rm = "m_"+modeltype+"_s"
        
    # OUTPUT
    dict_informations = {"model_name": model, "model_rm_name": model_rm, 
                         "protected": protected, "privileged": privileged,
                        }

    print("Everything looks good, let's continue ! You can run the following cell ! ")
    
    return dict_informations
#

In [43]:
# Preprocessing
preprocessor_std = make_column_transformer(
      (StandardScaler(), make_column_selector(dtype_include=np.number)),
      (OneHotEncoder(), make_column_selector(dtype_include=object))
)

preprocessor_rm_sex = make_column_transformer(
      (StandardScaler(), make_column_selector(dtype_include=np.number)),
      (OneHotEncoder(), ['housing', 'saving_accounts', 'checking_account', 'purpose'])
)

preprocessor_rm_age = make_column_transformer(
      (StandardScaler(), ['job', 'credit_amount', 'duration']),
      (OneHotEncoder(), make_column_selector(dtype_include=object))
)


preprocessor_rm_age_sex = make_column_transformer(
      (StandardScaler(), ['job', 'credit_amount', 'duration']),
      (OneHotEncoder(), ['housing', 'saving_accounts', 'checking_account', 'purpose'])
)


In [45]:
# Model & fir 
# Logistic regression
m_logreg = Pipeline(steps=[
    ('preprocessor', preprocessor_std),
    ('classifier', LogisticRegression())
])

m_logreg_s = Pipeline(steps=[
    ('preprocessor', preprocessor_rm_sex),
    ('classifier', LogisticRegression())
])

m_logreg_a = Pipeline(steps=[
    ('preprocessor', preprocessor_rm_age),
    ('classifier', LogisticRegression())
])

m_logreg_a_s = Pipeline(steps=[
    ('preprocessor', preprocessor_rm_age_sex),
    ('classifier', LogisticRegression())
])


m_logreg.fit(X_train, y_train)
m_logreg_s.fit(X_train, y_train)
m_logreg_a.fit(X_train, y_train)
m_logreg_a_s.fit(X_train, y_train)

# Decision tree
m_decisiontree = Pipeline(steps=[
    ('preprocessor', preprocessor_std),
    ('classifier', DecisionTreeClassifier(max_depth=7, random_state=123))
])

m_decisiontree_s = Pipeline(steps=[
    ('preprocessor', preprocessor_rm_sex),
    ('classifier', DecisionTreeClassifier(max_depth=7, random_state=123))
])

m_decisiontree_a = Pipeline(steps=[
    ('preprocessor', preprocessor_rm_age),
    ('classifier', DecisionTreeClassifier(max_depth=7, random_state=123))
])

m_decisiontree_a_s = Pipeline(steps=[
    ('preprocessor', preprocessor_rm_age_sex),
    ('classifier', DecisionTreeClassifier(max_depth=7, random_state=123))
])


m_decisiontree.fit(X_train, y_train)
m_decisiontree_s.fit(X_train, y_train)
m_decisiontree_a.fit(X_train, y_train)
m_decisiontree_a_s.fit(X_train, y_train)

# Random Forest
m_randomforest = Pipeline(steps=[
    ('preprocessor', preprocessor_std),
    ('classifier', RandomForestClassifier(random_state=123, max_depth=5))
])

m_randomforest_s = Pipeline(steps=[
    ('preprocessor', preprocessor_rm_sex),
    ('classifier', RandomForestClassifier(random_state=123, max_depth=5))
])

m_randomforest_a = Pipeline(steps=[
    ('preprocessor', preprocessor_rm_age),
    ('classifier', RandomForestClassifier(random_state=123, max_depth=5))
])

m_randomforest_a_s = Pipeline(steps=[
    ('preprocessor', preprocessor_rm_age_sex),
    ('classifier', RandomForestClassifier(random_state=123, max_depth=5))
])

m_randomforest.fit(X_train, y_train)
m_randomforest_s.fit(X_train, y_train)
m_randomforest_a.fit(X_train, y_train)
m_randomforest_a_s.fit(X_train, y_train)

# XGBoost

m_gbtree = Pipeline(steps=[
    ('preprocessor', preprocessor_std),
    ('classifier', GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=5, random_state=123))
])

m_gbtree_s = Pipeline(steps=[
    ('preprocessor', preprocessor_rm_sex),
    ('classifier', GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=5, random_state=123))
])

m_gbtree_a = Pipeline(steps=[
    ('preprocessor', preprocessor_rm_age),
    ('classifier', GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=5, random_state=123))
])

m_gbtree_a_s = Pipeline(steps=[
    ('preprocessor', preprocessor_rm_age_sex),
    ('classifier', GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=5, random_state=123))
])


m_gbtree.fit(X_train, y_train)
m_gbtree_s.fit(X_train, y_train)
m_gbtree_a.fit(X_train, y_train)
m_gbtree_a_s.fit(X_train, y_train)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('standardscaler',
                                                  StandardScaler(),
                                                  ['job', 'credit_amount',
                                                   'duration']),
                                                 ('onehotencoder',
                                                  OneHotEncoder(),
                                                  ['housing', 'saving_accounts',
                                                   'checking_account',
                                                   'purpose'])])),
                ('classifier',
                 GradientBoostingClassifier(learning_rate=1.0, max_depth=5,
                                            random_state=123))])

In [None]:
print('loading pipes ...')

In [13]:
# Re sampling
def resampling_model(model, model_explainer, protected, type_resampling="uniform"):
    
    # Select observations for re sampling
    if type_resampling == "preferential":
        indices = dx.fairness.resample(protected, y, type = 'preferential', # different type 
                                                    probs = explainer.y_hat, # requires probabilities 
                                                    verbose = False)
    else:
        indices = dx.fairness.resample(protected, y, verbose = False)
    
    
    # create new model bjects
    new_model = copy(model)
    
    # re-train models
    new_model.fit(X.iloc[indices, :], y[indices])
    
    return new_model

In [14]:
# Re weighting
def reweighting_model(model):
    weights = dx.fairness.reweight(protected, y, verbose = False)
    
    model_weighted = model

    kwargs = {model_weighted.steps[-1][0] + '__sample_weight': weights}
    
    model_weighted.fit(X,y, **kwargs)
    
    return model_weighted
    

In [None]:
def plot_radar_group(fobject, title=None, metrics=["TPR", "ACC", "PPV", "FPR", "STP"]):
    
    if metrics == 'all':
        metrics = fobject.metric_scores.columns
    
    # Prevent crash for wrong metric name
    metrics_common = [x for x in metrics if x in fobject.metric_scores]
    
    df = fobject.metric_scores[metrics_common].stack().reset_index()
    df.columns = ['Group', 'Fairness Metric', 'Value']
    
    fig = px.line_polar(df,
                        r="Value",
                        theta='Fairness Metric',
                        color="Group",
                        line_close=True,
                        hover_name='Group')

    if title is None:
        title = 'Radar Plot by Group'
        
    fig.update_layout(title=title, polar=dict(radialaxis=dict(tickangle=0, nticks=6, range=[0, 1])))
    
    fig.show()

In [29]:
def create_all_explainers_fairness_objects(parameters_dict, epsilon=0.8, theta=0.02, runsubpart=None):
    
    protected = parameters_dict['protected']
    privileged = parameters_dict['privileged']
    
    # Explainer & Fairness object of base model (selected by user)
    explainer_base = dx.Explainer(eval(parameters_dict['model_name']), X, y, verbose=False)
    print("-")
    fairness_object_base = explainer_base.model_fairness(protected = protected, privileged = privileged, verbose=False)
    fairness_object.fairness_check(epsilon = epsilon)

    # Mitigation 1: Remove sensitive columns
    print('---')
    explainer_rm = dx.Explainer(eval(parameters_dict['model_rm_name']), X, y, verbose=False)
    fobject_rm = explainer_rm.model_fairness(protected = protected, privileged = privileged, verbose=False, label='base_remove_columns')
    fobject_rm.fairness_check(epsilon = epsilon)
    print('---')
    # Mitigation 2: Resampling data 
    model_p = resampling_model(eval(parameters_dict['model_name']), explainer_base, protected, type_resampling="preferential")
    model_u = resampling_model(eval(parameters_dict['model_name']), explainer_base, protected, type_resampling="uniform")

    explainer_p = dx.Explainer(model_p, X, y, verbose = False)
    explainer_u = dx.Explainer(model_u, X, y, verbose = False)

    fobject_p = explainer_p.model_fairness(protected, privileged, verbose=False, label='base_preferential_resampling')
    fobject_u = explainer_u.model_fairness(protected, privileged, verbose=False, label='base_uniform_resampling')

    # Mitigation 3: Reweighting data
    model_w = reweighting_model(eval(parameters_dict['model_name']))

    explainer_w = dx.Explainer(model_w, X, y, verbose = False)

    fobject_w = explainer_w.model_fairness(protected, privileged, verbose=False, label='base_reweighted')
    
    # Mitigation 4: Roc-pivot switcher
    explainer_roc = dx.Explainer(eval(parameters_dict['model_name']), X, y, verbose=False)
    explainer_roc = dx.fairness.roc_pivot(explainer_roc, protected, privileged, theta = theta, verbose = False)
    fobject_roc = explainer_roc.model_fairness(protected, privileged, verbose=False, label='base_roc-pivot')

    return (explainer_base, fairness_object_base), (explainer_rm, fobject_rm), (explainer_p, fobject_p),(explainer_u, fobject_u), (explainer_w, fobject_w), (explainer_roc, fobject_roc)

In [33]:
def display_all(metricslist, graphic, explainers_fairness, parameters_dict, epsilon = 0.8):
    
    # BIT OF CHECKS & PREPROCESSING
    metrics = [x for x in metricslist if x in ["TPR", "TNR", "PPV", "NPV", "FNR", "FPR", "FDR", "FOR", "ACC", "STP"]]
    if len(metrics) < 1:
        return ("Please, select correct metrics (List of possiblities is above)")
    
    if graphic not in ['Default', 'Radar', 'Stack']:
        print ("Graphic type is incorrect, the default one will be used")
        graphic == "Default"
    
    # 1 - STATUS OF THE SELECTED MODEL
    
    print("Let's check the fairness performance of the selected model according to the specific populations declared:")
    
    print("""  This is a default graph produced by the dalex library. It does not take into account selection made for METRICS_LIST and GRAPHIC.
    
    """)
    protected = parameters_dict['protected']
    privileged = parameters_dict['privileged']
    
    # Explainer & Fairness object of base model (selected by user)
    explainer_base = dx.Explainer(eval(parameters_dict['model_name']), X, y, verbose=False)
    fairness_object_base = explainer_base.model_fairness(protected = protected, privileged = privileged, verbose=False)
    fairness_object.fairness_check(epsilon = epsilon)
    
    explainers_fairness[0][1].plot()
    
    print("""\033[1m Doing nothing. Is that so bad, really ?\033[0m

  • If all bars are in the green area, then according to your criteria your model is not biased. However, if you set a threshold for the age, are you sure that moving it a little bit (± 1 to 5 years) will not return a biased result ? Try it to be sure !

  • If a bias has been detected, have a look below to see how you can mitigate it !
  """)
    
    # 2 - NAIVE APPROACH
    print("\033[1m" + "Trying to mitigate a bias" + "\033[0m")
    print("\033[1m" + "  Option 1: Remove the sensitive variable" + "\033[0m")
    print("""  
    This is a default graph produced by the dalex library. It does not take into account selection made for METRICS_LIST and GRAPHIC.
    
    """)
    explainers_fairness[1][1].plot([explainers_fairness[0][1]])
    
    print("""How did you model evolved regarding to your fairness metrics ? Is it better without the column ? 

  • If yes, you're lucky this kind of naive preprocessing used to be useless most of the time. Usually the protected and biased variable is correlated with others explanatory variables and then removing it do not helps to unbias your model ! 

  • If no, well that kind of normal, let's see more appropriate ways to deal with biased models.
  
  """)
    
    # 3 - RESAMPLING
    print("\033[1m" + "Trying to mitigate a bias" + "\033[0m")
    print("\033[1m" + "  Option 2: Resampling training data" + "\033[0m")
    
    print("""  
    Did you look at the distribution of the biased variable ? Maybe some values of the variable are under-represented or over-represented. Resampling more equally training data would help to mitigate bias due to this king of issue.
    
    Let's compare the effect of this method to the default model:
    """)
    if graphic != 'Default':
        explainers_fairness[0][1].plot([explainers_fairness[2][1], explainers_fairness[3][1]], 
                                      type=graphic, 
                                      metrics=metrics)
    else:
        explainers_fairness[0][1].plot([explainers_fairness[2][1], explainers_fairness[3][1]])
        
    print("""So far, this solution may have resolved the unfairness issue. If not let's see another possibility !
    
    """)    
    
        # 3 - REWEIGHTING
    print("\033[1m" + "Trying to mitigate a bias" + "\033[0m")
    print("\033[1m" + "  Option 3: Reweighting observations" + "\033[0m")
    
    print("""  
    The reweighting algorithm looks at the protected attribute and on the real label. Then, it calculates the probability of assigning favorable label (y=1) assuming the protected attribute and y are independent. Of course, if there is bias, they will be statistically dependent. Then, the algorithm divides calculated theoretical probability by true, empirical probability of this event. That is how weight is created.
    
    Let's compare the effect of this method to the default model:
    """)
    
    if graphic != 'Default':
        explainers_fairness[0][1].plot([explainers_fairness[4][1]], 
                                      type=graphic, 
                                      metrics=metrics)
    else:
        explainers_fairness[0][1].plot([explainers_fairness[4][1]])
        
    print("""So far, this solution may have resolved the unfairness issue. If not let's see another possibility !
    
    """)    
    
        # 4 - ROC
    print("\033[1m" + "Trying to mitigate a bias" + "\033[0m")
    print("\033[1m" + "  Option 4: The ROC-Pivot method (Postprocessing)" + "\033[0m")
    
    print("""  
    This method of mitigation aims to change predictions for items close to the decision frontier.
It switches labels if an observation is from the unprivileged group and on the left (wrong side) of the cutoff. Note that It can also switches labels if an observation is from the privileged group and on the right of the cutoff.
    
    Let's compare the effect of this method to the default model:
    """)
    
    if graphic != 'Default':
        explainers_fairness[0][1].plot([explainers_fairness[5][1]], 
                                      type=graphic, 
                                      metrics=metrics)
    else:
        explainers_fairness[0][1].plot([explainers_fairness[5][1]])
        
    print("""This 3 previous methods are implemented by the library dalex to mitigate bias.
    
    Let's see below all solutions in 1 graphic:""")    
    
    
    if graphic != 'Default':
        explainers_fairness[0][1].plot([explainers_fairness[2][1], explainers_fairness[3][1], 
                                        explainers_fairness[4][1], explainers_fairness[5][1]], 
                                      type=graphic, 
                                      metrics=metrics)
    else:
        explainers_fairness[0][1].plot([explainers_fairness[2][1], explainers_fairness[3][1], 
                                        explainers_fairness[4][1], explainers_fairness[5][1]])
        
    
    
    
    return "tada"
    

In [None]:
def display_groups_fairness(selected_model, explainers_fairness, metricslist):
    
    metrics = [x for x in metricslist if x in ["TPR", "TNR", "PPV", "NPV", "FNR", "FPR", "FDR", "FOR", "ACC", "STP"]]
    
    
    if selected_model == "base":
        plot_radar_group(explainers_fairness[0][1], 
                         title=None, 
                         metrics=metrics)
    
    elif selected_model == "sampling_p":
        plot_radar_group(explainers_fairness[2][1], 
                         title=None, 
                         metrics=metrics)
    
    elif selected_model == "sampling_u":
        plot_radar_group(explainers_fairness[3][1], 
                         title=None, 
                         metrics=metrics)
    
    elif selected_model == "weights":
        plot_radar_group(explainers_fairness[4][1], 
                         title=None, 
                         metrics=metrics)
    
    elif selected_model == "roc-pivot":
        plot_radar_group(explainers_fairness[5][1], 
                         title=None, 
                         metrics=metrics)

    elif selected_model == "remove":
        plot_radar_group(explainers_fairness[1][1], 
                         title=None, 
                         metrics=metrics)
  
    else:
        print("model not recognize")

In [25]:
print("all loaded")

loaded
