In [None]:
# %pip install dalex  # uncomment and run once if dalex is not installed in this kernel

In [1]:
import dalex as dx
import numpy as np

import plotly.express as px  # Radar plot

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier

# Load the German credit dataset shipped with dalex
data = dx.datasets.load_german()

# Separate the predictors from the target column `risk`
y = data['risk']
X = data.drop(columns='risk')

# Column groups that are routed through different preprocessing branches
numerical_features = ['age', 'duration', 'credit_amount']
categorical_features = [
    'sex',
    'job',
    'housing',
    'saving_accounts',
    'checking_account',
    'purpose',
]

# Categorical branch: one-hot encoding configured with handle_unknown='ignore'
categorical_transformer = Pipeline([
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Encoded categoricals come first; numeric columns are passed through unchanged
preprocessor = ColumnTransformer([
    ('cat', categorical_transformer, categorical_features),
    ('num', 'passthrough', numerical_features),
])

# Full model: preprocessing followed by a depth-limited decision tree
clf = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', DecisionTreeClassifier(max_depth=7, random_state=123)),
])

# Fit on the whole dataset; the fitted pipeline is the cell's displayed value
clf.fit(X, y)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('cat',
                                                  Pipeline(steps=[('onehot',
                                                                   OneHotEncoder(handle_unknown='ignore'))]),
                                                  ['sex', 'job', 'housing',
                                                   'saving_accounts',
                                                   'checking_account',
                                                   'purpose']),
                                                 ('num', 'passthrough',
                                                  ['age', 'duration',
                                                   'credit_amount'])])),
                ('classifier',
                 DecisionTreeClassifier(max_depth=7, random_state=123))])

In [4]:
# Wrap the fitted pipeline together with its data and target in a dalex Explainer
exp = dx.Explainer(model=clf, data=X, y=y)

Preparation of a new explainer is initiated

  -> data              : 1000 rows 9 cols
  -> target variable   : Parameter 'y' was a pandas.Series. Converted to a numpy.ndarray.
  -> target variable   : 1000 values
  -> model_class       : sklearn.tree._classes.DecisionTreeClassifier (default)
  -> label             : Not specified, model's class short name will be used. (default)
  -> predict function  : <function yhat_proba_default at 0x7ff094e999d0> will be used (default)
  -> predict function  : Accepts only pandas.DataFrame, numpy.ndarray causes problems.
  -> predicted values  : min = 0.0, mean = 0.7, max = 1.0
  -> model type        : classification will be used (default)
  -> residual function : difference between y and yhat (default)
  -> residuals         : min = -0.944, mean = 0.0, max = 0.9
  -> model_info        : package sklearn

A new explainer has been created!


In [5]:
# array with values like male_old, female_young, etc.
protected = data.sex + '_' + np.where(data.age < 25, 'young', 'old')
privileged = 'male_old'

In [6]:
# Build the fairness object for the subgroup labels, with `privileged` as the reference group
fobject = exp.model_fairness(
    protected=protected,
    privileged=privileged,
)

In [24]:
def plot_radar_group(fobject, title=None, metrics=("TPR", "ACC", "PPV", "FPR", "STP")):
    """Draw a radar (polar line) chart of fairness metric scores, one trace per group.

    Parameters
    ----------
    fobject : object
        Fairness object exposing a ``metric_scores`` DataFrame. Its index is
        used as the groups and its columns as the metric names.
    title : str, optional
        Figure title; defaults to ``'Radar Plot by Group'``.
    metrics : str or iterable of str
        Metric column names to plot. ``'all'`` selects every column of
        ``fobject.metric_scores``; any other single string is treated as one
        metric name. Names that are not columns of ``metric_scores`` are
        skipped with a warning instead of raising.

    Returns
    -------
    None
        The figure is rendered via ``fig.show()``.
    """
    import warnings

    scores = fobject.metric_scores

    # Only compare against 'all' for real strings: `==` on a pandas Index or a
    # numpy array is elementwise and cannot be used as an `if` condition.
    if isinstance(metrics, str):
        metrics = scores.columns if metrics == 'all' else [metrics]
    else:
        # Materialize so one-shot iterables can be scanned twice below.
        metrics = list(metrics)

    # Prevent a crash for wrong metric names, but tell the caller what was dropped.
    metrics_common = [name for name in metrics if name in scores.columns]
    skipped = [name for name in metrics if name not in scores.columns]
    if skipped:
        warnings.warn(
            f"Ignoring metrics not found in metric_scores: {skipped}",
            stacklevel=2,
        )

    # Wide (group x metric) -> long format expected by plotly express.
    df = scores[metrics_common].stack().reset_index()
    df.columns = ['Group', 'Fairness Metric', 'Value']

    fig = px.line_polar(df,
                        r="Value",
                        theta='Fairness Metric',
                        color="Group",
                        line_close=True,
                        hover_name='Group')

    if title is None:
        title = 'Radar Plot by Group'

    # Radial axis is fixed to [0, 1] so plots are comparable across calls.
    fig.update_layout(title=title, polar=dict(radialaxis=dict(tickangle=0, nticks=6, range=[0, 1])))

    fig.show()

In [26]:
# Radar plot of the fairness object using the function's default metric selection
plot_radar_group(fobject)

In [27]:
# Radar plot over every metric column available in fobject.metric_scores
plot_radar_group(fobject, metrics='all')