# Fairness Metrics

This notebook implements the statistical fairness metrics from:
*Towards the Right Kind of Fairness in AI* by Boris Ruf and Marcin Detyniecki (2021)
https://arxiv.org/abs/2102.08453

Example with the `german-risk-scoring.csv` dataset.

Contributeurs : Xavier Lioneton & Francis Wolinski

## Imports

In [1]:
# imports
import numpy as np
import pandas as pd
from pandas.api.types import is_numeric_dtype

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

from IPython.display import display, Markdown

## Data Load

In [2]:
# dataset
data = pd.read_csv('german-risk-scoring.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 21 columns):
 #   Column                                                    Non-Null Count  Dtype 
---  ------                                                    --------------  ----- 
 0   Status of existing checking account                       1000 non-null   object
 1   Duration in month                                         1000 non-null   int64 
 2   Credit history                                            1000 non-null   object
 3   Purpose                                                   1000 non-null   object
 4   Credit amount                                             1000 non-null   int64 
 5   Savings account/bonds                                     1000 non-null   object
 6   Present employment since                                  1000 non-null   object
 7   Installment rate in percentage of disposable income       1000 non-null   int64 
 8   Personal status and sex      

In [3]:
# target
data['Cost Matrix(Risk)'].value_counts()

Good Risk    700
Bad Risk     300
Name: Cost Matrix(Risk), dtype: int64

In [4]:
# Personal status and sex
data["Personal status and sex"].value_counts()

male:single                          548
female:divorced/separated/married    310
male:married/widowed                  92
male:divorced/separated               50
Name: Personal status and sex, dtype: int64

## Data Prep

In [5]:
# create sex column
data["sex"] = data["Personal status and sex"].apply(lambda x : x.split(":")[0])

In [6]:
# create X=features, y=target
X = data.drop(columns = 'Cost Matrix(Risk)')
y = data['Cost Matrix(Risk)'].map({"Good Risk": 1, "Bad Risk": 0})

In [7]:
# type modifications

cols_cat = [
    'Status of existing checking account',
    'Credit history',
    'Purpose',
    'Savings account/bonds',
    'Present employment since',
    'Personal status and sex', 
    'Other debtors / guarantors',
    'Property',
    'Other installment plans',
    'Housing',
    'Job',
    'Telephone',
    'foreign worker',
    'sex'
       ]

cols_num = [
    'Duration in month',
    'Credit amount',
    'Installment rate in percentage of disposable income',
    'Present residence since',
    'Age in years',
    'Number of existing credits at this bank',
    'Number of people being liable to provide maintenance for',
    ]

for col in cols_cat:
    data[col] = data[col].astype(str)
    
for col in cols_num:
    data[col] = data[col].astype(float)

cols = cols_cat + cols_num

In [8]:
# unique values of categorical columns

X[cols_cat].nunique()

Status of existing checking account     4
Credit history                          5
Purpose                                10
Savings account/bonds                   5
Present employment since                5
Personal status and sex                 4
Other debtors / guarantors              3
Property                                4
Other installment plans                 3
Housing                                 3
Job                                     4
Telephone                               2
foreign worker                          2
sex                                     2
dtype: int64

In [9]:
# all to numbers

encoder = OneHotEncoder()
X_cat = encoder.fit_transform(X[cols_cat]).toarray()
X_num = X[cols_num]
X_prep = np.concatenate((X_num, X_cat), axis=1)
X_prep.shape

(1000, 63)

In [10]:
# data prepared

cols = data[cols_num].columns.tolist() + encoder.get_feature_names(input_features=X[cols_cat].columns).tolist()
data_prep = pd.DataFrame(X_prep, columns=cols)
data_prep.shape

(1000, 63)

In [11]:
# data prepared

data_prep.head()

Unnamed: 0,Duration in month,Credit amount,Installment rate in percentage of disposable income,Present residence since,Age in years,Number of existing credits at this bank,Number of people being liable to provide maintenance for,Status of existing checking account_0 <= <200 DM,Status of existing checking account_<0 DM,Status of existing checking account_>= 200 DM,...,Job_management/ highly qualified employee,Job_skilled employee / official,Job_unemployed/ unskilled - non-resident,Job_unskilled - resident,Telephone_none,Telephone_yes,foreign worker_no,foreign worker_yes,sex_female,sex_male
0,6.0,1169.0,4.0,4.0,67.0,2.0,1.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
1,48.0,5951.0,2.0,2.0,22.0,1.0,1.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0
2,12.0,2096.0,2.0,3.0,49.0,1.0,2.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0
3,42.0,7882.0,2.0,4.0,45.0,1.0,2.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0
4,24.0,4870.0,3.0,4.0,53.0,2.0,2.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0


## Machine Learning

In [12]:
# split train test

X_train, X_test, y_train, y_test = train_test_split(data_prep, y, test_size=0.2, random_state=42)
X_train = X_train.copy()
X_test = X_test.copy()
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(800, 63) (200, 63) (800,) (200,)


### Train Model

In [13]:
# train model

clf = LogisticRegression(random_state=0, n_jobs=8, max_iter=500)
clf.fit(X_train, y_train)

LogisticRegression(max_iter=500, n_jobs=8, random_state=0)

### Confusion mattrix

In [14]:
# Schema of confusion matrix

df = pd.DataFrame([['True negatives (TN)', 'False positives (FP)'], ['False negatives (FN)', 'True positives (TP)']], index=['Y = 0', 'Y = 1'], columns=['Ŷ = 0', 'Ŷ = 1'])
df = df.reindex(['Y = 1', 'Y = 0'])
df = df[['Ŷ = 1', 'Ŷ = 0']]
display(Markdown('**Schema of confusion matrix**'))
display(df)

**Schema of confusion matrix**

Unnamed: 0,Ŷ = 1,Ŷ = 0
Y = 1,True positives (TP),False negatives (FN)
Y = 0,False positives (FP),True negatives (TN)


In [15]:
# function pretty_confusion_mattrix()

def pretty_confusion_mattrix(y_label, y_pred, title=None):
    """Pretty print the confusion matrix computed by scikit-learn"""
    _TN, _FP, _FN, _TP = confusion_matrix(y_label, y_pred).flatten()
    array = [[_TP, _FN], [_FP, _TN]]
    df = pd.DataFrame(array, index=['Y = 1', 'Y = 0'], columns=['Ŷ = 1', 'Ŷ = 0'])
    if title is not None:
        display(Markdown(title))
    display(df)

In [16]:
# test dataset

y_pred = clf.predict(X_test)

pretty_confusion_mattrix(y_test, y_pred, title='**Confusion matrix for the test dataset**')

**Confusion matrix for the test dataset**

Unnamed: 0,Ŷ = 1,Ŷ = 0
Y = 1,124,17
Y = 0,27,32


In [17]:
# function pretty_confusion_mattrix_by_subgroup()

def pretty_confusion_mattrix_by_subgroup(X, col, X_test, y_label, y_pred, q=4):
    """Pretty print the confusion matrices by subgroup
    
    X: dataset
    col: used for spliting in subgroups
    X_test: test dataset
    y_label: target for test dataset
    y_pred: predictions for test dataset
    q: quartile used for numerical column"""
    
    # if col is numeric, use quantile
    cat = pd.qcut(X[col], q) if is_numeric_dtype(X[col]) else X[col]
    # select test data
    cat = cat.loc[X_test.index]
    # switch y_pred to Series so as to be able to select by subgroup
    y_pred = pd.Series(y_pred, index=y_label.index)
    # loop on subgroups
    for value in sorted(cat.unique()):
        X_select = X_test.loc[cat == value]
        pretty_confusion_mattrix(y_label.loc[X_select.index],
                                 y_pred.loc[X_select.index],
                                 title=f'**Subgroup**: {col} = {value}')

In [18]:
pretty_confusion_mattrix_by_subgroup(X, 'sex', X_test, y_test, y_pred)

**Subgroup**: sex = female

Unnamed: 0,Ŷ = 1,Ŷ = 0
Y = 1,35,4
Y = 0,7,10


**Subgroup**: sex = male

Unnamed: 0,Ŷ = 1,Ŷ = 0
Y = 1,89,13
Y = 0,20,22


In [19]:
pretty_confusion_mattrix_by_subgroup(X, 'Age in years', X_test, y_test, y_pred)

**Subgroup**: Age in years = (18.999, 27.0]

Unnamed: 0,Ŷ = 1,Ŷ = 0
Y = 1,35,8
Y = 0,3,14


**Subgroup**: Age in years = (27.0, 33.0]

Unnamed: 0,Ŷ = 1,Ŷ = 0
Y = 1,28,4
Y = 0,7,7


**Subgroup**: Age in years = (33.0, 42.0]

Unnamed: 0,Ŷ = 1,Ŷ = 0
Y = 1,35,4
Y = 0,6,5


**Subgroup**: Age in years = (42.0, 75.0]

Unnamed: 0,Ŷ = 1,Ŷ = 0
Y = 1,26,1
Y = 0,11,6


### Metrics derived from confusion matrix

**Actual postitives**

This number is the sum of the true positives and the false negatives, which can be viewed as missed
true positives.

$P = TP + FN$

**Actual negatives**

This number is the sum of the true negatives and the false positives, which again can be viewed as missed true negatives.

$N = TN + FP$

**Base rate**

This number, sometimes also called the prevalence rate, represents the proportion of actual positives with respect to the entire data set.

$BR = \frac{P}{P + N}$

**Positive rate**

This number is the overall rate of positively classified instances, including both correct and incorrect decisions.

$PR = \frac{TP + FP}{P + N}$

**Negative rate**

This number is the ratio of negative classification, again irrespective of whether the decisions were correct or incorrect.

$NR = \frac{TN + FN}{P + N}$

**Accuracy**

This number is the ratio of the correctly classified instances (positive and negative) of all decisions.

$ACC = \frac{TP + TN}{P + N}$

**Misclassiffication rate**

This number is the ratio of the misclassified instances over all decisions.

$MR = \frac{FN + FP}{P + N}$

**True positive rate (recall)**

This number describes the proportions of correctly classified positive instances.

$TPR = \frac{TP}{P}$

**True negative rate**

This number describes the proportions of correctly classified negative instances.

$TNR = \frac{TN}{N}$

**False positive rate**

This number denotes the proportion of actual negatives which was falsely classified as positive.

$FPR = \frac{FP}{P}$

**False negative rate (silence)**

This number describes the proportion of actual positives which was misclassified as negative.

$FNR = \frac{FN}{N}$

**False discovery rate (noise)**

This number describes the share of misclassified positive classifications of all positive predictions.

$FDR = \frac{FP}{TP + FP}$

**Positive predicted value (precision)**

This number describes the ratio of samples which were correctly classified as positive from all the positive predictions.

$PPV = \frac{TP}{TP + FP}$

**False omission rate**

This number describes the proportion of false negative predictions of all negative predictions.

$FOR = \frac{FN}{TN + FN}$

**Negative predicted value**

This number describes the ratio of samples which were correctly classified as negative from all the negative predictions.

$NPV = \frac{TN}{TN + FN}$

In [20]:
# function pretty_confusion_mattrix()

def pretty_fairness_confusion_mattrix(y_label, y_pred, title=None):
    """Pretty print fairness confusion matrix
    
    y_label: target for test dataset
    y_pred: predictions for test dataset
    title: string to display in Markdown"""
    
    # compute fairness metrics
    _TN, _FP, _FN, _TP = confusion_matrix(y_label, y_pred).flatten()
    _P = _TP + _FN
    _N = _FP + _TN
    _BR = _P / (_P + _N)
    _PR = (_TP + _FP) / (_P + _N)
    _NR = (_TN + _FN) / (_P + _N)
    _TPR = _TP / _P
    _TNR = _TN / _N
    _FDR = _FP / (_TP + _FP)
    _FOR = _FN / (_TN + _FN)
    
    # build the output dataframe
    array = [[_TP, _FN, f'TPR = {_TPR:.2f}'],
             [_FP, _TN, f'TNR = {_TNR:.2f}'],
             [f'FDR = {_FDR:.2f}', f'FOR = {_FOR:.2f}', f'BR = {_BR:.2f}'],
             [f'PR = {_PR:.2f}', f'NR = {_NR:.2f}', ''],
            ]
    df = pd.DataFrame(array, index=['Y = 0', 'Y = 1', '', ' '], columns=['Ŷ = 0', 'Ŷ = 1', ''])
    
    
    if title is not None:
        display(Markdown(title))
        
    display(df.style.set_table_styles([{'selector': 'td', 'props':[('text-align', 'center')]},
                                      {'selector': 'th', 'props': [('text-align', 'center')]}],
                                      overwrite=False))

In [21]:
pretty_fairness_confusion_mattrix(y_test, y_pred, title='**Fairness confusion matrix**')

**Fairness confusion matrix**

Unnamed: 0,Ŷ = 0,Ŷ = 1,Unnamed: 3
Y = 0,124,17,TPR = 0.88
Y = 1,27,32,TNR = 0.54
,FDR = 0.18,FOR = 0.35,BR = 0.70
,PR = 0.76,NR = 0.24,


In [22]:
# function pretty_fairness_confusion_mattrix_by_subgroup()

def pretty_fairness_confusion_mattrix_by_subgroup(X, col, X_test, y_label, y_pred, q=4):
    """Pretty print fairness confusion matrix by subgroup
    
    X: dataset
    col: used for spliting in subgroups
    X_test: test dataset
    y_label: target for test dataset
    y_pred: predictions for test dataset
    q: quartile used for numerical colum"""
    
    # if col is numeric, use quantile
    cat = pd.qcut(X[col], q) if is_numeric_dtype(X[col]) else X[col]
    # select test data
    cat = cat.loc[X_test.index]
    # switch y_pred to Series so as to be able to select by subgroup
    y_pred = pd.Series(y_pred, index=y_label.index)
    # loop on subgroups
    for value in sorted(cat.unique()):
        X_select = X_test.loc[cat == value]
        pretty_fairness_confusion_mattrix(y_label.loc[X_select.index],
                                          y_pred.loc[X_select.index],
                                          title=f'**Subgroup**: {col} = {value}')

In [23]:
pretty_fairness_confusion_mattrix_by_subgroup(X, 'sex', X_test, y_test, y_pred)

**Subgroup**: sex = female

Unnamed: 0,Ŷ = 0,Ŷ = 1,Unnamed: 3
Y = 0,35,4,TPR = 0.90
Y = 1,7,10,TNR = 0.59
,FDR = 0.17,FOR = 0.29,BR = 0.70
,PR = 0.75,NR = 0.25,


**Subgroup**: sex = male

Unnamed: 0,Ŷ = 0,Ŷ = 1,Unnamed: 3
Y = 0,89,13,TPR = 0.87
Y = 1,20,22,TNR = 0.52
,FDR = 0.18,FOR = 0.37,BR = 0.71
,PR = 0.76,NR = 0.24,


In [24]:
pretty_fairness_confusion_mattrix_by_subgroup(X, 'Age in years', X_test, y_test, y_pred)

**Subgroup**: Age in years = (18.999, 27.0]

Unnamed: 0,Ŷ = 0,Ŷ = 1,Unnamed: 3
Y = 0,35,8,TPR = 0.81
Y = 1,3,14,TNR = 0.82
,FDR = 0.08,FOR = 0.36,BR = 0.72
,PR = 0.63,NR = 0.37,


**Subgroup**: Age in years = (27.0, 33.0]

Unnamed: 0,Ŷ = 0,Ŷ = 1,Unnamed: 3
Y = 0,28,4,TPR = 0.88
Y = 1,7,7,TNR = 0.50
,FDR = 0.20,FOR = 0.36,BR = 0.70
,PR = 0.76,NR = 0.24,


**Subgroup**: Age in years = (33.0, 42.0]

Unnamed: 0,Ŷ = 0,Ŷ = 1,Unnamed: 3
Y = 0,35,4,TPR = 0.90
Y = 1,6,5,TNR = 0.45
,FDR = 0.15,FOR = 0.44,BR = 0.78
,PR = 0.82,NR = 0.18,


**Subgroup**: Age in years = (42.0, 75.0]

Unnamed: 0,Ŷ = 0,Ŷ = 1,Unnamed: 3
Y = 0,26,1,TPR = 0.96
Y = 1,11,6,TNR = 0.35
,FDR = 0.30,FOR = 0.14,BR = 0.61
,PR = 0.84,NR = 0.16,
