# Goal: Fairness Metrics Computation

The goal of this notebook is to compute statistical fairness metrics, after the execution of a set of mitigation techniques, given:
- a training dataset (mitigated in case of a pre-processing technique),
- a target variable,
- a sensible attribute.


# Import Libraries





In [1]:
try:
  from google.colab import drive
  drive.mount('/content/drive')
  import sys
  path_to_project = '/content/drive/MyDrive/FairAlgorithm'
  sys.path.append(path_to_project)
  !sudo apt install libcairo2-dev pkg-config python3-dev
  IN_COLAB = True
except:
  IN_COLAB = False

Mounted at /content/drive
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
python3-dev is already the newest version (3.10.6-1~22.04.1).
python3-dev set to manually installed.
The following packages were automatically installed and are no longer required:
  libbz2-dev libpkgconf3 libreadline-dev
Use 'sudo apt autoremove' to remove them.
The following additional packages will be installed:
  libblkid-dev libblkid1 libcairo-script-interpreter2 libffi-dev
  libglib2.0-dev libglib2.0-dev-bin libice-dev liblzo2-2 libmount-dev
  libmount1 libpixman-1-dev libselinux1-dev libsepol-dev libsm-dev
  libxcb-render0-dev libxcb-shm0-dev
Suggested packages:
  libcairo2-doc libgirepository1.0-dev libglib2.0-doc libgdk-pixbuf2.0-bin
  | libgdk-pixbuf2.0-dev libxml2-utils libice-doc cryptsetup-bin libsm-doc
The following packages will be REMOVED:
  pkgconf r-base-dev
The following NEW packages will be installed:
  libblkid-dev libcairo-script-interpreter2 

In [2]:
#import libraries
import numpy as np
import pandas as pd
import pickle
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.model_selection import cross_validate,cross_val_score,cross_val_predict,train_test_split,StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from tqdm.notebook import tqdm

# Configure the notebook


In the next code cell, set all the variables that are used throughout the notebook.  
The variables are used to configure the notebook, and to set the paths to the data files.

Modify the variables in the next code cell to configure the notebook

- `dataset_name`: The name of the dataset file.
- `dataset_path`: The path to the dataset file.
- `target_variable`: The target feature to predict.
- `target_variable_labels`: The labels for the target feature.
- `sensible_attribute`: The sensible attribute to use for bias mitigation.

In [283]:
mitigation = 'aif360-roc'  # options (see all_mitigations below): [original, fl-cr, fl-to, aif360-rw, aif360-di, aif360-lfr, aif360-op, aif360-ad, aif360-pr, aif360-er, aif360-ce, aif360-eo, aif360-roc]

In [284]:
#INPUT
dataset_name = "diabetes-women"
#dataset_name = "sepsis"

if dataset_name == "diabetes-women":
  ignore_cols = ['Age']
  target_variable = 'Outcome'
  target_variable_labels= [1,0]
  sensible_attribute = 'AgeCategory'
  default_mappings = {
      'label_maps': [{1.0: 'Diabetic', 0.0: 'NonDiabetic'}],
      'protected_attribute_maps': [{1.0: 'Adult', 0.0: 'Young'}]
  }

elif dataset_name == "sepsis":
  ignore_cols = []
  target_variable = 'Mortality'
  target_variable_labels= [1,0]
  sensible_attribute = 'Gender_cat'
  #sensible_attribute = 'Age_cat'

#dataset_path = path_to_project + '/data/preprocessed/preprocessed-{}.csv'.format(dataset_name) if IN_COLAB else 'data/preprocessed/preprocessed-{}.csv'.format(dataset_name)
#df = pd.read_csv(dataset_path)
#df = df.drop(columns=ignore_cols)
#feature_cols = df.columns

In [285]:
# Experiment-wide constants.
n_estimators = 30
random_seed = 1234
n_splits= 10

# Classifiers whose predictions are evaluated; the keys are the model names
# used to index the loaded predictions.
models = {'Logistic Regression':LogisticRegression(max_iter=500),
          'Decision Tree':DecisionTreeClassifier(max_depth=None),
          'Bagging':BaggingClassifier(DecisionTreeClassifier(max_depth=3),n_estimators=n_estimators),
          'Random Forest':RandomForestClassifier(n_estimators=n_estimators),
          'Extremely Randomized Trees':ExtraTreesClassifier(n_estimators=n_estimators),
          'Ada Boost':AdaBoostClassifier(DecisionTreeClassifier(max_depth=3),n_estimators=n_estimators)}

# Metric families and the metrics reported for each of them.
family = ['division', 'subtraction']
fairness_catalogue = ['GroupFairness', 'PredictiveParity', 'PredictiveEquality', 'EqualOpportunity', 'EqualizedOdds', 'ConditionalUseAccuracyEquality', 'OverallAccuracyEquality', 'TreatmentEquality', 'FORParity', 'FN', 'FP']

all_mitigations = ['original','fl-cr', 'fl-to', 'aif360-rw', 'aif360-di', 'aif360-lfr', 'aif360-op', 'aif360-ad', 'aif360-pr', 'aif360-er', 'aif360-ce', 'aif360-eo', 'aif360-roc']

# Mitigations whose stored predictions are indexed by split only (no model level).
without_model_mitigations = ['aif360-ad', 'aif360-pr', 'aif360-er']
# Mitigations that produce a new (mitigated) dataset file.
# The comma between "aif360-op" and "aif360-lfr" is required: without it Python
# concatenates the two adjacent literals into a single bogus entry.
new_dataset_mitigations = ["fl-cr", "aif360-di", "aif360-op", "aif360-lfr"]

In [286]:
# Pick the source dataset: pre-processing mitigations write a new (mitigated)
# CSV, whereas in- and post-processing ones keep using the preprocessed CSV.
# NOTE(review): the Colab file name contains the sensitive attribute, the local
# one does not -- confirm that both naming schemes really exist on disk.
if mitigation in new_dataset_mitigations:
  if IN_COLAB:
    dataset_path = path_to_project + '/data/mitigated/mitigated-{}-{}-{}.csv'.format(dataset_name, sensible_attribute, mitigation)
  else:
    dataset_path = 'data/mitigated/mitigated-{}-{}.csv'.format(dataset_name, mitigation)
elif IN_COLAB:
  dataset_path = path_to_project + '/data/preprocessed/preprocessed-{}.csv'.format(dataset_name)
else:
  dataset_path = 'data/preprocessed/preprocessed-{}.csv'.format(dataset_name)

In [287]:
# Read the CSV selected in the previous cell.
df = pd.read_csv(dataset_path)
#df = df.drop(columns=ignore_cols)
# feature_cols holds every column of the loaded frame (nothing is dropped here).
feature_cols = df.columns
#sensible_values = [0, 1]  # 0 is the discriminated group, 1 the privileged one

In [288]:
# Bundle the notebook settings into one dictionary (unpacked by unpack_config).
config = {
    'df': df,
    'target_variable': target_variable,
    'sensible_attribute': sensible_attribute,
    'path_to_project': path_to_project,
    'n_splits': n_splits,
    'models': models,
    'n_estimators': n_estimators,
    'random_seed': random_seed,
}

In [289]:
def unpack_config(config):
  """Return the configuration values as a tuple.

  The order is: df, target_variable, sensible_attribute, path_to_project,
  n_splits, models, n_estimators, random_seed. A missing key raises KeyError.
  """
  ordered_keys = ('df', 'target_variable', 'sensible_attribute', 'path_to_project',
                  'n_splits', 'models', 'n_estimators', 'random_seed')
  return tuple(config[key] for key in ordered_keys)

# Retrieve predictions and test values

Load the predictions and scores of ML algorithms after the mitigation.

In [290]:
# Load the pickled predictions for the chosen dataset / sensitive attribute / mitigation.
# The path is always built from path_to_project, whether or not IN_COLAB is set.
# NOTE: pickle.load can execute arbitrary code -- only open files produced by this project.
load_path = path_to_project + '/data/predictions_and_tests/pred_test-{}-{}-{}.p'.format(dataset_name, sensible_attribute, mitigation)
with open(load_path, 'rb') as fp:
    predictions_and_tests = pickle.load(fp)

In [291]:
# Second name for the same object (no copy is made).
# NOTE(review): predicted_values is not referenced in the visible part of the notebook.
predicted_values = predictions_and_tests

In [292]:
# Dumps the whole loaded structure (every split's y_test / y_pred / s_test arrays).
# NOTE(review): consider printing only a summary (e.g. the top-level keys) to keep the output short.
print(predictions_and_tests)

{'Logistic Regression': {0: {'y_test': array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0]), 'y_pred': array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0]), 's_test': array([1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1,
       1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0,
       1])}, 1: {'y_test': array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1]), 'y

# Libraries to build Confusion Matrix and compute performance metrics

We need to save the indexes of both groups `privileged` and `discriminated` in two lists.

`y_privileged` is the part of the dataset where `sensible_value` = 1 (for example `AgeCategory` = 1), and `y_discriminated` is the part of the dataset where `sensible_value` = 0.

Build the confusion matrices (one for the privileged group, one for the discriminated group) for each model.


Not for in-processing that has only one ML model!

In [293]:
def compute_scores(predictions_and_tests, models, n_splits):
  """Compute accuracy, precision, recall and F1 for every cross-validation split.

  Reads the notebook globals `mitigation` and `without_model_mitigations`:
  - if the mitigation is not in that list, `predictions_and_tests` is indexed as
    [model_name][split] and each returned value is a dict model_name -> list;
  - otherwise it is indexed as [split] and each returned value is a plain list.

  Every split entry must provide 'y_test' and 'y_pred'.
  Returns the tuple (accuracy, precision, recall, f1_score).
  """
  def _scores_over_splits(fetch_split):
    # fetch_split(i) returns the record of split i; lookups stay lazy so that
    # nothing is indexed when n_splits is 0.
    acc_vals, prec_vals, rec_vals, f1_vals = [], [], [], []
    for split_idx in range(n_splits):
      y_test = fetch_split(split_idx)['y_test']
      y_pred = fetch_split(split_idx)['y_pred']
      prec_vals.append(metrics.precision_score(y_test, y_pred))
      rec_vals.append(metrics.recall_score(y_test, y_pred))
      acc_vals.append(metrics.accuracy_score(y_test, y_pred))
      f1_vals.append(metrics.f1_score(y_test, y_pred))
    return acc_vals, prec_vals, rec_vals, f1_vals

  if mitigation in without_model_mitigations:
    # Single set of predictions: return the four lists directly.
    return _scores_over_splits(lambda idx: predictions_and_tests[idx])

  accuracy, precision, recall, f1_score = {}, {}, {}, {}
  for model_name in models:
    acc_vals, prec_vals, rec_vals, f1_vals = _scores_over_splits(
        lambda idx: predictions_and_tests[model_name][idx])
    precision[model_name] = prec_vals
    recall[model_name] = rec_vals
    accuracy[model_name] = acc_vals
    f1_score[model_name] = f1_vals
  return accuracy, precision, recall, f1_score

In [294]:
def compute_mean_std_dev(metric_list, models):
  """Summarise per-split scores as [mean, standard deviation].

  With `models` given, `metric_list` is indexed by model name and the result
  is a dict model_name -> [mean, std]. With `models` set to None,
  `metric_list` is a flat sequence and the result is a single [mean, std] list.
  """
  def _mean_and_std(values):
    arr = np.array(values)
    return [arr.mean(), arr.std()]

  if models is None:
    return _mean_and_std(metric_list)
  return {model_name: _mean_and_std(metric_list[model_name]) for model_name in models}

In [295]:
def compute_confusion_matrices(predictions_and_tests, target_variable_labels, models, n_splits):
  """Build one confusion matrix per group (discriminated / privileged) and split.

  Rows with s_test == 0 form the 'discriminated' group, rows with s_test == 1
  the 'privileged' group. Every matrix is computed with
  labels=target_variable_labels, so rows and columns always follow that label
  order and the matrix keeps one row/column per listed label even when a
  group's split contains a single class.

  Reads the notebook globals `mitigation` and `without_model_mitigations`:
  - mitigation not in that list: input indexed as [model_name][split], result is
    {model_name: {split: {'discriminated': cm, 'privileged': cm}}};
  - otherwise: input indexed as [split], result is
    {split: {'discriminated': cm, 'privileged': cm}}.

  NOTE(review): retrieve_values reads cm[0][0] as TN and cm[1][1] as TP, i.e. it
  treats the first listed label as the negative class. With the configured
  target_variable_labels = [1, 0], index 0 corresponds to label 1 -- confirm that
  this ordering is the intended definition of the "positive" outcome.
  """
  def _group_matrices(split_result):
    # One row per test sample, then split by the value of the sensitive attribute.
    df_metrics = pd.DataFrame({'s_test': split_result['s_test'],
                               'y_test': split_result['y_test'],
                               'y_pred': split_result['y_pred']})
    df_discrim = df_metrics[df_metrics['s_test'] == 0]
    df_priv = df_metrics[df_metrics['s_test'] == 1]
    cm_discriminated = confusion_matrix(df_discrim['y_test'], df_discrim['y_pred'], labels=target_variable_labels)
    cm_privileged = confusion_matrix(df_priv['y_test'], df_priv['y_pred'], labels=target_variable_labels)
    return {'discriminated': cm_discriminated, 'privileged': cm_privileged}

  if mitigation in without_model_mitigations:
    # Single set of predictions: no model level in the result.
    return {i: _group_matrices(predictions_and_tests[i]) for i in range(n_splits)}

  confusion_matrices = {}
  for model_name in models:
    confusion_matrices[model_name] = {
        i: _group_matrices(predictions_and_tests[model_name][i]) for i in range(n_splits)
    }
  return confusion_matrices

## Functions to compute fairness metrics

Terminology:

- d is the predicted value,
- Y is the actual value in the dataset
- G the protected attribute, priv= privileged group, discr=discriminated group
- L is the legitimate attribute (only for Conditional Statistical Parity)

Fairness Metrics List:

1. Group Fairness: (d=1|G=priv) = (d=1|G=discr)
2. Predictive Parity: (Y=1|d=1,G=priv) = (Y=1|d=1,G=discr)
3. Predictive Equality: (d=1|Y=0,G=priv) = (d=1|Y=0,G=discr)
4. Equal Opportunity:  (d=0|Y=1,G=priv) = (d=0|Y=1,G=discr)
5. Equalized Odds: (d=1|Y=i,G=priv) = (d=1|Y=i,G=discr), i ∈ 0,1
6. ConditionalUseAccuracyEquality: (Y=1|d=1, G=priv) = (Y=1|d=1,G=discr) and (Y=0|d=0,G=priv) = (Y=0|d=0,G=discr)
7. Overall Accuracy Equality: (d=Y, G=priv) = (d=Y, G=discr)
8. Treatment Equality: (Y=1, d=0, G=priv)/(Y=0, d=1, G=priv) = (Y=1, d=0, G=discr)/(Y=0, d=1, G=discr)
9. FOR Parity: (Y=1|d=0, G=priv) = (Y=1|d=0,G=discr)

How to evaluate the results?

Looking at the value for each corresponding metric:

- If the value is between 0 and 1-t the discriminated group suffers from unfairness
- If the value is greater than 1+t the privileged group suffers from unfairness
- If the value is between 1-t and 1+t both privileged and discriminated group have a fair treatment

t is a threshold that should be chosen by the user according to the context and the goal of the task.


In [296]:
# Retrieve TP, TN, FP, FN values from a confusion matrix
def retrieve_values(cm):
  """Unpack a 2x2 confusion matrix into (TP, TN, FP, FN, total).

  Cell mapping: cm[0][0] -> TN, cm[0][1] -> FP, cm[1][0] -> FN, cm[1][1] -> TP;
  `total` is the sum of the four cells.
  NOTE(review): this mapping assumes the first row/column is the negative class;
  check it against the label order used when the matrix was built.
  """
  first_row, second_row = cm[0], cm[1]
  TN, FP = first_row[0], first_row[1]
  FN, TP = second_row[0], second_row[1]
  return TP, TN, FP, FN, TN+FP+FN+TP

def rescale(metric):
  """Shift a metric down by one, so that a value of 1 maps to 0."""
  return metric - 1

def standardization(metric):
  """Clip a metric to the interval [-1, 1]; values inside it are returned unchanged."""
  if metric > 1:
    return 1
  if metric < -1:
    return -1
  return metric

def valid(metric, th):
  """Return True when `metric` lies strictly inside the band (1-th, 1+th)."""
  return bool(1-th < metric < 1+th)

def and_function(m1, m2, th):
  """Combine two component metrics into one value using the band 1 +/- th.

  Returns min(m1, m2) when both components are below 1-th, or when one is
  inside the band (see `valid`) and the other is below it; returns
  max(m1, m2) in every other case.
  """
  lower, upper = 1-th, 1+th
  above_1, above_2 = m1 > upper, m2 > upper
  below_1, below_2 = m1 < lower, m2 < lower

  if above_1 and above_2:
    return max(m1, m2)
  if below_1 and below_2:
    return min(m1, m2)

  inside_1, inside_2 = valid(m1, th), valid(m2, th)
  if inside_1 and inside_2:
    return max(m1, m2)
  if inside_1 or inside_2:
    # Exactly one component is inside the band: follow the other one.
    if above_1 or above_2:
      return max(m1, m2)
    if below_1 or below_2:
      return min(m1, m2)
  return max(m1, m2)

In [297]:
# Fairness metrics computed using division operator
def fairness_metrics_division(confusion_matrix, threshold = 0.15):
  """Compute the fairness metrics of one split as ratios between the two groups.

  Parameters:
    confusion_matrix: mapping with the keys 'privileged' and 'discriminated',
      each holding a confusion matrix readable by retrieve_values.
    threshold: tolerance passed to and_function when two component ratios are
      combined (EqualizedOdds, ConditionalUseAccuracyEquality, OverallAccuracyEquality).

  Returns:
    dict metric_name -> {'Value', 'Discr_group', 'Priv_group'}. 'Value' is the
    ratio shifted by -1 (rescale) and clipped to [-1, 1] (standardization).
    For the three combined metrics, 'Discr_group' and 'Priv_group' hold the two
    component ratios rather than per-group values.

  Sentinels used when a ratio is not computed: 2 ("max value") and 0 ("min value").
  """

  TP_priv, TN_priv, FP_priv, FN_priv, len_priv = retrieve_values(confusion_matrix['privileged'])
  TP_discr, TN_discr, FP_discr, FN_discr, len_discr = retrieve_values(confusion_matrix['discriminated'])

  # Group fairness: (TP+FP)/total of each group, ratio discriminated / privileged.
  GroupFairness_discr = (TP_discr+FP_discr)/len_discr
  GroupFairness_priv = (TP_priv+FP_priv)/len_priv
  if GroupFairness_priv == 0:
    GroupFairness = 2  #max value
  else:
     GroupFairness = GroupFairness_discr/GroupFairness_priv

  # Predictive parity: TP/(TP+FP) per group, ratio discriminated / privileged.
  # The sentinel values assigned in the first two if-blocks are always
  # overwritten by the final if/elif/else; the same pattern recurs below.
  if TP_discr+FP_discr == 0:
    PredictiveParity_discr = 0
    PredictiveParity = 0  #min value
  else:
    PredictiveParity_discr = (TP_discr)/(TP_discr+FP_discr)
  if TP_priv+FP_priv == 0:
    PredictiveParity_priv = 0
    PredictiveParity = 2  #max value
  else:
    PredictiveParity_priv = (TP_priv)/(TP_priv+FP_priv)
  if PredictiveParity_discr != 0 and PredictiveParity_priv != 0:
    PredictiveParity = PredictiveParity_discr/PredictiveParity_priv
  elif PredictiveParity_priv == 0:
    PredictiveParity = 2  #max value
  else:
    PredictiveParity = 0  #min value

  # Predictive equality: FP/(TN+FP) per group, ratio discriminated / privileged.
  if TN_discr+FP_discr == 0:
    PredictiveEquality_discr = 0
    PredictiveEquality = 0  #min value
  else:
    PredictiveEquality_discr = (FP_discr)/(TN_discr+FP_discr)
  if TN_priv+FP_priv == 0:
    PredictiveEquality_priv = 0
    PredictiveEquality = 2  #max value
  else:
    PredictiveEquality_priv = (FP_priv)/(TN_priv+FP_priv)
  if PredictiveEquality_discr != 0 and PredictiveEquality_priv != 0:
    PredictiveEquality = PredictiveEquality_discr/PredictiveEquality_priv
  elif PredictiveEquality_priv == 0:
    PredictiveEquality = 2  #max value
  else:
    PredictiveEquality = 0  #min value

  # Equal opportunity: FN/(TP+FN) per group, ratio privileged / discriminated
  # (note the inverted direction compared with the metrics above).
  # NOTE(review): a zero discriminated rate yields 0 here, whereas the analogous
  # FN metric further down yields 2 when its discriminated rate is zero -- confirm.
  if FN_priv+TP_priv == 0:
    EqualOpportunity_priv = 0
    EqualOpportunity = 2  #max value
  else:
    EqualOpportunity_priv = (FN_priv)/(TP_priv+FN_priv)
  if FN_discr+TP_discr == 0:
    EqualOpportunity_discr = 0
    EqualOpportunity = 0  #min value
  else:
    EqualOpportunity_discr = (FN_discr)/(TP_discr+FN_discr)
  if EqualOpportunity_priv != 0 and EqualOpportunity_discr != 0:
    EqualOpportunity = EqualOpportunity_priv/EqualOpportunity_discr
  elif EqualOpportunity_discr == 0:
    EqualOpportunity = 0  #min value
  else:
    EqualOpportunity = 2  #max value

  # Equalized odds: component 1 is the ratio of TP/(TP+FN), component 2 the
  # ratio of FP/(TN+FP), both discriminated / privileged. Any component equal
  # to 0 makes the combined value 2 in the final if/else.
  if FN_discr+TP_discr == 0:
    EqualizedOdds1 = 0
    EqualizedOdds = 0 #min value
  elif FN_priv+TP_priv == 0:
    EqualizedOdds1 = 0
    EqualizedOdds = 2 #max value
  elif (TP_priv/(TP_priv+FN_priv)) == 0:
    EqualizedOdds1 = 2 #max value
  else:
    EqualizedOdds1 = ((TP_discr/(TP_discr+FN_discr)) / (TP_priv/(TP_priv+FN_priv))) # (1-equalOpportunity_discr)/(1-equalOpportunity_priv)
  if TN_priv+FP_priv == 0:
    EqualizedOdds2 = 0
    EqualizedOdds = 2 #max value
  elif TN_discr+FP_discr == 0:
    EqualizedOdds2 = 0
    EqualizedOdds = 0 #min value
  elif (FP_priv/(TN_priv+FP_priv)) == 0:
    EqualizedOdds2 = 2 #max value
  else:
    EqualizedOdds2 = ((FP_discr/(TN_discr+FP_discr)) / (FP_priv/(TN_priv+FP_priv))) # = PredictiveEquality
  # EqualizedOdds = (EqualizedOdds1 * EqualizedOdds2)
  if EqualizedOdds1 != 0 and EqualizedOdds2 != 0:
    EqualizedOdds = and_function(EqualizedOdds1, EqualizedOdds2, threshold)
  else:
    EqualizedOdds = 2 #max value

  # Conditional use accuracy equality: component 1 is the ratio of TP/(TP+FP),
  # component 2 the ratio of TN/(TN+FN), both discriminated / privileged.
  # NOTE(review): the first guard tests TN_discr+FP_discr, while the division in
  # the elif uses TP_priv+FP_priv, which is never checked for zero here (the
  # subtraction variant checks TP_priv+FP_priv at this point) -- possible typo.
  if TP_discr+FP_discr == 0 or TN_discr+FP_discr == 0:
    ConditionalUseAccuracyEquality1 = 0
    ConditionalUseAccuracyEquality= 0 #min value
  elif (TP_priv/(TP_priv+FP_priv)) == 0:
    ConditionalUseAccuracyEquality1 = 2 #max value
  else:
    ConditionalUseAccuracyEquality1 = ((TP_discr/(TP_discr+FP_discr)) / (TP_priv/(TP_priv+FP_priv)))
  if TN_discr+FN_discr == 0 or TN_priv+FN_priv == 0:
    ConditionalUseAccuracyEquality2 = 0
    ConditionalUseAccuracyEquality = 2 #max value
  elif (TN_priv/(TN_priv+FN_priv)) == 0:
    ConditionalUseAccuracyEquality2 = 2 #max value
  else:
    ConditionalUseAccuracyEquality2 = ((TN_discr/(TN_discr+FN_discr)) / (TN_priv/(TN_priv+FN_priv)))
  # ConditionalUseAccuracyEquality = (ConditionalUseAccuracyEquality1 * ConditionalUseAccuracyEquality2)
  if ConditionalUseAccuracyEquality1 != 0 and ConditionalUseAccuracyEquality2 != 0:
    ConditionalUseAccuracyEquality = and_function(ConditionalUseAccuracyEquality1, ConditionalUseAccuracyEquality2, threshold)
  else:
    ConditionalUseAccuracyEquality = 2 #max value

  # Overall accuracy equality: ratios of the raw TP and TN counts
  # (discriminated / privileged), combined with and_function.
  # NOTE(review): these are count ratios, not per-group accuracy rates, so they
  # depend on the group sizes -- confirm this is intended.
  if TP_priv == 0:
    OAE1 = 0
    OverallAccuracyEquality = 2 #max value
  else:
    OAE1 = TP_discr/TP_priv
  if TN_priv == 0:
    OAE2 = 0
    OverallAccuracyEquality = 2 #max value
  else:
    OAE2 = TN_discr/TN_priv
  # OverallAccuracyEquality = (OAE1 * OAE2)
  if OAE1 != 0 and OAE2 != 0:
    OverallAccuracyEquality = and_function(OAE1, OAE2, threshold)
  else:
    OverallAccuracyEquality = 2 #max value

  # Treatment equality: FN/FP per group, ratio privileged / discriminated.
  if FP_priv == 0:
    TreatmentEquality_priv = 0
    TreatmentEquality = 2  #max value
  else:
    TreatmentEquality_priv = (FN_priv/FP_priv)
  if FP_discr == 0:
    TreatmentEquality_discr = 0
    TreatmentEquality = 0 #min value
  elif (FN_discr/FP_discr) == 0:
    TreatmentEquality_discr = 0 #max value
    TreatmentEquality = 0 #min value
  else:
    TreatmentEquality_discr = (FN_discr/FP_discr)
  if TreatmentEquality_priv != 0 and TreatmentEquality_discr != 0:
    TreatmentEquality = TreatmentEquality_priv/TreatmentEquality_discr
  elif TreatmentEquality_priv == 0:
    TreatmentEquality = 2 #max value
  else:
    TreatmentEquality = 0 #min value

  # FOR parity: FN/(TN+FN) per group, ratio privileged / discriminated.
  if TN_priv+FN_priv == 0:
    FORParity_priv = 0
    FORParity = 2 #max value
  else:
    FORParity_priv = (FN_priv)/(TN_priv+FN_priv)
  if TN_discr+FN_discr == 0:
    FORParity_discr = 0
    FORParity = 0  #min value
  elif (FN_discr)/(TN_discr+FN_discr) == 0:
    FORParity_discr = 0
    FORParity = 0 #min value
  else:
    FORParity_discr = (FN_discr)/(TN_discr+FN_discr)
  if FORParity_priv != 0 and FORParity_discr != 0:
    FORParity = FORParity_priv/FORParity_discr
  elif FORParity_priv == 0:
    FORParity = 2 #max value
  else:
    FORParity = 0 #min value


  # FN: FN/total per group, ratio privileged / discriminated.
  FN_P_discr = (FN_discr)/len_discr
  FN_P_priv = (FN_priv)/len_priv
  if FN_P_discr == 0:
    FN_metric = 2  #max value
  else:
    FN_metric = FN_P_priv/FN_P_discr


  # FP: FP/total per group, ratio discriminated / privileged.
  # NOTE(review): a zero denominator yields 0 here, while GroupFairness (same
  # discriminated / privileged direction) yields 2 in that case -- confirm.
  FP_P_discr = (FP_discr)/len_discr
  FP_P_priv = (FP_priv)/len_priv
  if FP_P_priv == 0:
    FP_metric = 0  #min value
  else:
    FP_metric = FP_P_discr/FP_P_priv


  #RecallParity = (TP_discr/(TP_discr+FN_discr))/(TP_priv/(TP_priv+FN_priv))

  # Each entry is [combined value, second slot, third slot]; the loop below
  # turns it into the returned dictionary layout.
  metrics = {}
  metrics['GroupFairness'] = [GroupFairness, GroupFairness_discr, GroupFairness_priv]
  metrics['PredictiveParity'] = [PredictiveParity, PredictiveParity_discr, PredictiveParity_priv]
  metrics['PredictiveEquality'] = [PredictiveEquality, PredictiveEquality_discr, PredictiveEquality_priv]
  metrics['EqualOpportunity'] = [EqualOpportunity, EqualOpportunity_discr, EqualOpportunity_priv]
  metrics['EqualizedOdds'] = [EqualizedOdds, EqualizedOdds1, EqualizedOdds2]
  metrics['ConditionalUseAccuracyEquality'] = [ConditionalUseAccuracyEquality, ConditionalUseAccuracyEquality1 , ConditionalUseAccuracyEquality2]
  metrics['OverallAccuracyEquality'] = [OverallAccuracyEquality, OAE1, OAE2]
  metrics['TreatmentEquality'] = [TreatmentEquality, TreatmentEquality_discr, TreatmentEquality_priv]
  metrics['FORParity'] = [FORParity, FORParity_discr, FORParity_priv]
  metrics['FN'] = [FN_metric, FN_P_discr, FN_P_priv]
  metrics['FP'] = [FP_metric, FP_P_discr, FP_P_priv]

  # Centre the ratio on 0 (ratio - 1) and clip it to [-1, 1].
  for k in metrics.keys():
    value = standardization(rescale(metrics[k][0]))
    discr = metrics[k][1]
    priv = metrics[k][2]
    metrics[k] = {'Value': value, 'Discr_group': discr, 'Priv_group': priv}

  return metrics


# Fairness metrics computed using subtraction operator
def fairness_metrics_subtraction(confusion_matrix, threshold = 0.15):
  """Compute the fairness metrics of one split as differences between the two groups.

  Parameters:
    confusion_matrix: mapping with the keys 'privileged' and 'discriminated',
      each holding a confusion matrix readable by retrieve_values.
    threshold: tolerance passed to and_function when two component differences
      are combined (EqualizedOdds, ConditionalUseAccuracyEquality,
      OverallAccuracyEquality).

  Returns:
    dict metric_name -> {'Value', 'Discr_group', 'Priv_group'}. 'Value' is the
    difference clipped to [-1, 1] (standardization). For the three combined
    metrics, 'Discr_group' and 'Priv_group' hold the two component differences
    rather than per-group values.

  Sentinels used when a difference is not computed: 1 ("max value") and
  -1 ("min value").

  NOTE(review): and_function compares its inputs against 1-th and 1+th, i.e. a
  band centred on 1, while the components passed to it here are differences
  centred on 0 -- confirm that this is the intended combination rule.
  """

  TP_priv, TN_priv, FP_priv, FN_priv, len_priv = retrieve_values(confusion_matrix['privileged'])
  TP_discr, TN_discr, FP_discr, FN_discr, len_discr = retrieve_values(confusion_matrix['discriminated'])

  # Group fairness: (TP+FP)/total of each group, privileged - discriminated.
  GroupFairness_discr = (TP_discr+FP_discr)/len_discr
  GroupFairness_priv = (TP_priv+FP_priv)/len_priv
  GroupFairness = GroupFairness_priv-GroupFairness_discr

  # Predictive parity: TP/(TP+FP) per group. The sentinels assigned in the
  # first two if-blocks are always overwritten by the final if/elif/else; the
  # same pattern recurs in the blocks below.
  if (TP_discr+FP_discr) == 0:
    PredictiveParity_discr = 0
    PredictiveParity = -1  #min value
  else:
    PredictiveParity_discr = (TP_discr)/(TP_discr+FP_discr)
  if (TP_priv+FP_priv) == 0:
    PredictiveParity_priv = 0
    PredictiveParity = 1 #max value
  else:
    PredictiveParity_priv = (TP_priv)/(TP_priv+FP_priv)
  if PredictiveParity_priv != 0 and PredictiveParity_discr != 0:
    PredictiveParity = PredictiveParity_priv-PredictiveParity_discr
  elif PredictiveParity_priv == 0:
    PredictiveParity = 1 #max value
  else:
    PredictiveParity = -1 #min value

  # Predictive equality: FP/(TN+FP) per group.
  if TN_discr+FP_discr == 0:
    PredictiveEquality_discr = 0
    PredictiveEquality = -1  #min value
  else:
    PredictiveEquality_discr = (FP_discr)/(TN_discr+FP_discr)
  if TN_priv+FP_priv == 0:
    PredictiveEquality_priv = 0
    PredictiveEquality = 1 #max value
  else:
    PredictiveEquality_priv = (FP_priv)/(TN_priv+FP_priv)
  if PredictiveEquality_priv != 0 and PredictiveEquality_discr != 0:
    PredictiveEquality = PredictiveEquality_priv-PredictiveEquality_discr
  elif PredictiveEquality_priv == 0:
    PredictiveEquality = 1 #max value
  else:
    PredictiveEquality = -1 #min value

  # Equal opportunity: FN/(TP+FN) per group.
  if TP_discr+FN_discr == 0:
    EqualOpportunity_discr = 0
    EqualOpportunity = -1  #min value
  else:
    EqualOpportunity_discr = (FN_discr)/(TP_discr+FN_discr)
  if TP_priv+FN_priv == 0:
    EqualOpportunity_priv = 0
    EqualOpportunity = 1 #max value
  else:
    EqualOpportunity_priv = (FN_priv)/(TP_priv+FN_priv)
  if EqualOpportunity_priv != 0 and EqualOpportunity_discr != 0:
    EqualOpportunity = EqualOpportunity_priv-EqualOpportunity_discr
  elif EqualOpportunity_priv == 0:
    EqualOpportunity = 1 #max value
  else:
    EqualOpportunity = -1 #min value

  # Equalized odds: component 1 is the difference of TP/(TP+FN), component 2
  # the difference of FP/(TN+FP), both privileged - discriminated.
  if FN_discr+TP_discr == 0:
    EqualizedOdds1 = 0
    EqualizedOdds = -1 #min value
  elif FN_priv+TP_priv == 0:
    EqualizedOdds1 = 0
    EqualizedOdds = 1 #max value
  else:
    EqualizedOdds1 = (TP_priv/(TP_priv+FN_priv))-(TP_discr/(TP_discr+FN_discr)) # (1-equalOpportunity_discr)/(1-equalOpportunity_priv)
  if FP_priv+TN_priv == 0:
    EqualizedOdds2 = 0
    EqualizedOdds = 1 #max value
  elif FP_discr+TN_discr == 0:
    EqualizedOdds2 = 0
    EqualizedOdds = -1 #min value
  else:
    EqualizedOdds2 = (FP_priv/(TN_priv+FP_priv))-(FP_discr/(TN_discr+FP_discr)) # = PredictiveEquality
  if EqualizedOdds1 != 0 and EqualizedOdds2 != 0:
    EqualizedOdds = and_function(EqualizedOdds1, EqualizedOdds2, threshold)
  elif EqualizedOdds1 == 0:
    EqualizedOdds = 1 #max value
  else:
    EqualizedOdds = -1 #min value

  # Conditional use accuracy equality: component 1 is the difference of
  # TP/(TP+FP), component 2 the difference of TN/(TN+FN).
  if TP_discr+FP_discr == 0:
    ConditionalUseAccuracyEquality1 = 0
    ConditionalUseAccuracyEquality= -1 #min value
  elif TP_priv+FP_priv == 0:
    ConditionalUseAccuracyEquality1 = 0
    ConditionalUseAccuracyEquality = 1 #max value
  else:
    ConditionalUseAccuracyEquality1 = (TP_priv/(TP_priv+FP_priv)) - (TP_discr/(TP_discr+FP_discr))
  if TN_discr+FN_discr == 0:
    ConditionalUseAccuracyEquality2 = 0
    ConditionalUseAccuracyEquality = -1 #min value
  elif TN_priv+FN_priv == 0:
    ConditionalUseAccuracyEquality2 = 0
    ConditionalUseAccuracyEquality = 1 #max value
  else:
    ConditionalUseAccuracyEquality2 = (TN_priv/(TN_priv+FN_priv)) - (TN_discr/(TN_discr+FN_discr))
  if ConditionalUseAccuracyEquality1 != 0 and ConditionalUseAccuracyEquality2 != 0:
    ConditionalUseAccuracyEquality = and_function(ConditionalUseAccuracyEquality1, ConditionalUseAccuracyEquality2, threshold)
  elif ConditionalUseAccuracyEquality1 == 0:
    ConditionalUseAccuracyEquality = 1 #max value
  else:
    ConditionalUseAccuracyEquality = -1 #min value

  # Overall accuracy equality: differences of the raw TP and TN counts.
  # NOTE(review): these are count differences, not rate differences, so they
  # usually fall outside [-1, 1] and get clipped -- confirm this is intended.
  OAE1 = TP_priv-TP_discr
  OAE2 = TN_priv-TN_discr
  OverallAccuracyEquality = and_function(OAE1, OAE2, threshold)

  # Treatment equality: FN/FP per group, privileged - discriminated.
  # The sentinels follow this family's range (-1 = min, 1 = max); the 0/2 pair
  # belongs to the division family, where 0 would read as "no difference" here.
  if FP_discr == 0:
    TreatmentEquality_discr = 0
  else:
    TreatmentEquality_discr = (FN_discr/FP_discr)
  if FP_priv == 0:
    TreatmentEquality_priv = 0
  else:
    TreatmentEquality_priv = (FN_priv/FP_priv)
  if TreatmentEquality_priv != 0 and TreatmentEquality_discr != 0:
    TreatmentEquality = TreatmentEquality_priv-TreatmentEquality_discr
  elif TreatmentEquality_priv == 0:
    TreatmentEquality = 1 #max value
  else:
    TreatmentEquality = -1 #min value

  # FOR parity: FN/(TN+FN) per group.
  if TN_discr+FN_discr == 0:
    FORParity_discr = 0
    FORParity = -1  #min value
  else:
    FORParity_discr = (FN_discr)/(TN_discr+FN_discr)
  if TN_priv+FN_priv == 0:
    FORParity_priv = 0
    FORParity = 1 #max value
  else:
    FORParity_priv = (FN_priv)/(TN_priv+FN_priv)
  if FORParity_priv != 0 and FORParity_discr != 0:
    FORParity = FORParity_priv-FORParity_discr
  elif FORParity_priv == 0:
    FORParity = 1 #max value
  else:
    FORParity = -1 #min value

  # FN and FP as a share of each group's total.
  FN_P_discr =  (FN_discr)/len_discr
  FN_P_priv =  (FN_priv)/len_priv

  FP_P_discr = (FP_discr)/len_discr
  FP_P_priv =  (FP_priv)/len_priv

  #RecallParity = (TP_discr/(TP_discr+FN_discr))/(TP_priv/(TP_priv+FN_priv))

  # Each entry is [combined value, second slot, third slot]; the loop below
  # turns it into the returned dictionary layout.
  metrics = {}
  metrics['GroupFairness'] = [GroupFairness, GroupFairness_discr, GroupFairness_priv]
  metrics['PredictiveParity'] = [PredictiveParity, PredictiveParity_discr, PredictiveParity_priv]
  metrics['PredictiveEquality'] = [PredictiveEquality, PredictiveEquality_discr, PredictiveEquality_priv]
  metrics['EqualOpportunity'] = [EqualOpportunity, EqualOpportunity_discr, EqualOpportunity_priv]
  metrics['EqualizedOdds'] = [EqualizedOdds, EqualizedOdds1, EqualizedOdds2]
  metrics['ConditionalUseAccuracyEquality'] = [ConditionalUseAccuracyEquality, ConditionalUseAccuracyEquality1 , ConditionalUseAccuracyEquality2]
  metrics['OverallAccuracyEquality'] = [OverallAccuracyEquality, OAE1, OAE2]
  metrics['TreatmentEquality'] = [TreatmentEquality, TreatmentEquality_discr, TreatmentEquality_priv]
  metrics['FORParity'] = [FORParity, FORParity_discr, FORParity_priv]
  metrics['FN'] = [FN_P_priv-FN_P_discr, FN_P_discr, FN_P_priv]
  metrics['FP'] = [FP_P_discr-FP_P_priv, FP_P_discr, FP_P_priv]

  # Differences are already centred on 0: only clip them to [-1, 1].
  for k in metrics.keys():
    value = standardization(metrics[k][0])
    discr = metrics[k][1]
    priv = metrics[k][2]
    metrics[k] = {'Value': value, 'Discr_group': discr, 'Priv_group': priv}

  return metrics

In [298]:
def compute_fairness_metrics(predictions_and_tests, target_variable_labels, models, n_splits):
  """Compute both fairness-metric families for every split.

  Builds the per-group confusion matrices with compute_confusion_matrices and
  evaluates each split once with fairness_metrics_subtraction and once with
  fairness_metrics_division.

  Reads the notebook globals `mitigation` and `without_model_mitigations`:
  - mitigation not in that list: each family is {model_name: {split: metrics}};
  - otherwise: each family is {split: metrics}.

  Returns {'division': ..., 'subtraction': ...}.
  """
  confusion_matrices = compute_confusion_matrices(predictions_and_tests, target_variable_labels, models, n_splits)

  def _metrics_per_split(cm_by_split):
    # Evaluate every split exactly once per family.
    sub_dict = {}
    div_dict = {}
    for i in range(0, n_splits):
      sub_dict[i] = fairness_metrics_subtraction(cm_by_split[i])
      div_dict[i] = fairness_metrics_division(cm_by_split[i])
    return div_dict, sub_dict

  if mitigation not in without_model_mitigations:
    # The mitigation technique allows multiple models.
    div_fairness_metrics = {}
    sub_fairness_metrics = {}
    for model_name in models:
      div_dict, sub_dict = _metrics_per_split(confusion_matrices[model_name])
      div_fairness_metrics[model_name] = div_dict
      sub_fairness_metrics[model_name] = sub_dict
  else:
    div_fairness_metrics, sub_fairness_metrics = _metrics_per_split(confusion_matrices)

  return {'division': div_fairness_metrics, 'subtraction': sub_fairness_metrics}

In [299]:
def compute_mean_std_dev_fairness_metrics(fairness_metrics, models):
  """Aggregate each fairness metric over the splits as [mean, standard deviation].

  `fairness_metrics` is indexed as [family][model][split][metric]['Value'] when
  the mitigation allows multiple models, and as [family][split][metric]['Value']
  otherwise. Reads the notebook globals `family`, `fairness_catalogue`,
  `n_splits`, `mitigation` and `without_model_mitigations`.

  Returns {family: {model: {metric: [mean, std]}}} in the multi-model case and
  {family: {metric: [mean, std]}} otherwise.
  """
  def _summarise(metrics_by_split):
    # A fresh dictionary per call: every model owns its summary, so a later
    # model can never overwrite the values stored for an earlier one.
    summary = {}
    for fair_m in fairness_catalogue:
      vec_metrics = [metrics_by_split[i][fair_m]['Value'] for i in range(0, n_splits)]
      summary[fair_m] = [np.mean(vec_metrics), np.std(vec_metrics)]
    return summary

  family_metrics = {}
  for f in family:
    if mitigation not in without_model_mitigations:
      # The mitigation technique allows multiple models.
      family_metrics[f] = {m: _summarise(fairness_metrics[f][m]) for m in models}
    else:
      # Without multiple models.
      family_metrics[f] = _summarise(fairness_metrics[f])

  return family_metrics

# Compute performance metrics

In [300]:
#per-split accuracy, precision, recall and f1 as returned by compute_scores
accuracy, precision, recall, f1_score = compute_scores(predictions_and_tests, models, n_splits)

#mean and standard deviation of every score; mitigations listed in
#without_model_mitigations are aggregated without a model list (None)
acc, prec, rec, f1 = (
  compute_mean_std_dev(split_scores, models if mitigation not in without_model_mitigations else None)
  for split_scores in (accuracy, precision, recall, f1_score)
)

In [301]:
#show the per-split scores first, then their aggregated counterparts
print(accuracy, precision, recall, f1_score)
print(acc, prec, rec, f1)
#gather the aggregated scores in one dict keyed by score name
performance_metrics = dict(accuracy=acc, precision=prec, recall=rec, f1_score=f1)

{'Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 'Decision Tree': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 'Bagging': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 'Random Forest': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 'Extremely Randomized Trees': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 'Ada Boost': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]} {'Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 'Decision Tree': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 'Bagging': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 'Random Forest': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 'Extremely Randomized Trees': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 'Ada Boost': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]} {'Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 'Decision Tree': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 'Baggin

In [302]:
#Pickle the performance metrics into the measurements folder of the project;
#the file name encodes dataset, sensible attribute and mitigation
save_path = path_to_project + '/measurements/performance_metrics-{}-{}-{}.p'.format(
    dataset_name, sensible_attribute, mitigation)
with open(save_path, 'wb') as fp:
    pickle.Pickler(fp, protocol=pickle.HIGHEST_PROTOCOL).dump(performance_metrics)

# Compute fairness metrics

In [303]:
#display the confusion matrices of every split; mitigations listed in
#without_model_mitigations are computed without a model list (None)
print(compute_confusion_matrices(
  predictions_and_tests,
  target_variable_labels,
  models if mitigation not in without_model_mitigations else None,
  n_splits))

{'Logistic Regression': {0: {'discriminated': array([[ 1,  0],
       [ 0, 12]]), 'privileged': array([[26,  0],
       [ 0, 28]])}, 1: {'discriminated': array([[ 3,  0],
       [ 0, 28]]), 'privileged': array([[14,  0],
       [ 0, 22]])}, 2: {'discriminated': array([[ 4,  0],
       [ 0, 15]]), 'privileged': array([[24,  0],
       [ 0, 24]])}, 3: {'discriminated': array([[ 8,  0],
       [ 0, 19]]), 'privileged': array([[16,  0],
       [ 0, 24]])}, 4: {'discriminated': array([[ 3,  0],
       [ 0, 19]]), 'privileged': array([[21,  0],
       [ 0, 24]])}, 5: {'discriminated': array([[ 5,  0],
       [ 0, 22]]), 'privileged': array([[20,  0],
       [ 0, 20]])}, 6: {'discriminated': array([[ 1,  0],
       [ 0, 24]]), 'privileged': array([[10,  0],
       [ 0, 32]])}, 7: {'discriminated': array([[ 4,  0],
       [ 0, 20]]), 'privileged': array([[15,  0],
       [ 0, 28]])}, 8: {'discriminated': array([[ 3,  0],
       [ 0, 22]]), 'privileged': array([[19,  0],
       [ 0, 23]])}, 9: 

In [304]:
#fairness metrics of every split; mitigations listed in
#without_model_mitigations are computed without a model list (None)
fairness_metrics = compute_fairness_metrics(
  predictions_and_tests,
  target_variable_labels,
  models if mitigation not in without_model_mitigations else None,
  n_splits)
print(fairness_metrics)

{'division': {'Logistic Regression': {0: {'GroupFairness': {'Value': 0.7802197802197803, 'Discr_group': 0.9230769230769231, 'Priv_group': 0.5185185185185185}, 'PredictiveParity': {'Value': 0.0, 'Discr_group': 1.0, 'Priv_group': 1.0}, 'PredictiveEquality': {'Value': 1, 'Discr_group': 0.0, 'Priv_group': 0.0}, 'EqualOpportunity': {'Value': -1, 'Discr_group': 0.0, 'Priv_group': 0.0}, 'EqualizedOdds': {'Value': 1, 'Discr_group': 1.0, 'Priv_group': 2}, 'ConditionalUseAccuracyEquality': {'Value': 0.0, 'Discr_group': 1.0, 'Priv_group': 1.0}, 'OverallAccuracyEquality': {'Value': -0.9615384615384616, 'Discr_group': 0.42857142857142855, 'Priv_group': 0.038461538461538464}, 'TreatmentEquality': {'Value': 1, 'Discr_group': 0, 'Priv_group': 0}, 'FORParity': {'Value': 1, 'Discr_group': 0, 'Priv_group': 0.0}, 'FN': {'Value': 1, 'Discr_group': 0.0, 'Priv_group': 0.0}, 'FP': {'Value': -1, 'Discr_group': 0.0, 'Priv_group': 0.0}}, 1: {'GroupFairness': {'Value': 0.4780058651026391, 'Discr_group': 0.9032258

In [305]:
model_to_print = "Logistic Regression"
m = 'GroupFairness'
round_value = 5

#print the chosen division metric of split 1; with multiple models the
#splits are nested one level deeper, under the model name
print(m, np.round(
  (fairness_metrics["division"][model_to_print]
   if mitigation not in without_model_mitigations
   else fairness_metrics["division"])[1][m]["Value"],
  round_value))

GroupFairness 0.47801


In [306]:
#mean and standard deviation of every fairness metric over the splits;
#mitigations listed in without_model_mitigations pass no model list (None)
final_metrics = compute_mean_std_dev_fairness_metrics(
  fairness_metrics,
  models if mitigation not in without_model_mitigations else None)

print(final_metrics)

{'division': {'Logistic Regression': {'GroupFairness': [0.49397696669560676, 0.18483997111173806], 'PredictiveParity': [0.0, 0.0], 'PredictiveEquality': [1.0, 0.0], 'EqualOpportunity': [-1.0, 0.0], 'EqualizedOdds': [1.0, 0.0], 'ConditionalUseAccuracyEquality': [0.0, 0.0], 'OverallAccuracyEquality': [-0.6914249785302417, 0.3423649428824404], 'TreatmentEquality': [1.0, 0.0], 'FORParity': [1.0, 0.0], 'FN': [1.0, 0.0], 'FP': [-1.0, 0.0]}, 'Decision Tree': {'GroupFairness': [0.49397696669560676, 0.18483997111173806], 'PredictiveParity': [0.0, 0.0], 'PredictiveEquality': [1.0, 0.0], 'EqualOpportunity': [-1.0, 0.0], 'EqualizedOdds': [1.0, 0.0], 'ConditionalUseAccuracyEquality': [0.0, 0.0], 'OverallAccuracyEquality': [-0.6914249785302417, 0.3423649428824404], 'TreatmentEquality': [1.0, 0.0], 'FORParity': [1.0, 0.0], 'FN': [1.0, 0.0], 'FP': [-1.0, 0.0]}, 'Bagging': {'GroupFairness': [0.49397696669560676, 0.18483997111173806], 'PredictiveParity': [0.0, 0.0], 'PredictiveEquality': [1.0, 0.0], 'Eq

In [307]:
model_to_print = "Logistic Regression"
round_value = 5
for f in family:
  print(f)
  for m in fairness_catalogue:
    #element 0 of each entry is the mean; with multiple models the metrics
    #are nested one level deeper, under the model name
    print(m, np.round(
      (final_metrics[f][model_to_print]
       if mitigation not in without_model_mitigations
       else final_metrics[f])[m][0],
      round_value))


division
GroupFairness 0.49398
PredictiveParity 0.0
PredictiveEquality 1.0
EqualOpportunity -1.0
EqualizedOdds 1.0
ConditionalUseAccuracyEquality 0.0
OverallAccuracyEquality -0.69142
TreatmentEquality 1.0
FORParity 1.0
FN 1.0
FP -1.0
subtraction
GroupFairness -0.27325
PredictiveParity 0.0
PredictiveEquality 1.0
EqualOpportunity 1.0
EqualizedOdds 1.0
ConditionalUseAccuracyEquality 1.0
OverallAccuracyEquality 1.0
TreatmentEquality 1.0
FORParity 1.0
FN 0.0
FP 0.0


In [308]:
#Pickle the aggregated fairness metrics into the measurements folder of the
#project; the file name encodes dataset, sensible attribute and mitigation
save_path = path_to_project + '/measurements/metrics-{}-{}-{}.p'.format(
    dataset_name, sensible_attribute, mitigation)
with open(save_path, 'wb') as fp:
    pickle.Pickler(fp, protocol=pickle.HIGHEST_PROTOCOL).dump(final_metrics)

In [309]:
#Show the full final_metrics dictionary
print(final_metrics)

{'division': {'Logistic Regression': {'GroupFairness': [0.49397696669560676, 0.18483997111173806], 'PredictiveParity': [0.0, 0.0], 'PredictiveEquality': [1.0, 0.0], 'EqualOpportunity': [-1.0, 0.0], 'EqualizedOdds': [1.0, 0.0], 'ConditionalUseAccuracyEquality': [0.0, 0.0], 'OverallAccuracyEquality': [-0.6914249785302417, 0.3423649428824404], 'TreatmentEquality': [1.0, 0.0], 'FORParity': [1.0, 0.0], 'FN': [1.0, 0.0], 'FP': [-1.0, 0.0]}, 'Decision Tree': {'GroupFairness': [0.49397696669560676, 0.18483997111173806], 'PredictiveParity': [0.0, 0.0], 'PredictiveEquality': [1.0, 0.0], 'EqualOpportunity': [-1.0, 0.0], 'EqualizedOdds': [1.0, 0.0], 'ConditionalUseAccuracyEquality': [0.0, 0.0], 'OverallAccuracyEquality': [-0.6914249785302417, 0.3423649428824404], 'TreatmentEquality': [1.0, 0.0], 'FORParity': [1.0, 0.0], 'FN': [1.0, 0.0], 'FP': [-1.0, 0.0]}, 'Bagging': {'GroupFairness': [0.49397696669560676, 0.18483997111173806], 'PredictiveParity': [0.0, 0.0], 'PredictiveEquality': [1.0, 0.0], 'Eq

In [310]:
#Show the dataset name and the mitigation currently set in the notebook
print(dataset_name, mitigation)

diabetes-women aif360-roc


# Extra

Print example of metrics for a given model, e.g., Logistic Regression.

In [None]:
#Print, for one model, the "Value" of every entry of `metrics`, first from the
#"division" family and then from the "subtraction" family, rounded to
#round_value decimals.
#NOTE(review): this cell has no execution count, so it was not run in this session.
#NOTE(review): the import cell at the top of the notebook binds `metrics` to the
#sklearn `metrics` module; the loops presumably expect an iterable of metric
#names (such as fairness_catalogue) - confirm before running.
#NOTE(review): `overall_metrics` is not defined in the visible part of the
#notebook - confirm where it comes from and that it is indexed as
#[family][model][metric]["Value"].
model_to_print = "Logistic Regression"
round_value = 5

print("Division \n")
for m in metrics:
  print(m, np.round(overall_metrics["division"][model_to_print][m]["Value"], round_value))
print("\nSubtraction \n")
for m in metrics:
  print(m, np.round(overall_metrics["subtraction"][model_to_print][m]["Value"], round_value))

Division 

GroupFairness 0.03943
PredictiveParity 0.0
PredictiveEquality nan
EqualOpportunity -1.0
EqualizedOdds -0.01111
ConditionalUseAccuracyEquality 0.0
OverallAccuracyEquality -0.48043
TreatmentEquality nan
FORParity -1.0
FN -1.0
FP nan

Subtraction 

GroupFairness -0.01431
PredictiveParity 0.0
PredictiveEquality 0.0
EqualOpportunity -0.01111
EqualizedOdds 0.0
ConditionalUseAccuracyEquality 0.0
OverallAccuracyEquality 1
TreatmentEquality nan
FORParity -0.0068
FN -0.00424
FP 0.0
