# Import Libraries





In [1]:
try:
  from google.colab import drive
  drive.mount('/content/drive')
  import sys
  path_to_project = '/content/drive/MyDrive/FairAlgorithm'
  sys.path.append(path_to_project)
  !sudo apt install libcairo2-dev pkg-config python3-dev
  IN_COLAB = True
except:
  IN_COLAB = False

Mounted at /content/drive
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
python3-dev is already the newest version (3.10.6-1~22.04.1).
python3-dev set to manually installed.
The following packages were automatically installed and are no longer required:
  libbz2-dev libpkgconf3 libreadline-dev
Use 'sudo apt autoremove' to remove them.
The following additional packages will be installed:
  libblkid-dev libblkid1 libcairo-script-interpreter2 libffi-dev
  libglib2.0-dev libglib2.0-dev-bin libice-dev liblzo2-2 libmount-dev
  libmount1 libpixman-1-dev libselinux1-dev libsepol-dev libsm-dev
  libxcb-render0-dev libxcb-shm0-dev
Suggested packages:
  libcairo2-doc libgirepository1.0-dev libglib2.0-doc libgdk-pixbuf2.0-bin
  | libgdk-pixbuf2.0-dev libxml2-utils libice-doc cryptsetup-bin libsm-doc
The following packages will be REMOVED:
  pkgconf r-base-dev
The following NEW packages will be installed:
  libblkid-dev libcairo-script-interpreter2 

In [2]:
#import libraries
import numpy as np
import pandas as pd
import pickle
#from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.model_selection import cross_validate,cross_val_score,cross_val_predict,train_test_split,StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from tqdm.notebook import tqdm

# Configure the notebook


In the next code cell, set all the variables that are used throughout the notebook.  
The variables are used to configure the notebook, and to set the paths to the data files.

Modify the variables in the next code cell to configure the notebook

- `dataset_name`: The name of the dataset file.
- `dataset_path`: The path to the dataset file.
- `target_variable`: The name of the target column to predict.
- `target_variable_labels`: The labels for the target feature.
- `sensible_attribute`: The sensitive (protected) attribute column used to split the data into the privileged and discriminated groups.

In [99]:
#INPUT
# Select the dataset by uncommenting exactly one of the assignments below
# (or by defining `dataset_name` in an earlier cell).
#dataset_name = "diabetes-women"
#dataset_name = "sepsis"
#dataset_name = "aids"
#dataset_name = "diabetes-prediction" #DOES NOT WORK
#dataset_name = "stroke-prediction" #DOES NOT WORK
#dataset_name = "myocardial-infarction"
#dataset_name = "alzheimer-disease"

# Per-dataset settings:
#   ignore_cols            columns dropped right after loading the CSV
#   target_variable        name of the column to predict
#   target_variable_labels label order passed to the confusion-matrix builder
#   sensible_attribute     column used to split privileged / discriminated groups
#   default_mappings       human-readable names for label and group values
#                          (not defined for every dataset)
if dataset_name == "diabetes-women":
  ignore_cols = ['Age']
  target_variable = 'Outcome'
  target_variable_labels= [1,0]
  sensible_attribute = 'AgeCategory'
  default_mappings = {
      'label_maps': [{1.0: 'Diabetic', 0.0: 'NonDiabetic'}],
      'protected_attribute_maps': [{1.0: 'Adult', 0.0: 'Young'}]
  }
elif dataset_name == "sepsis":
  ignore_cols = []
  target_variable = 'Mortality'
  target_variable_labels= [1,0]
  sensible_attribute = 'Gender_cat'
  #sensible_attribute = 'Age_cat'
elif dataset_name == 'aids':
  ignore_cols = []
  target_variable = 'cid'
  target_variable_labels= [1, 0]
  sensible_attribute = 'homo_cat'
  #sensible_attribute = 'race_cat'
  #sensible_attribute = 'age_cat'
elif dataset_name == "myocardial-infarction":
  ignore_cols = []
  target_variable = 'LET_IS_cat'
  target_variable_labels= [1,0]
  sensible_attribute = 'SEX'
  default_mappings = {
      'label_maps': [{1: 'Complication', 0: 'No Complication'}],
      'protected_attribute_maps': [{0: 'Female', 1: 'Male'}]
  }
elif dataset_name == 'alzheimer-disease':
  ignore_cols = []
  target_variable = 'Diagnosis'
  target_variable_labels= [1, 0]
  #sensible_attribute = 'Gender_cat'
  sensible_attribute = 'Ethnicity_cat'
  default_mappings = {
      'label_maps': [{1: 'Alzheimer', 0: 'No Alzheimer'}],
      'protected_attribute_maps': [{"Ethnicity_cat":{1: "Caucasian", 0: "Non-Caucasian"}, "Gender_cat":{1: "Male", 0: "Female"}}]
  }
elif dataset_name == "diabetes-prediction":
  ignore_cols = []
  target_variable = 'diabetes'
  target_variable_labels= ['No Diabetes','Diabetes']
  sensible_attribute = 'race_category'
  default_mappings = {'label_maps': [{1.0: 'Diabetic', 0.0: 'NonDiabetic'}],
                      'protected_attribute_maps': [{1.0: 'Caucasian', 0.0: 'Non-Caucasian'}] }
elif dataset_name == "stroke-prediction":
  ignore_cols = []
  target_variable = 'stroke_prediction'
  target_variable_labels= ['No Stroke','Stroke']
  sensible_attribute = 'residence_category'

  default_mappings = {
      'label_maps': [{1.0: 'Stroke', 0.0: 'No Stroke'}],
      'protected_attribute_maps': [{1.0: 'Urban', 0.0: 'Rural'}]
  }
else:
  # Fail fast: without this branch an unrecognised name would leave the
  # settings above undefined, or keep the values of a previously selected
  # dataset and pair them with a different CSV file.
  raise ValueError("Unknown dataset_name: {!r}".format(dataset_name))

# Location of the preprocessed CSV: inside the Drive project folder on Colab,
# relative to the working directory otherwise.
if IN_COLAB:
  dataset_path = path_to_project + '/data/preprocessed/preprocessed-{}.csv'.format(dataset_name)
else:
  dataset_path = 'data/preprocessed/preprocessed-{}.csv'.format(dataset_name)

# Load the data and discard the columns excluded for the selected dataset.
df = pd.read_csv(dataset_path)
df = df.drop(columns=ignore_cols)
feature_cols = df.columns

# Mitigation settings read by the metric functions below: when `mitigation` is
# not listed in `without_model_mitigations`, results are organised per model.
mitigation = ['original']
without_model_mitigations = []

In [84]:
n_estimators = 30
random_seed = 1234
n_splits= 10

# Classifiers compared in the notebook. Each one receives `random_state=random_seed`
# so that repeated runs of the notebook produce the same predictions.
models = {'Logistic Regression':LogisticRegression(max_iter=500, random_state=random_seed),
          'Decision Tree':DecisionTreeClassifier(max_depth=None, random_state=random_seed),
          'Bagging':BaggingClassifier(DecisionTreeClassifier(max_depth=3),n_estimators=n_estimators, random_state=random_seed),
          'Random Forest':RandomForestClassifier(n_estimators=n_estimators, random_state=random_seed),
          'Extremely Randomized Trees':ExtraTreesClassifier(n_estimators=n_estimators, random_state=random_seed),
          'Ada Boost':AdaBoostClassifier(DecisionTreeClassifier(max_depth=3),n_estimators=n_estimators, random_state=random_seed)}

# Metric families and the names of the fairness metrics aggregated later on.
family = ['division', 'subtraction']
fairness_catalogue = ['GroupFairness', 'PredictiveParity', 'PredictiveEquality', 'EqualOpportunity', 'EqualizedOdds', 'ConditionalUseAccuracyEquality', 'OverallAccuracyEquality', 'TreatmentEquality', 'FORParity', 'FN', 'FP']


In [85]:
# Bundle the notebook-level settings into one dictionary
# (see `unpack_config` for the matching reader).
config = {
    'df': df,
    'target_variable': target_variable,
    'sensible_attribute': sensible_attribute,
    'path_to_project': path_to_project,
    'n_splits': n_splits,
    'models': models,
    'n_estimators': n_estimators,
    'random_seed': random_seed,
}

In [86]:
def unpack_config(config):
  """Return the configuration values as a tuple.

  The order is: df, target_variable, sensible_attribute, path_to_project,
  n_splits, models, n_estimators, random_seed.
  """
  ordered_keys = (
      'df',
      'target_variable',
      'sensible_attribute',
      'path_to_project',
      'n_splits',
      'models',
      'n_estimators',
      'random_seed',
  )
  return tuple(config[key] for key in ordered_keys)

# Utils


In [87]:
def train_test_splitting(df, n_splits):
  """Split `df` into `n_splits` contiguous train/test folds.

  Rows are taken in their existing order (no shuffling, no stratification).
  Fold `i` tests on the i-th block of `len(df) // n_splits` rows and trains on
  all remaining rows. The last fold also receives the leftover rows when
  `len(df)` is not a multiple of `n_splits`, so every row is tested exactly
  once and no row is lost.

  Args:
    df: DataFrame to split.
    n_splits: number of folds.

  Returns:
    dict mapping the fold index to {'train': DataFrame, 'test': DataFrame}.
  """
  n_rows = len(df)
  fold_size = n_rows // n_splits
  df_splitting = {}

  for i in range(n_splits):
    start = i * fold_size
    # The last fold extends to the end of the frame to absorb the remainder.
    stop = n_rows if i == n_splits - 1 else start + fold_size

    df_test = df.iloc[start:stop]
    # Training data is everything outside the test window; empty pieces are
    # skipped so the result is always a DataFrame.
    pieces = [part for part in (df.iloc[:start], df.iloc[stop:]) if len(part) > 0]
    df_train = pd.concat(pieces) if pieces else df.iloc[0:0]

    df_splitting[i] = {'train': df_train, 'test': df_test}
  return df_splitting

In [88]:
def df_X_Y_split(df_train, df_test, target_variable):
  """Separate the target column from the features of a train/test pair.

  Returns:
    (X_train, Y_train, X_test, Y_test), where each X is the frame without
    `target_variable` and each Y is the `target_variable` column.
  """
  def _separate(frame):
    # Read the label column first, then build the feature frame without it.
    labels = frame[target_variable]
    features = frame.drop(columns=target_variable)
    return features, labels

  train_features, train_labels = _separate(df_train)
  test_features, test_labels = _separate(df_test)
  return train_features, train_labels, test_features, test_labels

In [89]:
def compute_predictions_and_tests(df, target_variable, n_splits, models, n_estimators, random_seed, sensible_attribute=None):
  """Fit every model on every fold and collect labels, predictions and groups.

  Args:
    df: full dataset, including the target and the sensitive-attribute column.
    target_variable: name of the column to predict.
    n_splits: number of folds produced by `train_test_splitting`.
    models: dict mapping a model name to an estimator exposing `fit`/`predict`.
      The estimators are fitted in place.
    n_estimators: not used inside this function; kept so existing calls work.
    random_seed: not used inside this function; kept so existing calls work.
    sensible_attribute: name of the sensitive-attribute column. When omitted,
      the notebook-level variable `sensible_attribute` is used.

  Returns:
    dict: model name -> fold index -> {'y_test', 'y_pred', 's_test'}, each an
    integer numpy array covering the rows of that fold's test set.
  """
  if sensible_attribute is None:
    # Backward compatibility: earlier callers relied on the notebook global.
    sensible_attribute = globals()['sensible_attribute']

  # The folds depend only on `df` and `n_splits`, so they are built once and
  # shared by all models.
  df_splitting = train_test_splitting(df, n_splits)

  predicted_and_real_values = {}
  for model_name in tqdm(models):
    clf = models[model_name]
    pred_and_y = {}
    for i in range(n_splits):
      df_split = df_splitting[i]

      X_train, Y_train, X_test, Y_test = df_X_Y_split(df_split['train'], df_split['test'], target_variable)
      clf.fit(X_train, Y_train)
      y_pred = clf.predict(X_test)

      # The sensitive attribute stays among the features; its test values are
      # stored so the predictions can later be split by group.
      S_test = X_test[sensible_attribute].values
      pred_and_y[i] = {'y_test': Y_test.to_numpy().astype(int), 'y_pred': y_pred.astype(int), 's_test':  S_test.astype(int)}

    predicted_and_real_values[model_name] = pred_and_y

  return predicted_and_real_values


In [90]:
def check_results(results):
  """Print every (actual, predicted) pair on which a model was wrong.

  Args:
    results: dict: model name -> fold index -> {'y_test': ..., 'y_pred': ...},
      as returned by `compute_predictions_and_tests`.
  """
  for model_name in tqdm(results):
    for fold in results[model_name].values():
      # Walk over the whole fold, whatever its size, instead of a fixed number
      # of rows.
      for actual, predicted in zip(fold['y_test'], fold['y_pred']):
        if actual != predicted:
          print(actual, predicted)

# Libraries to build Confusion Matrix and compute performance metrics

We need to save the indexes of both groups `privileged` and `discriminated` in two lists.

`y_privileged` is the part of the dataset where `sensible_value` = 1 (for example `AgeCategory` = 1), and `y_discriminated` is the part of the dataset where `sensible_value` = 0.

Build the confusion matrices (one for the privileged group, one for the discriminated group) for each model.


Note: the per-model loop does not apply to in-processing mitigation, which has only one ML model.

In [91]:
def compute_scores(predictions_and_tests, models, n_splits):
  """Compute accuracy, precision, recall and F1 for every fold.

  When the notebook-level `mitigation` is not listed in
  `without_model_mitigations`, `predictions_and_tests` is indexed by model name
  and then by fold, and each returned value is a dict: model name -> list of
  per-fold scores. Otherwise it is indexed by fold only and each returned value
  is a plain list of per-fold scores.

  Returns:
    (accuracy, precision, recall, f1_score)
  """
  def _fold_scores(fold):
    # Scores of a single fold, returned as (accuracy, precision, recall, f1).
    actual = fold['y_test']
    predicted = fold['y_pred']
    fold_precision = metrics.precision_score(actual, predicted)
    fold_recall = metrics.recall_score(actual, predicted)
    fold_accuracy = metrics.accuracy_score(actual, predicted)
    fold_f1 = metrics.f1_score(actual, predicted)
    return fold_accuracy, fold_precision, fold_recall, fold_f1

  def _columns(rows):
    # Turn a list of per-fold score tuples into four per-metric lists.
    return tuple([row[position] for row in rows] for position in range(4))

  if mitigation in without_model_mitigations:
    rows = [_fold_scores(predictions_and_tests[i]) for i in range(0, n_splits)]
    accuracy, precision, recall, f1 = _columns(rows)
    return accuracy, precision, recall, f1

  accuracy, precision, recall, f1 = {}, {}, {}, {}
  for model_name in models:
    rows = [_fold_scores(predictions_and_tests[model_name][i]) for i in range(0, n_splits)]
    accuracy[model_name], precision[model_name], recall[model_name], f1[model_name] = _columns(rows)
  return accuracy, precision, recall, f1

In [92]:
def compute_mean_std_dev(metric_list, models):
  """Summarise per-fold scores as [mean, standard deviation].

  Args:
    metric_list: dict: model name -> list of scores when `models` is given,
      otherwise a plain list of scores.
    models: iterable of model names, or None for the single-list case.

  Returns:
    dict: model name -> [mean, std] when `models` is given, else [mean, std].
  """
  def _summary(values):
    scores = np.array(values)
    return [scores.mean(), scores.std()]

  if models is None:
    return _summary(metric_list)
  return {model_name: _summary(metric_list[model_name]) for model_name in models}

In [93]:
def compute_confusion_matrices(predictions_and_tests, target_variable_labels, models, n_splits):
  """Build one confusion matrix per group (privileged / discriminated) per fold.

  Rows whose `s_test` equals 1 form the privileged group, rows whose `s_test`
  equals 0 the discriminated group.

  Args:
    predictions_and_tests: fold results holding 'y_test', 'y_pred' and 's_test'.
      Indexed by model name and then fold when the notebook-level `mitigation`
      is not listed in `without_model_mitigations`, by fold only otherwise.
    target_variable_labels: label order passed to `confusion_matrix`. It is
      applied in both cases so the matrices always have the same orientation
      and are always 2x2, even when a group lacks one of the classes in a fold.
    models: iterable of model names (used in the per-model case only).
    n_splits: number of folds.

  Returns:
    dict: [model name ->] fold index -> {'discriminated': cm, 'privileged': cm}.
  """
  # NOTE(review): `retrieve_values` reads position [0][0] as TN, i.e. it treats
  # the FIRST entry of `target_variable_labels` as the negative class. With the
  # configured order [1, 0] this makes class 0 the "positive" outcome -
  # confirm that this is intended.
  def _group_matrices(fold):
    df_metrics = pd.DataFrame({'s_test': fold['s_test'], 'y_test': fold['y_test'], 'y_pred': fold['y_pred']})
    matrices = {}
    for group_name, group_value in (('discriminated', 0), ('privileged', 1)):
      group_rows = df_metrics[df_metrics['s_test'] == group_value]
      matrices[group_name] = confusion_matrix(group_rows['y_test'], group_rows['y_pred'], labels=target_variable_labels)
    return matrices

  if mitigation not in without_model_mitigations:
    confusion_matrices = {}
    for model_name in models:
      confusion_matrices[model_name] = {
          i: _group_matrices(predictions_and_tests[model_name][i]) for i in range(n_splits)
      }
    return confusion_matrices

  return {i: _group_matrices(predictions_and_tests[i]) for i in range(n_splits)}

## Functions to compute fairness metrics

Terminology:

- d is the predicted value,
- Y is the actual value in the dataset
- G the protected attribute, priv= privileged group, discr=discriminated group
- L is the legitimate attribute (only for Conditional Statistical Parity)

Fairness Metrics List:

1. Group Fairness: (d=1|G=priv) = (d=1|G=discr)
2. Predictive Parity: (Y=1|d=1,G=priv) = (Y=1|d=1,G=discr)
3. Predictive Equality: (d=1|Y=0,G=priv) = (d=1|Y=0,G=discr)
4. Equal Opportunity:  (d=0|Y=1,G=priv) = (d=0|Y=1,G=discr)
5. Equalized Odds: (d=1|Y=i,G=priv) = (d=1|Y=i,G=discr), i ∈ {0,1}
6. ConditionalUseAccuracyEquality: (Y=1|d=1, G=priv) = (Y=1|d=1,G=discr) and (Y=0|d=0,G=priv) = (Y=0|d=0,G=discr)
7. Overall Accuracy Equality: (d=Y, G=priv) = (d=Y, G=discr)
8. Treatment Equality: (Y=1, d=0, G=priv)/(Y=0, d=1, G=priv) = (Y=1, d=0, G=discr)/(Y=0, d=1, G=discr)
9. FOR Parity: (Y=1|d=0, G=priv) = (Y=1|d=0,G=discr)

How to evaluate the results?

Looking at the value for each corresponding metric:

- If the value is between 0 and 1-t the discriminated group suffers from unfairness
- If the value is greater than 1+t the privileged group suffers from unfairness
- If the value is between 1-t and 1+t both privileged and discriminated group have a fair treatment

t is a threshold that should be chosen by the user according to the context and the goal of the task.


In [94]:
# Retrieve TP, TN, FP, FN values from a confusion matrix
def retrieve_values(cm):
  """Return (TP, TN, FP, FN, total) read from a 2x2 confusion matrix.

  Row 0 is read as (TN, FP) and row 1 as (FN, TP); `total` is their sum.
  """
  first_row, second_row = cm[0], cm[1]
  TN, FP = first_row[0], first_row[1]
  FN, TP = second_row[0], second_row[1]
  return TP, TN, FP, FN, TN + FP + FN + TP

def rescale(metric):
  """Shift a ratio-based metric by -1 so that a ratio of 1 becomes 0."""
  return metric - 1

def standardization(metric):
  """Clip `metric` to the interval [-1, 1]."""
  if metric > 1:
    return 1
  if metric < -1:
    return -1
  return metric

def valid(metric, th):
  """Return True when `metric` lies strictly between 1-th and 1+th."""
  return bool(1-th < metric < 1+th)

def and_function(m1, m2, th):
  """Merge two ratio-scale values into one, relative to the band (1-th, 1+th).

  The smaller value is returned when both values are below the band, or when
  one is inside the band and the other below it. In every other case the
  larger value is returned.
  """
  lower, upper = 1-th, 1+th

  above = (m1 > upper, m2 > upper)
  if all(above):
    return max(m1, m2)

  below = (m1 < lower, m2 < lower)
  if all(below):
    return min(m1, m2)

  # Same test as `valid`: strictly inside the tolerance band.
  inside = (m1 > lower and m1 < upper, m2 > lower and m2 < upper)
  if all(inside):
    return max(m1, m2)
  if any(inside) and any(above):
    return max(m1, m2)
  if any(inside) and any(below):
    return min(m1, m2)
  return max(m1, m2)

In [95]:
# Fairness metrics computed using division operator
def fairness_metrics_division(confusion_matrix, threshold = 0.15):
  """Compute the fairness catalogue as ratios between the two groups.

  Args:
    confusion_matrix: dict with the keys 'privileged' and 'discriminated', each
      holding a 2x2 matrix readable by `retrieve_values`.
    threshold: tolerance forwarded to `and_function` when two sub-ratios are
      merged into a single metric.

  Returns:
    dict: metric name -> {'Value': ..., 'Discr_group': ..., 'Priv_group': ...}.
    'Value' is the ratio shifted by -1 and clipped to [-1, 1], so 0 corresponds
    to a ratio of 1. For EqualizedOdds, ConditionalUseAccuracyEquality and
    OverallAccuracyEquality the 'Discr_group' / 'Priv_group' slots hold the two
    merged sub-ratios instead of per-group rates.
  """
  # NOTE(review): a rate whose denominator is zero is stored as 0, so the
  # fallback branches below cannot tell "undefined" apart from "exactly 0".
  def _rate(numerator, denominator):
    return 0 if denominator == 0 else numerator / denominator

  # Ratio discr_rate / priv_rate used by the two-part metrics: 0 when either
  # rate is undefined, 2 (max value) when the privileged rate is zero.
  def _ratio_of_rates(discr_num, discr_den, priv_num, priv_den):
    if discr_den == 0 or priv_den == 0:
      return 0
    priv_rate = priv_num / priv_den
    if priv_rate == 0:
      return 2  #max value
    return (discr_num / discr_den) / priv_rate

  TP_priv, TN_priv, FP_priv, FN_priv, len_priv = retrieve_values(confusion_matrix['privileged'])
  TP_discr, TN_discr, FP_discr, FN_discr, len_discr = retrieve_values(confusion_matrix['discriminated'])

  # Group fairness: share of positive predictions, discr / priv.
  GroupFairness_discr = (TP_discr+FP_discr)/len_discr
  GroupFairness_priv = (TP_priv+FP_priv)/len_priv
  if GroupFairness_priv == 0:
    GroupFairness = 2  #max value
  else:
    GroupFairness = GroupFairness_discr/GroupFairness_priv

  # Predictive parity: TP / (TP + FP), discr / priv.
  PredictiveParity_discr = _rate(TP_discr, TP_discr+FP_discr)
  PredictiveParity_priv = _rate(TP_priv, TP_priv+FP_priv)
  if PredictiveParity_discr != 0 and PredictiveParity_priv != 0:
    PredictiveParity = PredictiveParity_discr/PredictiveParity_priv
  elif PredictiveParity_priv == 0:
    PredictiveParity = 2  #max value
  else:
    PredictiveParity = 0  #min value

  # Predictive equality: FP / (TN + FP), discr / priv.
  PredictiveEquality_discr = _rate(FP_discr, TN_discr+FP_discr)
  PredictiveEquality_priv = _rate(FP_priv, TN_priv+FP_priv)
  if PredictiveEquality_discr != 0 and PredictiveEquality_priv != 0:
    PredictiveEquality = PredictiveEquality_discr/PredictiveEquality_priv
  elif PredictiveEquality_priv == 0:
    PredictiveEquality = 2  #max value
  else:
    PredictiveEquality = 0  #min value

  # Equal opportunity: FN / (TP + FN), priv / discr.
  EqualOpportunity_priv = _rate(FN_priv, TP_priv+FN_priv)
  EqualOpportunity_discr = _rate(FN_discr, TP_discr+FN_discr)
  if EqualOpportunity_priv != 0 and EqualOpportunity_discr != 0:
    EqualOpportunity = EqualOpportunity_priv/EqualOpportunity_discr
  elif EqualOpportunity_discr == 0:
    EqualOpportunity = 0  #min value
  else:
    EqualOpportunity = 2  #max value

  # Equalized odds: TP/(TP+FN) ratio merged with FP/(TN+FP) ratio.
  EqualizedOdds1 = _ratio_of_rates(TP_discr, TP_discr+FN_discr, TP_priv, TP_priv+FN_priv)
  EqualizedOdds2 = _ratio_of_rates(FP_discr, TN_discr+FP_discr, FP_priv, TN_priv+FP_priv)
  if EqualizedOdds1 != 0 and EqualizedOdds2 != 0:
    EqualizedOdds = and_function(EqualizedOdds1, EqualizedOdds2, threshold)
  else:
    EqualizedOdds = 2 #max value

  # Conditional use accuracy equality: TP/(TP+FP) ratio merged with
  # TN/(TN+FN) ratio. The first part guards BOTH groups' TP+FP denominators,
  # which avoids a 0/0 when the privileged group has no positive predictions.
  ConditionalUseAccuracyEquality1 = _ratio_of_rates(TP_discr, TP_discr+FP_discr, TP_priv, TP_priv+FP_priv)
  ConditionalUseAccuracyEquality2 = _ratio_of_rates(TN_discr, TN_discr+FN_discr, TN_priv, TN_priv+FN_priv)
  if ConditionalUseAccuracyEquality1 != 0 and ConditionalUseAccuracyEquality2 != 0:
    ConditionalUseAccuracyEquality = and_function(ConditionalUseAccuracyEquality1, ConditionalUseAccuracyEquality2, threshold)
  else:
    ConditionalUseAccuracyEquality = 2 #max value

  # Overall accuracy equality.
  # NOTE(review): this compares raw TP and TN counts between the groups, not
  # accuracies, so the result depends on the group sizes - confirm intent.
  OAE1 = _rate(TP_discr, TP_priv)
  OAE2 = _rate(TN_discr, TN_priv)
  if OAE1 != 0 and OAE2 != 0:
    OverallAccuracyEquality = and_function(OAE1, OAE2, threshold)
  else:
    OverallAccuracyEquality = 2 #max value

  # Treatment equality: FN / FP, priv / discr.
  TreatmentEquality_priv = _rate(FN_priv, FP_priv)
  TreatmentEquality_discr = _rate(FN_discr, FP_discr)
  if TreatmentEquality_priv != 0 and TreatmentEquality_discr != 0:
    TreatmentEquality = TreatmentEquality_priv/TreatmentEquality_discr
  elif TreatmentEquality_priv == 0:
    TreatmentEquality = 2 #max value
  else:
    TreatmentEquality = 0 #min value

  # FOR parity: FN / (TN + FN), priv / discr.
  FORParity_priv = _rate(FN_priv, TN_priv+FN_priv)
  FORParity_discr = _rate(FN_discr, TN_discr+FN_discr)
  if FORParity_priv != 0 and FORParity_discr != 0:
    FORParity = FORParity_priv/FORParity_discr
  elif FORParity_priv == 0:
    FORParity = 2 #max value
  else:
    FORParity = 0 #min value

  # Share of false negatives in each group, priv / discr.
  FN_P_discr = (FN_discr)/len_discr
  FN_P_priv = (FN_priv)/len_priv
  if FN_P_discr == 0:
    FN_metric = 2  #max value
  else:
    FN_metric = FN_P_priv/FN_P_discr

  # Share of false positives in each group, discr / priv.
  # NOTE(review): a zero privileged share maps to the MIN value here, whereas
  # GroupFairness maps the same situation to the max value - confirm intent.
  FP_P_discr = (FP_discr)/len_discr
  FP_P_priv = (FP_priv)/len_priv
  if FP_P_priv == 0:
    FP_metric = 0  #min value
  else:
    FP_metric = FP_P_discr/FP_P_priv

  #RecallParity = (TP_discr/(TP_discr+FN_discr))/(TP_priv/(TP_priv+FN_priv))

  raw_metrics = {}
  raw_metrics['GroupFairness'] = [GroupFairness, GroupFairness_discr, GroupFairness_priv]
  raw_metrics['PredictiveParity'] = [PredictiveParity, PredictiveParity_discr, PredictiveParity_priv]
  raw_metrics['PredictiveEquality'] = [PredictiveEquality, PredictiveEquality_discr, PredictiveEquality_priv]
  raw_metrics['EqualOpportunity'] = [EqualOpportunity, EqualOpportunity_discr, EqualOpportunity_priv]
  raw_metrics['EqualizedOdds'] = [EqualizedOdds, EqualizedOdds1, EqualizedOdds2]
  raw_metrics['ConditionalUseAccuracyEquality'] = [ConditionalUseAccuracyEquality, ConditionalUseAccuracyEquality1 , ConditionalUseAccuracyEquality2]
  raw_metrics['OverallAccuracyEquality'] = [OverallAccuracyEquality, OAE1, OAE2]
  raw_metrics['TreatmentEquality'] = [TreatmentEquality, TreatmentEquality_discr, TreatmentEquality_priv]
  raw_metrics['FORParity'] = [FORParity, FORParity_discr, FORParity_priv]
  raw_metrics['FN'] = [FN_metric, FN_P_discr, FN_P_priv]
  raw_metrics['FP'] = [FP_metric, FP_P_discr, FP_P_priv]

  # Centre the ratio on 0 and clip it to [-1, 1].
  result = {}
  for name, (ratio, discr, priv) in raw_metrics.items():
    result[name] = {'Value': standardization(rescale(ratio)), 'Discr_group': discr, 'Priv_group': priv}

  return result


# Fairness metrics computed using subtraction operator
def fairness_metrics_subtraction(confusion_matrix, threshold = 0.15):
  """Compute the fairness catalogue as differences between the two groups.

  Args:
    confusion_matrix: dict with the keys 'privileged' and 'discriminated', each
      holding a 2x2 matrix readable by `retrieve_values`.
    threshold: tolerance around 0 used when two sub-differences are merged
      into a single metric.

  Returns:
    dict: metric name -> {'Value': ..., 'Discr_group': ..., 'Priv_group': ...}.
    'Value' is the difference clipped to [-1, 1]; the fallback values for
    undefined cases are -1 (min) and 1 (max). For EqualizedOdds,
    ConditionalUseAccuracyEquality and OverallAccuracyEquality the
    'Discr_group' / 'Priv_group' slots hold the two merged sub-differences
    instead of per-group rates.
  """
  # NOTE(review): a rate whose denominator is zero is stored as 0, so the
  # fallback branches below cannot tell "undefined" apart from "exactly 0";
  # likewise a two-part metric whose sub-difference is exactly 0 takes the
  # fallback branch instead of being merged.
  def _rate(numerator, denominator):
    return 0 if denominator == 0 else numerator / denominator

  # Difference priv_rate - discr_rate, or 0 when either rate is undefined.
  def _difference_of_rates(discr_num, discr_den, priv_num, priv_den):
    if discr_den == 0 or priv_den == 0:
      return 0
    return (priv_num / priv_den) - (discr_num / discr_den)

  # `and_function` classifies values around 1 (ratio scale), while differences
  # are centred on 0. The values are shifted by +1 for the classification only
  # and the matching unshifted difference is returned.
  def _combine_differences(d1, d2):
    shifted1, shifted2 = d1 + 1, d2 + 1
    chosen = and_function(shifted1, shifted2, threshold)
    return d1 if chosen == shifted1 else d2

  TP_priv, TN_priv, FP_priv, FN_priv, len_priv = retrieve_values(confusion_matrix['privileged'])
  TP_discr, TN_discr, FP_discr, FN_discr, len_discr = retrieve_values(confusion_matrix['discriminated'])

  # Group fairness: share of positive predictions, priv - discr.
  GroupFairness_discr = (TP_discr+FP_discr)/len_discr
  GroupFairness_priv = (TP_priv+FP_priv)/len_priv
  GroupFairness = GroupFairness_priv-GroupFairness_discr

  # Predictive parity: TP / (TP + FP).
  PredictiveParity_discr = _rate(TP_discr, TP_discr+FP_discr)
  PredictiveParity_priv = _rate(TP_priv, TP_priv+FP_priv)
  if PredictiveParity_priv != 0 and PredictiveParity_discr != 0:
    PredictiveParity = PredictiveParity_priv-PredictiveParity_discr
  elif PredictiveParity_priv == 0:
    PredictiveParity = 1 #max value
  else:
    PredictiveParity = -1 #min value

  # Predictive equality: FP / (TN + FP).
  PredictiveEquality_discr = _rate(FP_discr, TN_discr+FP_discr)
  PredictiveEquality_priv = _rate(FP_priv, TN_priv+FP_priv)
  if PredictiveEquality_priv != 0 and PredictiveEquality_discr != 0:
    PredictiveEquality = PredictiveEquality_priv-PredictiveEquality_discr
  elif PredictiveEquality_priv == 0:
    PredictiveEquality = 1 #max value
  else:
    PredictiveEquality = -1 #min value

  # Equal opportunity: FN / (TP + FN).
  EqualOpportunity_discr = _rate(FN_discr, TP_discr+FN_discr)
  EqualOpportunity_priv = _rate(FN_priv, TP_priv+FN_priv)
  if EqualOpportunity_priv != 0 and EqualOpportunity_discr != 0:
    EqualOpportunity = EqualOpportunity_priv-EqualOpportunity_discr
  elif EqualOpportunity_priv == 0:
    EqualOpportunity = 1 #max value
  else:
    EqualOpportunity = -1 #min value

  # Equalized odds: TP/(TP+FN) difference merged with FP/(TN+FP) difference.
  EqualizedOdds1 = _difference_of_rates(TP_discr, TP_discr+FN_discr, TP_priv, TP_priv+FN_priv)
  EqualizedOdds2 = _difference_of_rates(FP_discr, TN_discr+FP_discr, FP_priv, TN_priv+FP_priv)
  if EqualizedOdds1 != 0 and EqualizedOdds2 != 0:
    EqualizedOdds = _combine_differences(EqualizedOdds1, EqualizedOdds2)
  elif EqualizedOdds1 == 0:
    EqualizedOdds = 1 #max value
  else:
    EqualizedOdds = -1 #min value

  # Conditional use accuracy equality: TP/(TP+FP) difference merged with
  # TN/(TN+FN) difference.
  ConditionalUseAccuracyEquality1 = _difference_of_rates(TP_discr, TP_discr+FP_discr, TP_priv, TP_priv+FP_priv)
  ConditionalUseAccuracyEquality2 = _difference_of_rates(TN_discr, TN_discr+FN_discr, TN_priv, TN_priv+FN_priv)
  if ConditionalUseAccuracyEquality1 != 0 and ConditionalUseAccuracyEquality2 != 0:
    ConditionalUseAccuracyEquality = _combine_differences(ConditionalUseAccuracyEquality1, ConditionalUseAccuracyEquality2)
  elif ConditionalUseAccuracyEquality1 == 0:
    ConditionalUseAccuracyEquality = 1 #max value
  else:
    ConditionalUseAccuracyEquality = -1 #min value

  # Overall accuracy equality.
  # NOTE(review): this compares raw TP and TN counts between the groups, not
  # accuracies, so the result depends on the group sizes - confirm intent.
  OAE1 = TP_priv-TP_discr
  OAE2 = TN_priv-TN_discr
  OverallAccuracyEquality = _combine_differences(OAE1, OAE2)

  # Treatment equality: FN / FP. The fallbacks use this function's own
  # -1 / 1 scale, so the "min value" case is not reported as 0 (parity).
  TreatmentEquality_discr = _rate(FN_discr, FP_discr)
  TreatmentEquality_priv = _rate(FN_priv, FP_priv)
  if TreatmentEquality_priv != 0 and TreatmentEquality_discr != 0:
    TreatmentEquality = TreatmentEquality_priv-TreatmentEquality_discr
  elif TreatmentEquality_priv == 0:
    TreatmentEquality = 1 #max value
  else:
    TreatmentEquality = -1 #min value

  # FOR parity: FN / (TN + FN).
  FORParity_discr = _rate(FN_discr, TN_discr+FN_discr)
  FORParity_priv = _rate(FN_priv, TN_priv+FN_priv)
  if FORParity_priv != 0 and FORParity_discr != 0:
    FORParity = FORParity_priv-FORParity_discr
  elif FORParity_priv == 0:
    FORParity = 1 #max value
  else:
    FORParity = -1 #min value

  # Share of false negatives / false positives in each group.
  FN_P_discr =  (FN_discr)/len_discr
  FN_P_priv =  (FN_priv)/len_priv

  FP_P_discr = (FP_discr)/len_discr
  FP_P_priv =  (FP_priv)/len_priv

  #RecallParity = (TP_discr/(TP_discr+FN_discr))/(TP_priv/(TP_priv+FN_priv))

  raw_metrics = {}
  raw_metrics['GroupFairness'] = [GroupFairness, GroupFairness_discr, GroupFairness_priv]
  raw_metrics['PredictiveParity'] = [PredictiveParity, PredictiveParity_discr, PredictiveParity_priv]
  raw_metrics['PredictiveEquality'] = [PredictiveEquality, PredictiveEquality_discr, PredictiveEquality_priv]
  raw_metrics['EqualOpportunity'] = [EqualOpportunity, EqualOpportunity_discr, EqualOpportunity_priv]
  raw_metrics['EqualizedOdds'] = [EqualizedOdds, EqualizedOdds1, EqualizedOdds2]
  raw_metrics['ConditionalUseAccuracyEquality'] = [ConditionalUseAccuracyEquality, ConditionalUseAccuracyEquality1 , ConditionalUseAccuracyEquality2]
  raw_metrics['OverallAccuracyEquality'] = [OverallAccuracyEquality, OAE1, OAE2]
  raw_metrics['TreatmentEquality'] = [TreatmentEquality, TreatmentEquality_discr, TreatmentEquality_priv]
  raw_metrics['FORParity'] = [FORParity, FORParity_discr, FORParity_priv]
  # FN is priv - discr, FP is discr - priv.
  raw_metrics['FN'] = [FN_P_priv-FN_P_discr, FN_P_discr, FN_P_priv]
  raw_metrics['FP'] = [FP_P_discr-FP_P_priv, FP_P_discr, FP_P_priv]

  # Clip the difference to [-1, 1].
  result = {}
  for name, (difference, discr, priv) in raw_metrics.items():
    result[name] = {'Value': standardization(difference), 'Discr_group': discr, 'Priv_group': priv}

  return result

In [96]:
def compute_fairness_metrics(predictions_and_tests, target_variable_labels, models, n_splits):
  """Compute both families of fairness metrics for every fold.

  The confusion matrices are built with `compute_confusion_matrices` and each
  fold is then evaluated once with `fairness_metrics_subtraction` and once
  with `fairness_metrics_division`.

  Returns:
    {'division': ..., 'subtraction': ...}. Each entry is indexed by model name
    and then fold when the notebook-level `mitigation` is not listed in
    `without_model_mitigations`, and by fold only otherwise.
  """
  confusion_matrices = compute_confusion_matrices(predictions_and_tests, target_variable_labels, models, n_splits)

  #mitigation technique allow multiple models
  if mitigation not in without_model_mitigations:
    sub_fairness_metrics = {}
    div_fairness_metrics = {}
    for model_name in models:
      sub_dict = {}
      div_dict = {}
      for i in range(n_splits):
        fold_matrices = confusion_matrices[model_name][i]
        sub_dict[i] = fairness_metrics_subtraction(fold_matrices)
        div_dict[i] = fairness_metrics_division(fold_matrices)
      div_fairness_metrics[model_name] = div_dict
      sub_fairness_metrics[model_name] = sub_dict
  else:
    sub_fairness_metrics = {}
    div_fairness_metrics = {}
    for i in range(n_splits):
      sub_fairness_metrics[i] = fairness_metrics_subtraction(confusion_matrices[i])
      div_fairness_metrics[i] = fairness_metrics_division(confusion_matrices[i])

  return {'division': div_fairness_metrics, 'subtraction': sub_fairness_metrics}

In [97]:
def compute_mean_std_dev_fairness_metrics(fairness_metrics, models):
  """Aggregate the per-split fairness values into [mean, std] pairs.

  For every family in the notebook-level `family` and every metric name in
  `fairness_catalogue`, the 'Value' entries of the splits 0..n_splits-1
  (`n_splits` is read from the notebook scope) are collected and summarised
  with np.mean and np.std.

  Args:
    fairness_metrics: dict indexed as [family][model][split][metric]['Value']
      when `mitigation` is not in `without_model_mitigations`, otherwise as
      [family][split][metric]['Value'].
    models: iterable of model names; only used in the multi-model layout.

  Returns:
    {family: {model: {metric: [mean, std]}}} in the multi-model layout,
    {family: {metric: [mean, std]}} otherwise.
  """
  def mean_and_std(values):
    return [np.mean(values), np.std(values)]

  family_metrics = {}
  for f in family:
    if mitigation not in without_model_mitigations:
      # One summary per model; each summary is echoed as soon as it is ready.
      per_family = {}
      for model_name in models:
        model_summary = {
          metric_name: mean_and_std(
            [fairness_metrics[f][model_name][split][metric_name]['Value'] for split in range(n_splits)]
          )
          for metric_name in fairness_catalogue
        }
        print(model_name, model_summary)
        per_family[model_name] = model_summary
    else:
      # No model level in the input: summarise the splits directly.
      per_family = {
        metric_name: mean_and_std(
          [fairness_metrics[f][split][metric_name]['Value'] for split in range(n_splits)]
        )
        for metric_name in fairness_catalogue
      }

    family_metrics[f] = per_family

  return family_metrics

# Apply measures

In [100]:
# Run the evaluation end to end; each step consumes the result of the previous one:
#   1. predictions for every model and split,
#   2. per-split fairness metrics (division and subtraction families),
#   3. mean / standard deviation of every metric across the splits.
# In the recorded run the predictions are keyed by model name and then by split
# index, each entry holding the 'y_test', 'y_pred' and 's_test' arrays.
# NOTE(review): the recorded output of this cell ends with
# "ValueError: At least one label specified must be in y_true", so the fairness
# step did not complete in that run -- presumably a split in which one of
# target_variable_labels never occurs; confirm inside compute_confusion_matrices.
predictions_and_tests = compute_predictions_and_tests(df, target_variable, n_splits, models, n_estimators, random_seed)
print(predictions_and_tests)
fairness_metrics = compute_fairness_metrics(predictions_and_tests, target_variable_labels, models, n_splits)
print(fairness_metrics)
final_metrics = compute_mean_std_dev_fairness_metrics(fairness_metrics, models)
print(final_metrics)

  0%|          | 0/6 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

{'Logistic Regression': {0: {'y_test': array([0, 1, 0, ..., 1, 0, 1]), 'y_pred': array([1, 1, 0, ..., 0, 0, 1]), 's_test': array([1, 1, 0, ..., 0, 1, 1])}, 1: {'y_test': array([1, 1, 0, ..., 1, 0, 0]), 'y_pred': array([0, 0, 0, ..., 0, 0, 0]), 's_test': array([1, 1, 0, ..., 1, 1, 1])}, 2: {'y_test': array([0, 1, 1, ..., 0, 0, 0]), 'y_pred': array([1, 0, 0, ..., 0, 0, 0]), 's_test': array([1, 1, 1, ..., 1, 1, 0])}, 3: {'y_test': array([1, 1, 0, ..., 0, 0, 1]), 'y_pred': array([0, 0, 0, ..., 0, 0, 0]), 's_test': array([1, 1, 0, ..., 1, 1, 1])}, 4: {'y_test': array([0, 1, 0, ..., 1, 1, 0]), 'y_pred': array([0, 0, 0, ..., 0, 0, 0]), 's_test': array([1, 1, 1, ..., 1, 1, 1])}, 5: {'y_test': array([0, 0, 1, ..., 1, 0, 1]), 'y_pred': array([0, 0, 0, ..., 0, 0, 0]), 's_test': array([1, 1, 1, ..., 1, 0, 0])}, 6: {'y_test': array([0, 1, 0, ..., 0, 0, 0]), 'y_pred': array([0, 1, 0, ..., 0, 0, 0]), 's_test': array([1, 1, 0, ..., 1, 0, 0])}, 7: {'y_test': array([1, 0, 0, ..., 0, 1, 1]), 'y_pred': ar

ValueError: At least one label specified must be in y_true

In [None]:
# Show, split by split, the labels predicted by every model.
print("PREDS")
for split_idx in range(n_splits):
  print(split_idx)
  for model_name in models:
    split_outputs = predictions_and_tests[model_name][split_idx]
    print(model_name, split_outputs['y_pred'])
  print('---------------')


In [None]:
# Per-split values of the division-based metrics, grouped by metric and then by model.
print("DIVISION")
for metric_name in fairness_catalogue:
  print(metric_name)
  for model_name in models:
    print(model_name)
    for split_idx in range(n_splits):
      split_entry = fairness_metrics["division"][model_name][split_idx][metric_name]
      print(split_idx, split_entry['Value'])
  print('---------------')

In [None]:
# Mean of every fairness metric per model, first for the division family and
# then for the subtraction family (index 0 of each stored pair is the mean).
for heading, family_key in (("DIVISION", "division"), ("SUBTRACTION", "subtraction")):
  print(heading)
  for metric_name in fairness_catalogue:
    print(metric_name)
    for model_name in models:
      print(model_name, final_metrics[family_key][model_name][metric_name][0])
    print('---------------')

In [None]:
# Save the aggregated metrics as a pickle inside the project's "measurements"
# folder. The same naming pattern is used for every dataset: the file name
# carries the dataset name followed by the sensitive attribute.
# NOTE(review): path_to_project is assigned in the Colab setup cell; confirm it
# is also defined when the notebook runs outside Colab.
metrics_file_suffix = '/measurements/metrics-{}-{}-original.p'.format(dataset_name, sensible_attribute)
save_path = path_to_project + metrics_file_suffix
with open(save_path, 'wb') as metrics_file:
    pickle.dump(final_metrics, metrics_file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Echo the dataset name and the sensitive attribute used by this run.
print(dataset_name, sensible_attribute)