# Exploration of multistage and binary processors

In this notebook we will explore the idea of multistage and logical processors.

A multistage processor is a fairness processor that modifies several steps of the algorithm making process. In particular, we will investigate a hybrid approach in which we combine known processors that affect different stages of the machine learning pipeline.

A logical processor is a tool used when dealing with multiple sensitive attributes, or with multi-label sensitive attributes. It transforms the protected information into a single binary variable, for which many more fairness methods are available.

This note

1. 
1. 

## Preliminary adjustments

We start by importing the libraries used throughout the notebook and fixing the random seeds.

In [1]:
# Standard library
import pickle
import os

# Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
tf.AUTO_REUSE
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier


# aif360
from aif360.datasets import GermanDataset

from aif360.algorithms.preprocessing import Reweighing
from aif360.algorithms.preprocessing import DisparateImpactRemover

from aif360.algorithms.inprocessing import MetaFairClassifier
from aif360.algorithms.inprocessing import PrejudiceRemover
from aif360.algorithms.inprocessing import AdversarialDebiasing

from aif360.algorithms.postprocessing import EqOddsPostprocessing
from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing
from aif360.algorithms.postprocessing import RejectOptionClassification

# Custom imports
import utils

pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'
pip install 'aif360[inFairness]'
pip install 'aif360[Reductions]'


In [None]:
# Candidate seeds; the first entry is the one used to seed NumPy here.
seeds = [12345, 424242, 777, 32768, 45234]
seed = seeds[0]
np.random.seed(seed)

12345
424242
777
32768
45234


# Data

In [22]:
# Re-seed NumPy's global random generator at the start of the data section.
seed = 12345
np.random.seed(seed)

#=========================================================================
#                          SIMULATION DATASET
#=========================================================================

#-------------------------------------------------------------------------
#                          One variable
#-------------------------------------------------------------------------


def simul1V(seed = 12345, N = 5000, p1 = 0.5, p2 = 0.5):
    """
    Simulate the toy model and expose 'sens1' as the only sensitive variable
    ====================================================================================
    Inputs:
        seed (int): seed used for NumPy's global generator and for the dataset splits.
        N (int): number of individuals in the dataset.
        p1 (float, between 0.0 and 1.0): success probability of the binomial draw for 'sens1'.
        p2 (float, between 0.0 and 1.0): success probability of the binomial draw for 'sens2'.

    Outputs:
        data_train (aif360.StandardDataset): Train dataset (70% of the simulated data).
        data_val (aif360.StandardDataset): Validation dataset (half of the remaining data).
        data_test (aif360.StandardDataset): Test dataset (the other half of the remaining data).
        sensitive_attribute (str): Name of the sensitive attribute.
        privileged_groups (list): as returned by utils.get_privileged_groups.
        unprivileged_groups (list): as returned by utils.get_privileged_groups.
    """
    # The order of the random draws below determines the simulated data; keep it fixed
    np.random.seed(seed)

    # Binary sensitive variables
    sens1 = np.random.binomial(n = 1, p = p1, size = N)
    sens2 = np.random.binomial(n = 1, p = p2, size = N)

    # Gaussian measurements centred on each sensitive variable
    v1 = np.random.normal(loc = sens1, scale = 1.0, size = N)
    v2 = np.random.normal(loc = sens2, scale = 1.0, size = N)

    # NOTE(review): np.mean collapses v1 + v2 to ONE scalar over all individuals, so
    # 'indirect' and 'response' do not depend on each individual's sensitive values.
    # If a per-individual quantity was intended, this needs changing - confirm.
    centre = np.mean(v1 + v2)

    # Feature and latent score drawn around that common centre
    indirect = np.random.normal(loc = centre, scale = 1.0, size = N)
    weight_response = np.random.normal(loc = centre, scale = 1.0, size = N)

    # Assemble the table: boolean sensitive variables, the feature, and the thresholded response
    df = pd.DataFrame({
        'sens1': sens1 == 1,
        'sens2': sens2 == 1,
        'indirect': indirect,
        'response': weight_response > 0.0,
    })

    # Wrap into the aif360 format; 'sens2' is handed over as a categorical feature
    data = utils.convert_to_standard_dataset(
        df=df,
        target_label_name = 'response',
        sensitive_attribute = ['sens1'],
        priviledged_classes = [lambda x: x == 1],
        favorable_target_label = [1],
        features_to_keep = [],
        categorical_features = ['sens2']
    )

    # 70% train, then the remainder halved into validation and test
    data_train, remainder = data.split([0.7], shuffle=True, seed=seed)
    data_val, data_test = remainder.split([0.5], shuffle=True, seed=seed)

    # Sensitive attribute name and group definitions
    sensitive_attribute = data.protected_attribute_names[0]
    privileged_groups, unprivileged_groups = utils.get_privileged_groups(data)

    return data_train, data_val, data_test, sensitive_attribute, privileged_groups, unprivileged_groups



#-------------------------------------------------------------------------
#                          Two variables
#-------------------------------------------------------------------------


def simul2V(seed = 12345, operation = "OR", N = 5000, p1 = 0.6, p2 = 0.6):
    """
    Obtain a simulated dataset from the toy model for the case of two sensitive variables
    =====================================================================================
    Inputs:
        seed (int): seed used for NumPy's global generator and for the dataset splits.
        operation (str): logical operation used to merge the two sensitive variables.
                         Allowed values: "OR", "AND", "XOR".
        N (int): number of individuals in the dataset.
        p1 (float, between 0.0 and 1.0): success probability of the binomial draw for 'sens1'.
        p2 (float, between 0.0 and 1.0): success probability of the binomial draw for 'sens2'.

    Outputs:
        data_train (aif360.StandardDataset): Train dataset whose sensitive attribute is the merged 'prot_attr'.
        data_val (aif360.StandardDataset): Validation dataset whose sensitive attribute is the merged 'prot_attr'.
        data_test (aif360.StandardDataset): Test dataset whose sensitive attribute is the merged 'prot_attr'.
        sensitive_attribute (str): Name of the sensitive attribute.
        privileged_groups (list): as returned by utils.get_privileged_groups.
        unprivileged_groups (list): as returned by utils.get_privileged_groups.
        data_val_single (aif360.StandardDataset): Validation dataset with 'sens1' as the sensitive attribute.
        data_test_single (aif360.StandardDataset): Test dataset with 'sens1' as the sensitive attribute.

    Raises:
        ValueError: if operation is not one of "OR", "AND", "XOR".
    """
    # Validate the operation first: previously an unknown value silently skipped the
    # creation of 'prot_attr' and only failed later, when building the dataset
    combine = {
        'OR': np.logical_or,
        'AND': np.logical_and,
        'XOR': np.logical_xor,
    }
    if operation not in combine:
        raise ValueError(
            "operation must be one of 'OR', 'AND', 'XOR'; got {!r}".format(operation)
        )

    # Set the seed
    np.random.seed(seed)

    # Simulated variables (named 'sim' to avoid shadowing the builtin vars)
    sim = dict()

    # Sensitive variables (drawn from a binomial distribution)
    sim['sens1'] = np.random.binomial(n = 1, p = p1, size = N)
    sim['sens2'] = np.random.binomial(n = 1, p = p2, size = N)

    # v1, v2: Gaussian measurements centred on the sensitive variables
    sim['v1'] = np.random.normal(loc = sim['sens1'], scale = 1.0, size = N)
    sim['v2'] = np.random.normal(loc = sim['sens2'], scale = 1.0, size = N)

    # NOTE(review): np.mean collapses v1 + v2 to ONE scalar over all individuals, so
    # 'indirect' and 'response' do not depend on each individual's sensitive values.
    # If a per-individual quantity was intended, this needs changing - confirm.
    sim['mean'] = np.mean(sim['v1'] + sim['v2'])

    # Feature and latent score drawn around that common value
    sim['indirect'] = np.random.normal(loc = sim['mean'], scale = 1.0, size = N)
    sim['weight_response'] = np.random.normal(loc = sim['mean'], scale = 1.0, size = N)

    # Response variable
    sim['response'] = sim['weight_response'] > 0.0

    # Keep only the variables that go into the dataset
    final_vars = ['sens1', 'sens2', 'indirect', 'response']
    df = {name: sim[name] for name in final_vars}

    # Transform the sensitive variables to boolean
    df['sens1'] = df['sens1'] == 1
    df['sens2'] = df['sens2'] == 1

    # Merge both sensitive variables into a single binary protected attribute
    df['prot_attr'] = combine[operation](df['sens1'], df['sens2'])

    df = pd.DataFrame(df)

    # Dataset that uses only 'sens1' as sensitive attribute
    data_single = utils.convert_to_standard_dataset(
        df=df,
        target_label_name = 'response',
        sensitive_attribute = ['sens1'],
        priviledged_classes = [lambda x: x == 1],
        favorable_target_label = [1],
        features_to_keep = [],
        categorical_features = []
    )

    # Dataset that uses the merged attribute as sensitive attribute
    data = utils.convert_to_standard_dataset(
        df=df,
        target_label_name = 'response',
        sensitive_attribute = ['prot_attr'],
        priviledged_classes = [lambda x: x == 1],
        favorable_target_label = [1],
        features_to_keep = [],
        categorical_features = []
    )

    # train, val, test split (same seed for both datasets)
    data_train, vt = data.split([0.7], shuffle=True, seed=seed)
    data_val, data_test = vt.split([0.5], shuffle=True, seed=seed)

    _, vt_single = data_single.split([0.7], shuffle=True, seed=seed)
    data_val_single, data_test_single = vt_single.split([0.5], shuffle=True, seed=seed)

    # Obtain sensitive attributes and privileged groups
    sensitive_attribute = data.protected_attribute_names[0]
    privileged_groups, unprivileged_groups = utils.get_privileged_groups(data)

    return data_train, data_val, data_test, sensitive_attribute, privileged_groups, unprivileged_groups, data_val_single, data_test_single


#=========================================================================
#                          GERMAN DATASET
#=========================================================================

#-------------------------------------------------------------------------
#                          One variable
#-------------------------------------------------------------------------

def GermanDataset1V(seed = 12345):
    """
    Load the German credit dataset with 'age' as the only sensitive variable
    (https://archive.ics.uci.edu/dataset/144/statlog+german+credit+data).
    ====================================================================================
    Inputs:
        seed (int): seed used for NumPy's global generator and for the dataset splits.

    Outputs:
        data_train (aif360.StandardDataset): Train dataset (70% of the data).
        data_val (aif360.StandardDataset): Validation dataset (half of the remaining data).
        data_test (aif360.StandardDataset): Test dataset (the other half of the remaining data).
        sensitive_attribute (str): Name of the sensitive attribute.
        privileged_groups (list): as returned by utils.get_privileged_groups.
        unprivileged_groups (list): as returned by utils.get_privileged_groups.
    """
    np.random.seed(seed)

    # 'age' is the protected attribute (privileged when >= 25); sex-related columns are dropped
    german = GermanDataset(
        protected_attribute_names=['age'],
        privileged_classes=[lambda x: x >= 25],
        features_to_drop=['personal_status', 'sex']
    )

    # xgboost requires labels to start at zero: labels equal to 2 become 0,
    # and the dataset's unfavorable label is shifted by the same amount
    is_two = german.labels.ravel() == 2
    german.labels[is_two] = german.labels[is_two] - 2
    german.unfavorable_label = german.unfavorable_label - 2

    # 70% train, then the remainder halved into validation and test
    data_train, remainder = german.split([0.7], shuffle=True, seed=seed)
    data_val, data_test = remainder.split([0.5], shuffle=True, seed=seed)

    # Sensitive attribute name ('age') and group definitions
    sensitive_attribute = german.protected_attribute_names[0]
    privileged_groups, unprivileged_groups = utils.get_privileged_groups(german)
    return data_train, data_val, data_test, sensitive_attribute, privileged_groups, unprivileged_groups


#-------------------------------------------------------------------------
#                          Two variables
#-------------------------------------------------------------------------


def GermanDataset2V(seed = 12345, operation = "OR"):
    """
    Load the German credit dataset for the case of two sensitive variables
    (https://archive.ics.uci.edu/dataset/144/statlog+german+credit+data).
    ====================================================================================
    Inputs:
        seed (int): seed used for NumPy's global generator and for the dataset splits.
        operation (str): operation forwarded to
                         utils.update_german_dataset_from_multiple_protected_attributes.
                         Allowed values: "OR", "AND", "XOR".

    Outputs:
        data_train (aif360.StandardDataset): Train split of the updated (merged-attribute) dataset.
        data_val (aif360.StandardDataset): Validation split of the updated dataset.
        data_test (aif360.StandardDataset): Test split of the updated dataset.
        sensitive_attribute (str): Name of the sensitive attribute of the updated dataset.
        privileged_groups (list): as returned by utils.get_privileged_groups.
        unprivileged_groups (list): as returned by utils.get_privileged_groups.
        data_val_single (aif360.StandardDataset): Validation split of the age-only dataset.
        data_test_single (aif360.StandardDataset): Test split of the age-only dataset.
    """
    def _shift_label_two_to_zero(ds):
        # Labels equal to 2 become 0 (1: good; 0: bad); the unfavorable label moves with them
        is_two = ds.labels.ravel() == 2
        ds.labels[is_two] = ds.labels[is_two] - 2
        ds.unfavorable_label = ds.unfavorable_label - 2

    np.random.seed(seed)

    # Age-only dataset (privileged when age >= 25); sex-related columns are dropped
    single = GermanDataset(
        protected_attribute_names=['age'],
        privileged_classes=[lambda x: x >= 25],
        features_to_drop=['personal_status', 'sex']
    )

    # Dataset whose protected attribute is built by the utils helper from several attributes
    merged = utils.update_german_dataset_from_multiple_protected_attributes(single, operation)

    # Relabel the merged dataset first, then the age-only one.
    # NOTE(review): this assumes the helper returns an object that does not share
    # state with its input; otherwise the unfavorable label is shifted twice - confirm.
    _shift_label_two_to_zero(merged)
    _shift_label_two_to_zero(single)

    # 70% train, then the remainder halved into validation and test
    data_train, remainder = merged.split([0.7], shuffle=True, seed=seed)
    data_val, data_test = remainder.split([0.5], shuffle=True, seed=seed)

    # Same splits on the age-only dataset; its train part is not needed
    _, remainder_single = single.split([0.7], shuffle=True, seed=seed)
    data_val_single, data_test_single = remainder_single.split([0.5], shuffle=True, seed=seed)

    # Sensitive attribute name and group definitions of the merged dataset
    sensitive_attribute = merged.protected_attribute_names[0]
    privileged_groups, unprivileged_groups = utils.get_privileged_groups(merged)
    return data_train, data_val, data_test, sensitive_attribute, privileged_groups, unprivileged_groups, data_val_single, data_test_single



#=========================================================================
#                          HOMECREDIT DATASET
#=========================================================================


#-------------------------------------------------------------------------
#                          Data handling
#-------------------------------------------------------------------------

def LoadHomecredit(
        seed: int = 12345,
        sample_size: int = 5000
        ) -> None:
    """
    Draws a random sample of rows from 'data/homecredit.zip' and stores it as
    'data/homecredit.pickle' (https://www.kaggle.com/c/home-credit-default-risk).
    ====================================================================================
    Inputs:
        seed (int): seed for NumPy's global generator, which drives the row sampling.
        sample_size (int): number of rows to keep (sampled without replacement).
    Outputs:
        None
    """
    # Fix the generator so that the same rows are drawn on every run
    np.random.seed(seed)

    # Read the full table from the local zip archive
    full_table = pd.read_csv('data/homecredit.zip', compression='zip', header=0, sep=',', quotechar='"')

    # Pick sample_size distinct row positions and renumber the kept rows from zero
    picked_rows = np.random.choice(full_table.shape[0], size = sample_size, replace = False)
    sample = full_table.iloc[picked_rows, :].reset_index(drop=True)

    # Persist the sample in the data folder
    path = 'data/'
    with open(path + 'homecredit.pickle', 'wb') as handle:
        pickle.dump(sample, handle, protocol=pickle.HIGHEST_PROTOCOL)


    
def ReadHomecredit():
    """
    Loads the homecredit sample stored at 'data/homecredit.pickle'
    (https://www.kaggle.com/c/home-credit-default-risk).
    ====================================================================================
    Inputs:
        None
    Outputs:
        homecredit (pd.DataFrame): the object unpickled from 'data/homecredit.pickle'
    """
    return pd.read_pickle('data/homecredit.pickle')


#-------------------------------------------------------------------------
#                          One variable
#-------------------------------------------------------------------------


def Homecredit1V(seed = 12345):
    """
    Read and preprocess the Homecredit dataset for the case of one sensitive variable
    (https://www.kaggle.com/c/home-credit-default-risk).
    ====================================================================================
    Inputs:
        seed (int): seed used for NumPy's global generator and for the dataset splits.

    Outputs:
        data_train (aif360.StandardDataset): Train dataset obtained from the Homecredit dataset.
        data_val (aif360.StandardDataset): Validation dataset obtained from the Homecredit dataset.
        data_test (aif360.StandardDataset): Test dataset obtained from the Homecredit dataset.
        sensitive_attribute (str): Name of the sensitive attribute.
        privileged_groups (list): list that stores a dictionary with the sensitive attribute and the privileged label.
        unprivileged_groups (list): list that stores a dictionary with the sensitive attribute and the unprivileged label.
    """
    # Set the seed
    np.random.seed(seed)

    # Read the stored sample (the data is loaded here, not passed in by the caller)
    dataset_homecredit = ReadHomecredit()

    # Work on a copy of the loaded frame
    homecredit = dataset_homecredit.copy(deep = True)

    # Pre process
    homecredit = utils.preprocess_homecredit(homecredit)

    # Transform to standard dataset.
    # The sensitive attribute is passed as a list, like in every other call to
    # utils.convert_to_standard_dataset in this notebook (it used to be the bare
    # string 'AGE' here only).
    dataset_homecredit_aif = utils.convert_to_standard_dataset(
            df=homecredit,
            target_label_name='TARGET',
            sensitive_attribute=['AGE'],
            priviledged_classes=[lambda x: x >= 25],
            favorable_target_label=[1],
            features_to_keep=[],
            categorical_features=[])

    # Perform train, test, val split
    data_train, vt = dataset_homecredit_aif.split([0.7], shuffle=True, seed=seed)
    data_val, data_test = vt.split([0.5], shuffle=True, seed=seed)

    # Obtain sensitive attributes and privileged groups
    sensitive_attribute = dataset_homecredit_aif.protected_attribute_names[0] # age
    privileged_groups, unprivileged_groups = utils.get_privileged_groups(dataset_homecredit_aif)
    return data_train, data_val, data_test, sensitive_attribute, privileged_groups, unprivileged_groups


#-------------------------------------------------------------------------
#                          Two variables
#-------------------------------------------------------------------------


def Homecredit2V(seed = 12345, operation = "OR"):
    """
    Read and preprocess the Homecredit dataset for the case of two sensitive variables
    (https://www.kaggle.com/c/home-credit-default-risk).
    ====================================================================================
    Inputs:
        seed (int): seed used for NumPy's global generator and for the dataset splits.
        operation (str): operation forwarded to utils.preprocess_homecredit_mult.
                         Allowed values: "OR", "AND", "XOR".

    Outputs:
        data_train (aif360.StandardDataset): Train dataset with 'PROT_ATTR' as the sensitive attribute.
        data_val (aif360.StandardDataset): Validation dataset with 'PROT_ATTR' as the sensitive attribute.
        data_test (aif360.StandardDataset): Test dataset with 'PROT_ATTR' as the sensitive attribute.
        sensitive_attribute (str): Name of the sensitive attribute.
        privileged_groups (list): list that stores a dictionary with the sensitive attribute and the privileged label.
        unprivileged_groups (list): list that stores a dictionary with the sensitive attribute and the unprivileged label.
        data_val_single (aif360.StandardDataset): Validation dataset with 'AGE' as the sensitive attribute.
        data_test_single (aif360.StandardDataset): Test dataset with 'AGE' as the sensitive attribute.
    """
    # Set the seed
    np.random.seed(seed)

    # Read the stored sample (the data is loaded here, not passed in by the caller)
    dataset_homecredit = ReadHomecredit()

    # Copy the dataset
    homecredit = dataset_homecredit.copy(deep = True)

    # Pre process the data; the same preprocessed frame feeds both datasets
    homecredit = utils.preprocess_homecredit_mult(homecredit, operation = operation)
    homecredit_single = homecredit.copy(deep = True)

    # Dataset whose sensitive attribute is the merged 'PROT_ATTR'
    homecredit = utils.convert_to_standard_dataset(
            df=homecredit,
            target_label_name='TARGET',
            sensitive_attribute=['PROT_ATTR'],
            priviledged_classes=[lambda x: x == 1],
            favorable_target_label=[1],
            features_to_keep=[],
            categorical_features=[])

    # Dataset whose sensitive attribute is 'AGE' alone (privileged when >= 25)
    homecredit_single = utils.convert_to_standard_dataset(
            df=homecredit_single,
            target_label_name='TARGET',
            sensitive_attribute=['AGE'],
            priviledged_classes=[lambda x: x >= 25],
            favorable_target_label=[1],
            features_to_keep=[],
            categorical_features=[])

    # train, val, test split
    data_train, vt = homecredit.split([0.7], shuffle=True, seed=seed)
    data_val, data_test = vt.split([0.5], shuffle=True, seed=seed)

    # Same split on the single-attribute dataset. This must split homecredit_single:
    # splitting the merged dataset again would return 'PROT_ATTR' data as the
    # "single" validation and test sets.
    _, vt_single = homecredit_single.split([0.7], shuffle=True, seed=seed)
    data_val_single, data_test_single = vt_single.split([0.5], shuffle=True, seed=seed)

    # Obtain sensitive attributes and privileged groups
    sensitive_attribute = homecredit.protected_attribute_names[0]
    privileged_groups, unprivileged_groups = utils.get_privileged_groups(homecredit)

    return data_train, data_val, data_test, sensitive_attribute, privileged_groups, unprivileged_groups, data_val_single, data_test_single


# Variables

In [4]:
# Fitted models, keyed by method name
methods = {}

# Range of classification thresholds: 50 evenly spaced values from 0.01 to 1.0
thresh_sweep = np.linspace(0.01, 1.0, num=50)

# Metrics obtained over the threshold range, keyed by method name
metrics_sweep = {}

# Results at the best threshold, for validation and for test
metrics_best_thresh_validate = {}
metrics_best_thresh_test = {}

In [5]:
# Baseline classifiers enabled at module level. The fairness-aware in-processing
# models (adversarial debiasing, meta-fair classifier, prejudice remover) are
# disabled here; ObtainPrelDataMultiple configures them.
modelsNames = ['logreg', 'xgboost']

# Class used to build each enabled model, keyed by model name
modelsTrain = dict(
    logreg=LogisticRegression,
    xgboost=XGBClassifier,
)

# Keyword arguments handed to each constructor, keyed by model name
modelsArgs = dict(
    logreg=dict(solver='liblinear', random_state=seed),
    xgboost=dict(eval_metric='error', eta=0.1, max_depth=6, subsample=0.8),
)


In [6]:
def ObtainPrelDataSingle():
    """
    Build the registry of baseline models (logistic regression and xgboost)
    ====================================================================================
    Inputs:
        None (reads the module-level seed for the logistic regression).
    Outputs:
        modelsNames (list): names of the models, ['logreg', 'xgboost'].
        modelsTrain (dict): model name -> class used to build the model.
        modelsArgs (dict): model name -> keyword arguments for that class.
    """
    # Model classes, in the order in which the names are reported
    modelsTrain = {'logreg': LogisticRegression, 'xgboost': XGBClassifier}

    # Constructor arguments for each model
    logreg_args = dict(solver='liblinear', random_state=seed)
    xgboost_args = dict(eval_metric='error', eta=0.1, max_depth=6, subsample=0.8)
    modelsArgs = {'logreg': logreg_args, 'xgboost': xgboost_args}

    # The names are exactly the keys of the registry
    modelsNames = list(modelsTrain)

    return modelsNames, modelsTrain, modelsArgs

def ObtainPrelDataMultiple(sensitive_attribute, privileged_groups, unprivileged_groups):
    """
    Build the registry of baseline and fairness-aware models
    ====================================================================================
    Inputs:
        sensitive_attribute (str): name handed to the meta-fair classifier and the prejudice remover.
        privileged_groups (list): handed to the adversarial debiasing model.
        unprivileged_groups (list): handed to the adversarial debiasing model.
    Outputs:
        modelsNames (list): names of all the models in the registry.
        modelsBenchmark (list): names of the baseline models.
        modelsPost (list): pre-processed baseline names ('RW_*', 'DI_*') followed by the fair model names.
        modelsTrain (dict): model name -> class used to build the model.
        modelsArgs (dict): model name -> keyword arguments for that class.
    """
    modelsBenchmark = ['logreg', 'xgboost']

    # NOTE(review): 'metafair_sr' and 'metafair_fdr' have no entry in modelsTrain or
    # modelsArgs below (only 'metafair' does) - confirm how modelsPost is consumed.
    modelsFair = ['adversarial', 'metafair_sr', 'metafair_fdr', 'pir']

    modelsNames = modelsBenchmark + ['adversarial', 'metafair', 'pir']

    # Every baseline model combined with every pre-processor prefix
    modelsPre = []
    for prefix in ('RW', 'DI'):
        for model_name in modelsBenchmark:
            modelsPre.append(f"{prefix}_{model_name}")

    modelsPost = modelsPre + modelsFair

    # Class used to build each model
    modelsTrain = dict(
        logreg=LogisticRegression,
        xgboost=XGBClassifier,
        adversarial=AdversarialDebiasing,
        metafair=MetaFairClassifier,
        pir=PrejudiceRemover,
    )

    # Keyword arguments for each class
    modelsArgs = dict(
        logreg={'solver': 'liblinear', 'random_state': seed},
        xgboost={'eval_metric': 'error', 'eta': 0.1, 'max_depth': 6, 'subsample': 0.8},
        adversarial={
            'privileged_groups': privileged_groups,
            'unprivileged_groups': unprivileged_groups,
            'scope_name': 'debiased_classifier',
            'debias': True,
            'num_epochs': 80,
        },
        metafair={
            'tau': 0.8,
            'sensitive_attr': sensitive_attribute,
            'type': 'sr',
            'seed': seed,
        },
        pir={'sensitive_attr': sensitive_attribute, 'eta': 50.0},
    )

    return modelsNames, modelsBenchmark, modelsPost, modelsTrain, modelsArgs

# Auxiliary functions

In [7]:
def results(val, test, method):
    """
    Evaluate a fitted model on validation and test (one sensitive variable)
    ====================================================================================
    Inputs:
        val: validation dataset.
        test: test dataset.
        method (str): key of the fitted model in the module-level methods dict.
    Outputs:
        None. Fills metrics_sweep, metrics_best_thresh_validate and
        metrics_best_thresh_test under the key method.
    """
    fitted = methods[method]

    # Metrics over the whole range of thresholds, on validation
    sweep = utils.metrics_threshold_sweep(dataset=val, model=fitted, thresh_arr=thresh_sweep)
    metrics_sweep[method] = sweep

    # Summary at the best threshold; measurement and combination are module-level settings
    best_on_val = utils.describe_metrics(sweep, measurement, combination)
    metrics_best_thresh_validate[method] = best_on_val

    # Metrics on test, using the threshold selected on validation
    metrics_best_thresh_test[method] = utils.compute_metrics(
        dataset=test,
        model=fitted,
        threshold=best_on_val['best_threshold'])
    


def results_mult(val, val_single, test, test_single, method):
    """
    Evaluate a fitted model on validation and test (two sensitive variables)
    ====================================================================================
    Inputs:
        val: validation dataset.
        val_single: validation dataset with a single sensitive variable.
        test: test dataset.
        test_single: test dataset with a single sensitive variable.
        method (str): key of the fitted model in the module-level methods dict.
    Outputs:
        None. Fills metrics_sweep, metrics_best_thresh_validate and
        metrics_best_thresh_test under the key method.
    """
    # The module-level dicts are only mutated, never rebound, so no global statement is needed
    fitted = methods[method]

    # Metrics over the whole range of thresholds, on validation
    sweep = utils.metrics_threshold_sweep_mult(
        dataset = val,
        dataset_single = val_single,
        model = fitted,
        thresh_arr = thresh_sweep
    )
    metrics_sweep[method] = sweep

    # Summary at the best threshold
    best_on_val = utils.describe_metrics(sweep)
    metrics_best_thresh_validate[method] = best_on_val

    # Metrics on test, using the threshold selected on validation
    metrics_best_thresh_test[method] = utils.compute_metrics_mult(
        dataset = test,
        dataset_single = test_single,
        model = fitted,
        threshold = best_on_val['best_threshold'])

# Benchmarks

In [8]:
def BenchmarkLogistic(data_train, data_val, data_test):
    """
    Fit the logistic regression baseline and record its metrics under 'logreg'
    ====================================================================================
    Inputs:
        data_train: train dataset.
        data_val: validation dataset.
        data_test: test dataset.
    Outputs:
        None. Stores the fitted model in the module-level methods dict and, depending
        on the module-level nvar (1 or 2), calls results or results_mult.
    """
    model_name = 'logreg'

    # Work on deep copies of the datasets
    train = data_train.copy(deepcopy=True)
    val = data_val.copy(deepcopy=True)
    test = data_test.copy(deepcopy=True)

    # Register the classifier, then replace it with the fitted one
    # (the fit uses the instance weights of the train set)
    classifier = LogisticRegression(solver='liblinear', random_state=seed)
    methods[model_name] = classifier
    methods[model_name] = classifier.fit(
        train.features,
        train.labels.ravel(),
        sample_weight=train.instance_weights
    )

    # One sensitive variable
    if nvar == 1:
        results(val, test, model_name)
        return

    # Two sensitive variables: the single-variable datasets come from module level
    if nvar == 2:
        val_single = data_val_single.copy(deepcopy = True)
        test_single = data_test_single.copy(deepcopy = True)
        results_mult(val, val_single, test, test_single, model_name)




def BenchmarkXGB(data_train, data_val, data_test):
    """
    Fit the xgboost baseline and record its metrics under 'xgboost'
    ====================================================================================
    Inputs:
        data_train: train dataset.
        data_val: validation dataset.
        data_test: test dataset.
    Outputs:
        None. Stores the fitted model in the module-level methods dict and, depending
        on the module-level nvar (1 or 2), calls results or results_mult.
    """
    model_name = 'xgboost'

    # Work on deep copies of the datasets
    train = data_train.copy(deepcopy=True)
    val = data_val.copy(deepcopy=True)
    test = data_test.copy(deepcopy=True)

    # Register the classifier, then replace it with the fitted one
    classifier = XGBClassifier(eval_metric='error', eta=0.1, max_depth=6, subsample=0.8)
    methods[model_name] = classifier
    methods[model_name] = classifier.fit(train.features, train.labels.ravel())

    # One sensitive variable
    if nvar == 1:
        results(val, test, model_name)
        return

    # Two sensitive variables: the single-variable datasets come from module level
    if nvar == 2:
        val_single = data_val_single.copy(deepcopy = True)
        test_single = data_test_single.copy(deepcopy = True)
        results_mult(val, val_single, test, test_single, model_name)

# Pre Processing

In [9]:
def PreprocRW(data_train, data_val, data_test, privileged_groups, unprivileged_groups, model, do_results = True):
    """Reweigh the data with ``Reweighing`` and train ``model`` on the result.

    The reweigher is fitted on the training split only and then applied to
    the training, validation and test splits. The trained model is stored in
    the global ``methods`` dict under ``'RW_' + model``.

    Args:
        data_train: dataset used to fit the reweigher and the model.
        data_val: validation dataset.
        data_test: test dataset.
        privileged_groups: privileged group definition passed to ``Reweighing``.
        unprivileged_groups: unprivileged group definition passed to ``Reweighing``.
        model: key into the global ``modelsTrain`` / ``modelsArgs`` dicts that
            selects the estimator to train.
        do_results: when true, evaluate the model with ``results``
            (``nvar == 1``) or ``results_mult`` (``nvar == 2``).
    """
    # Global variables
    global methods

    # Assign the correct name
    method = "RW"
    model_name = method + "_" + model

    # Copy the datasets
    train, val, test = data_train.copy(deepcopy=True), data_val.copy(deepcopy=True), data_test.copy(deepcopy=True)

    # Call the processor
    PreProcessor = Reweighing(
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups
    )

    # Transform the data: each split must be transformed into its own
    # counterpart (validation -> valRW, test -> testRW)
    PreProcessor.fit(train)
    trainRW = PreProcessor.transform(train)
    valRW = PreProcessor.transform(val)
    testRW = PreProcessor.transform(test)

    # The adversarial model needs a fresh TensorFlow graph and session
    uses_session = model == 'adversarial'
    if uses_session:
        tf.compat.v1.reset_default_graph()
        modelsArgs[model]['sess'] = tf.Session()

    try:
        # Train the model
        Algorithm = modelsTrain[model](**modelsArgs[model])

        if model in modelsBenchmark:
            # Benchmark estimators are fitted on raw arrays; only the
            # logistic regression is given the reweighed instance weights
            if model == 'logreg':
                fit_params = {'sample_weight': trainRW.instance_weights}
                methods[model_name] = Algorithm.fit(trainRW.features, trainRW.labels.ravel(), **fit_params)
            else:
                methods[model_name] = Algorithm.fit(trainRW.features, trainRW.labels.ravel())
        else:
            # Other models are fitted directly on the dataset object
            methods[model_name] = Algorithm.fit(trainRW)

        # Obtain results
        if do_results:
            if nvar == 1:
                results(valRW, testRW, model_name)

            elif nvar == 2:
                val_single, test_single = data_val_single.copy(deepcopy = True), data_test_single.copy(deepcopy = True)
                results_mult(valRW, val_single, testRW, test_single, model_name)
    finally:
        # Release the session even if training or evaluation fails
        if uses_session:
            modelsArgs[model]['sess'].close()




def PreprocDI(data_train, data_val, data_test, sensitive_attribute, repair_level, model, do_results = True):
    """Repair the data with ``DisparateImpactRemover`` and train ``model`` on it.

    ``fit_transform`` is applied separately to the training, validation and
    test splits. The trained model is stored in the global ``methods`` dict
    under ``'DI_' + model``.

    Args:
        data_train: dataset used to train the model after repair.
        data_val: validation dataset.
        data_test: test dataset.
        sensitive_attribute: attribute name passed to ``DisparateImpactRemover``.
        repair_level: repair level passed to ``DisparateImpactRemover``.
        model: key into the global ``modelsTrain`` / ``modelsArgs`` dicts that
            selects the estimator to train.
        do_results: when true, evaluate the model with ``results``
            (``nvar == 1``) or ``results_mult`` (``nvar == 2``).
    """
    # Global variables
    global methods
    global data_val_single
    global data_test_single

    # Assign the correct name
    method = "DI"
    model_name = method + "_" + model

    # Copy the datasets
    train, val, test = data_train.copy(deepcopy=True), data_val.copy(deepcopy=True), data_test.copy(deepcopy=True)

    # Initialize the processor
    PreProcessor = DisparateImpactRemover(
        repair_level=repair_level,
        sensitive_attribute=sensitive_attribute
    )

    # Transform the data (one repair per split; no discarded warm-up call)
    trainDI = PreProcessor.fit_transform(train)
    valDI = PreProcessor.fit_transform(val)
    testDI = PreProcessor.fit_transform(test)

    # The adversarial model needs a fresh TensorFlow graph and session
    uses_session = model == 'adversarial'
    if uses_session:
        tf.compat.v1.reset_default_graph()
        modelsArgs[model]['sess'] = tf.Session()

    try:
        # Train the model
        Algorithm = modelsTrain[model](**modelsArgs[model])

        if model in modelsBenchmark:
            # Benchmark estimators are fitted on raw arrays; only the
            # logistic regression is given the instance weights
            if model == 'logreg':
                fit_params = {'sample_weight': trainDI.instance_weights}
                methods[model_name] = Algorithm.fit(trainDI.features, trainDI.labels.ravel(), **fit_params)
            else:
                methods[model_name] = Algorithm.fit(trainDI.features, trainDI.labels.ravel())
        else:
            # Other models are fitted directly on the dataset object
            methods[model_name] = Algorithm.fit(trainDI)

        # Obtain results
        if do_results:
            if nvar == 1:
                results(valDI, testDI, model_name)

            elif nvar == 2:
                val_single, test_single = data_val_single.copy(deepcopy = True), data_test_single.copy(deepcopy = True)
                results_mult(valDI, val_single, testDI, test_single, model_name)
    finally:
        # Release the session even if training or evaluation fails
        if uses_session:
            modelsArgs[model]['sess'].close()
        

# In-processing

In [10]:
def InprocMeta(data_train, data_val, data_test, sensitive_attribute, quality,  tau = 0.8, do_results = True):
    """Train a ``MetaFairClassifier`` and store it, with its scores, in ``methods``.

    The model is registered under ``'metafair_<quality>'``. After fitting, the
    scores predicted on the three splits are attached to the model object as
    ``scores_train``, ``scores_val`` and ``scores_test``.

    Args:
        data_train: dataset used to fit the classifier.
        data_val: validation dataset.
        data_test: test dataset.
        sensitive_attribute: passed to the classifier as ``sensitive_attr``.
        quality: passed to the classifier as ``type``; also part of the key.
        tau: passed to the classifier as ``tau``.
        do_results: when true, evaluate with ``results`` (``nvar == 1``) or
            ``results_mult`` (``nvar == 2``).
    """
    global methods
    global data_val_single
    global data_test_single

    # Deep copies keep the caller's datasets untouched
    train = data_train.copy(deepcopy=True)
    val = data_val.copy(deepcopy=True)
    test = data_test.copy(deepcopy=True)

    # Key under which the model is registered
    key = '{}_{}'.format("metafair", quality)

    # Register the classifier first, then replace it by the fitted one
    methods[key] = MetaFairClassifier(
        tau=tau,
        sensitive_attr=sensitive_attribute,
        type=quality,
        seed=seed,
    )
    methods[key] = methods[key].fit(train)

    # Attach the predicted scores of every split to the fitted model
    fitted = methods[key]
    fitted.scores_train = fitted.predict(train).scores
    fitted.scores_val = fitted.predict(val).scores
    fitted.scores_test = fitted.predict(test).scores

    if not do_results:
        return

    if nvar == 1:
        results(val, test, key)
    elif nvar == 2:
        val_single = data_val_single.copy(deepcopy=True)
        test_single = data_test_single.copy(deepcopy=True)
        results_mult(val, val_single, test, test_single, key)



def InprocPI(data_train, data_val, data_test, sensitive_attribute, eta = 50.0, do_results = True):
    """Train a ``PrejudiceRemover`` and store it, with its scores, in ``methods``.

    The model is registered under the key ``'pir'``. After fitting, the scores
    predicted on the three splits are attached to the model object as
    ``scores_train``, ``scores_val`` and ``scores_test``.

    Args:
        data_train: dataset used to fit the model.
        data_val: validation dataset.
        data_test: test dataset.
        sensitive_attribute: passed to the model as ``sensitive_attr``.
        eta: passed to the model as ``eta``.
        do_results: when true, evaluate the model. With ``nvar == 2`` the
            evaluation goes through ``results_mult`` (as the other processors
            in this notebook do); otherwise through ``results``.
    """
    # Global variables
    global methods
    global data_val_single
    global data_test_single

    # Assign the correct name
    model_name = 'pir'

    # Copy the datasets
    train, val, test = data_train.copy(deepcopy=True), data_val.copy(deepcopy=True), data_test.copy(deepcopy=True)

    # Initialize the model and store it in the dictionary
    methods[model_name] = PrejudiceRemover(
        sensitive_attr=sensitive_attribute,
        eta=eta
        )

    # Train the model
    methods[model_name] = methods[model_name].fit(train)

    # Obtain scores
    methods[model_name].scores_train = methods[model_name].predict(train).scores
    methods[model_name].scores_val = methods[model_name].predict(val).scores
    methods[model_name].scores_test = methods[model_name].predict(test).scores

    # Obtain results
    if do_results:
        if nvar == 2:
            # Two sensitive attributes: evaluate like the sibling processors
            val_single, test_single = data_val_single.copy(deepcopy = True), data_test_single.copy(deepcopy = True)
            results_mult(val, val_single, test, test_single, model_name)
        else:
            results(val, test, model_name)



def InprocAdvs(data_train, data_val, data_test, privileged_groups, unprivileged_groups, do_results = True):
    """Train an ``AdversarialDebiasing`` model and store it in ``methods``.

    The model is registered under the key ``'adversarial'`` and uses the
    global TensorFlow session ``sess``, which the caller must create before
    the call and close afterwards.

    Args:
        data_train: dataset used to fit the model.
        data_val: validation dataset.
        data_test: test dataset.
        privileged_groups: privileged group definition passed to the model.
        unprivileged_groups: unprivileged group definition passed to the model.
        do_results: when true, evaluate the model. With ``nvar == 2`` the
            evaluation goes through ``results_mult`` (as the other processors
            in this notebook do); otherwise through ``results``.
    """
    # Global variables
    global methods
    global sess
    global data_val_single
    global data_test_single

    # Assign the correct name
    model_name = 'adversarial'

    # Copy the datasets
    train, val, test = data_train.copy(deepcopy=True), data_val.copy(deepcopy=True), data_test.copy(deepcopy=True)

    # Build the model and train it
    methods[model_name] = AdversarialDebiasing(
        privileged_groups = privileged_groups,
        unprivileged_groups = unprivileged_groups,
        scope_name = 'debiased_classifier',
        debias=True,
        sess=sess,
        num_epochs=80
    )
    methods[model_name].fit(train)

    # Obtain results
    if do_results:
        if nvar == 2:
            # Two sensitive attributes: evaluate like the sibling processors
            val_single, test_single = data_val_single.copy(deepcopy = True), data_test_single.copy(deepcopy = True)
            results_mult(val, val_single, test, test_single, model_name)
        else:
            results(val, test, model_name)

# Post-processing

In [11]:
def PosprocPlatt(data_train, data_val, data_test, privileged_groups, model_name):
    """Calibrate the scores of a trained model with per-group Platt scaling.

    One ``LogisticRegression`` calibrator per sensitive group (privileged /
    unprivileged) is fitted on the validation scores of
    ``methods[model_name]``. The calibrated validation scores are swept over
    50 evenly spaced thresholds between their minimum and maximum, and the
    threshold selected by ``utils.describe_metrics`` is then applied to the
    calibrated test scores.

    Side effects (global dicts):
        * ``methods``: the two calibrators, keyed
          ``'<model_name>__Platt_priv'`` and ``'<model_name>__Platt_unpriv'``.
        * ``metrics_sweep``, ``metrics_best_thresh_validate`` and
          ``metrics_best_thresh_test``: entries keyed ``model_name + '_Platt'``.

    Args:
        data_train: unused; kept so that all post-processors share a signature.
        data_val: validation dataset, used to fit the calibrators and to
            select the threshold.
        data_test: test dataset.
        privileged_groups: list of dicts; the first value of the first dict
            is used as the privileged label when splitting the data.
        model_name: key of the already trained base model in ``methods``; it
            must also have an entry in ``metrics_best_thresh_validate``.
    """
    # Global variables
    global methods
    global metrics_sweep
    global metrics_best_thresh_validate
    global metrics_best_thresh_test

    # Assign the correct name
    fairness_method = '_Platt'
    platt_name = model_name + fairness_method

    # Value of the sensitive attribute that identifies the privileged group
    privileged_label = list((privileged_groups[0].values()))[0]

    # Validation
    #---------------

    # Copy the datasets
    val, test = data_val.copy(deepcopy = True), data_test.copy(deepcopy = True)

    # Predictions of the base model at its best validation threshold
    model_thresh = metrics_best_thresh_validate[model_name]['best_threshold']
    val_preds = utils.update_dataset_from_model(val, methods[model_name], class_thresh = model_thresh)

    ## Platt Scaling:
    #---------------
    # 1. Split the validation predictions on the sensitive attribute
    val_preds_priv, val_preds_unpriv, priv_indices, unpriv_indices = utils.split_dataset_on_sensitive_attribute(
        dataset = val_preds,
        privileged_group_label = privileged_label
    )

    # 2. Copy the validation predictions; the copy receives the Platt scores
    val_preds2 = val_preds.copy(deepcopy = True)

    # 3. Fit one calibrator per group
    sensitive_groups_data = {'priv': [val_preds_priv, priv_indices],
                             'unpriv': [val_preds_unpriv, unpriv_indices]}
    for group, (group_preds, group_indices) in sensitive_groups_data.items():
        # Assign the correct name
        model_name_group = '{}_{}_{}'.format(model_name, fairness_method, group)
        # Initialize the calibrator and store it in the dict
        methods[model_name_group] = LogisticRegression()
        # Fit it on the group's base-model scores against the true labels
        methods[model_name_group] = methods[model_name_group].fit(
            group_preds.scores,
            val.subset(group_indices).labels.ravel()
        )

        # The Platt score is the predicted probability of the favorable label
        scores_group = methods[model_name_group].predict_proba(group_preds.scores)
        pos_ind_group = np.where(methods[model_name_group].classes_ == group_preds.favorable_label)[0][0]
        val_preds2.scores[group_indices] = scores_group[:, pos_ind_group].reshape(-1,1)

    # Thresholds spanning the range of the calibrated scores
    thresh_sweep_platt = np.linspace(np.min(val_preds2.scores.ravel()),
                                     np.max(val_preds2.scores.ravel()),
                                     50)

    # Metrics on the validation set. The sweep must score the calibrated
    # predictions (val_preds2): the thresholds are derived from them and the
    # selected threshold is later applied to calibrated test scores.
    metrics_sweep[platt_name] = utils.metrics_postprocessing_threshold_sweep_from_scores(
            dataset_true = val,
            dataset_preds = val_preds2,
            thresh_arr = thresh_sweep_platt
        )

    # Evaluate metrics and obtain the best thresh
    metrics_best_thresh_validate[platt_name] = utils.describe_metrics(metrics_sweep[platt_name])

    # Test
    #---------------

    test_preds = utils.update_dataset_from_model(test, methods[model_name], class_thresh = model_thresh)

    ## Platt Scaling:
    #---------------

    # 1. Split the test predictions on the sensitive attribute
    test_preds_priv, test_preds_unpriv, priv_indices, unpriv_indices = utils.split_dataset_on_sensitive_attribute(
        dataset = test_preds,
        privileged_group_label = privileged_label
    )

    # 2. Copy the test data; the copy receives the Platt scores
    if nvar == 1:
        test_preds2 = test_preds.copy(deepcopy = True)
    elif nvar == 2:
        # NOTE(review): the other post-processors take their "single"
        # reference from the global data_test_single; here both objects are
        # copies of data_test. Confirm this is intended.
        test_single = data_test.copy(deepcopy = True)
        test_preds2 = data_test.copy(deepcopy = True)
        test_single.scores = np.zeros_like(test_single.labels)

    # 3. Apply the calibrator of each group
    sensitive_groups_data_test = {'priv': [test_preds_priv, priv_indices],
                                  'unpriv': [test_preds_unpriv, unpriv_indices]}

    for group, (group_preds, group_indices) in sensitive_groups_data_test.items():
        # Assign the correct name
        model_name_group = '{}_{}_{}'.format(model_name, fairness_method, group)

        # Predict in each group and store the Platt scores in test_preds2
        scores_group = methods[model_name_group].predict_proba(group_preds.scores)
        pos_ind_group = np.where(methods[model_name_group].classes_ == group_preds.favorable_label)[0][0]
        test_preds2.scores[group_indices] = scores_group[:, pos_ind_group].reshape(-1,1)

    if nvar == 1:
        # Obtain metrics
        metrics_best_thresh_test[platt_name] = utils.compute_metrics_from_scores(
            dataset_true = test,
            dataset_pred = test_preds2,
            threshold = metrics_best_thresh_validate[platt_name]['best_threshold']
        )

    elif nvar == 2:
        # Obtain metrics
        metrics_best_thresh_test[platt_name] = utils.compute_metrics_from_scores(
            dataset_true = test_single,
            dataset_pred = test_preds2,
            threshold = metrics_best_thresh_validate[platt_name]['best_threshold']
        )

     




def PosprocEqoddsLABELS(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model_name):
    """Post-process a trained model with ``EqOddsPostprocessing`` (label based).

    The post-processor is fitted on the training split and the base model's
    training predictions, swept over the global ``thresh_sweep`` on
    validation, and evaluated on test at the selected threshold. All entries
    written to ``methods``, ``metrics_sweep``, ``metrics_best_thresh_validate``
    and ``metrics_best_thresh_test`` are keyed ``model_name + '_eqOdds'``.

    Args:
        data_train: dataset used to fit the post-processor.
        data_val: validation dataset used for the threshold sweep.
        data_test: test dataset.
        unprivileged_groups: unprivileged group definition for the post-processor.
        privileged_groups: privileged group definition for the post-processor.
        model_name: key of the already trained base model in ``methods``.
    """
    global methods
    global metrics_best_thresh_validate

    # Key shared by every entry this function writes
    key = model_name + '_eqOdds'
    base_model = methods[model_name]

    # Deep copies keep the caller's datasets untouched
    train = data_train.copy(deepcopy=True)
    val = data_val.copy(deepcopy=True)
    test = data_test.copy(deepcopy=True)

    # Predictions of the base model on every split
    train_preds = utils.update_dataset_from_model(train, base_model)
    val_preds = utils.update_dataset_from_model(val, base_model)
    test_preds = utils.update_dataset_from_model(test, base_model)

    # Register the post-processor first, then replace it by the fitted one
    methods[key] = EqOddsPostprocessing(
        privileged_groups=privileged_groups,
        unprivileged_groups=unprivileged_groups,
        seed=seed,
    )
    methods[key] = methods[key].fit(train, train_preds)

    # Threshold sweep on validation
    metrics_sweep[key] = utils.metrics_postprocessing_threshold_sweep(
        dataset_true=val,
        dataset_preds=val_preds,
        model=methods[key],
        thresh_arr=thresh_sweep,
        scores_or_labels='labels',
    )

    # Summary of the sweep, including the selected threshold
    metrics_best_thresh_validate[key] = utils.describe_metrics(metrics_sweep[key])

    # Test evaluation at the selected threshold
    if nvar == 1:
        metrics_best_thresh_test[key] = utils.compute_metrics_postprocessing(
            dataset_true=test,
            dataset_preds=test_preds,
            model=methods[key],
            threshold=metrics_best_thresh_validate[key]['best_threshold'],
            scores_or_labels='labels',
        )
        return

    if nvar == 2:
        test_single = data_test_single.copy(deepcopy=True)
        metrics_best_thresh_test[key] = utils.compute_metrics_postprocessing_mult(
            dataset_true=test,
            dataset_preds=test_preds,
            dataset_true_single=test_single,
            model=methods[key],
            threshold=metrics_best_thresh_validate[key]['best_threshold'],
            scores_or_labels='labels',
        )




def PosprocEqoddsSCORES(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model_name, quality):
    """Post-process a trained model with ``CalibratedEqOddsPostprocessing``.

    The post-processor is fitted on the training split and the base model's
    training predictions, swept over the global ``thresh_sweep`` on
    validation, and evaluated on test at the selected threshold. All entries
    written to ``methods``, ``metrics_sweep``, ``metrics_best_thresh_validate``
    and ``metrics_best_thresh_test`` are keyed
    ``model_name + '_eqOdds_' + quality``, for both values of ``nvar``.

    Args:
        data_train: dataset used to fit the post-processor.
        data_val: validation dataset used for the threshold sweep.
        data_test: test dataset.
        unprivileged_groups: unprivileged group definition for the post-processor.
        privileged_groups: privileged group definition for the post-processor.
        model_name: key of the already trained base model in ``methods``.
        quality: passed to the post-processor as ``cost_constraint``; also
            part of the result key.
    """
    # Global variables
    global methods
    global metrics_best_thresh_validate

    # Assign the correct name
    fairness_method = '_eqOdds'
    model_name_metric = model_name + fairness_method + '_' + quality

    # Copy the datasets
    train, val, test = data_train.copy(deepcopy=True), data_val.copy(deepcopy=True), data_test.copy(deepcopy=True)

    # Copy the model's predictions
    train_preds = utils.update_dataset_from_model(train, methods[model_name])
    val_preds = utils.update_dataset_from_model(val, methods[model_name])
    test_preds = utils.update_dataset_from_model(test, methods[model_name])

    # Initialize the model
    methods[model_name_metric] = CalibratedEqOddsPostprocessing(
        privileged_groups=privileged_groups,
        unprivileged_groups=unprivileged_groups,
        cost_constraint=quality,
        seed=seed)

    # Train the model
    methods[model_name_metric] = methods[model_name_metric].fit(train, train_preds)

    # Evaluate the model for a range of thresholds
    metrics_sweep[model_name_metric] = utils.metrics_postprocessing_threshold_sweep(
        dataset_true = val,
        dataset_preds = val_preds,
        model = methods[model_name_metric],
        thresh_arr = thresh_sweep,
        scores_or_labels = 'scores'
    )

    # Evaluate in best thresh
    metrics_best_thresh_validate[model_name_metric] = utils.describe_metrics(metrics_sweep[model_name_metric])

    if nvar == 1:

        # Using the best thresh, evaluate in test
        metrics_best_thresh_test[model_name_metric] = utils.compute_metrics_postprocessing(
            dataset_true=test,
            dataset_preds=test_preds,
            model=methods[model_name_metric],
            threshold=metrics_best_thresh_validate[model_name_metric]['best_threshold'],
            scores_or_labels='scores'
        )

    elif nvar == 2:
        test_single = data_test_single.copy(deepcopy=True)

        # Same evaluation as above, for two sensitive attributes. It must use
        # this function's own model, key, threshold and 'scores' mode, not
        # the entries of the label-based equalized-odds post-processor.
        metrics_best_thresh_test[model_name_metric] = utils.compute_metrics_postprocessing_mult(
            dataset_true=test,
            dataset_preds=test_preds,
            dataset_true_single = test_single,
            model=methods[model_name_metric],
            threshold=metrics_best_thresh_validate[model_name_metric]['best_threshold'],
            scores_or_labels='scores'
        )




def PosprocReject(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model_name, key_metric):
    """Post-process a trained model with ``RejectOptionClassification``.

    The post-processor is fitted on the training split and the base model's
    training predictions, then evaluated on validation and test. Entries
    written to ``methods``, ``metrics_best_thresh_validate`` and
    ``metrics_best_thresh_test`` are keyed
    ``model_name + '_RejOpt_' + key_metric``.

    Args:
        data_train: dataset used to fit the post-processor.
        data_val: validation dataset.
        data_test: test dataset.
        unprivileged_groups: unprivileged group definition for the post-processor.
        privileged_groups: privileged group definition for the post-processor.
        model_name: key of the already trained base model in ``methods``.
        key_metric: key into the global ``fair_metrics_optrej`` dict; the
            mapped value is passed to the post-processor as ``metric_name``.
    """
    # Global variables
    global methods
    global metrics_best_thresh_validate
    global fair_metrics_optrej

    # Assign the correct name
    fairness_method = '_RejOpt'
    model_name_metric = model_name + fairness_method + '_' + key_metric

    # Copy the datasets
    train, val, test = data_train.copy(deepcopy=True), data_val.copy(deepcopy=True), data_test.copy(deepcopy=True)

    # Copy predictions
    train_preds = utils.update_dataset_from_model(train, methods[model_name])
    val_preds = utils.update_dataset_from_model(val, methods[model_name])
    test_preds = utils.update_dataset_from_model(test, methods[model_name])

    # Initialize the model; the fairness metric is bounded to [-0.01, 0.01]
    methods[model_name_metric] = RejectOptionClassification(
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
        metric_name=fair_metrics_optrej[key_metric],
        metric_lb=-0.01,
        metric_ub=0.01
        )

    # Train the model
    methods[model_name_metric] = methods[model_name_metric].fit(train, train_preds)

    if nvar == 1:
        # Evaluate on validation
        metrics_best_thresh_validate[model_name_metric] = utils.compute_metrics_postprocessing(
            dataset_true=val,
            dataset_preds=val_preds,
            model=methods[model_name_metric],
            required_threshold=False)

        # Evaluate on test
        metrics_best_thresh_test[model_name_metric] = utils.compute_metrics_postprocessing(
            dataset_true=test,
            dataset_preds=test_preds,
            model=methods[model_name_metric],
            required_threshold=False)

    elif nvar == 2:
        val_single, test_single = data_val_single.copy(deepcopy=True), data_test_single.copy(deepcopy=True)
        # Evaluate on validation
        metrics_best_thresh_validate[model_name_metric] = utils.compute_metrics_postprocessing_mult(
            dataset_true=val,
            dataset_preds=val_preds,
            dataset_true_single=val_single,
            model=methods[model_name_metric],
            required_threshold=False)

        # Evaluate on test; the "single" reference must be the test one too
        metrics_best_thresh_test[model_name_metric] = utils.compute_metrics_postprocessing_mult(
            dataset_true=test,
            dataset_preds=test_preds,
            dataset_true_single=test_single,
            model=methods[model_name_metric],
            required_threshold=False)

# Model training

## Grids

In [12]:
# Hyper-parameter values and grids for the fairness processors.

# DI remover
repair_level = 0.5
dir_grid = {
    'repair_level': [0.25, 0.5, 0.75],
}

# MetaFair classifier
quality_constraints_meta = ['sr', 'fdr']
tau = 0.8
metafair_grid = {
    'tau': [0.1, 0.2, 0.5, 0.8, 0.9],
}

# Prejudice index regularizer
pir_grid = {
    'eta': [0.5, 5.0, 50.0, 500.0],
}

# Adversarial learning
# NOTE: no grid is defined for the adversarial model; this section used to
# re-declare pir_grid with the same values.

# Equal odds
# Quality constraints
quality_constraints_eqodds = ["weighted", 'fnr', 'fpr']

# Option rejection
# Fairness metrics: short key -> metric name
fair_metrics_optrej = {
    'spd': "Statistical parity difference",
    'aod': "Average odds difference",
    'eod': "Equal opportunity difference",
}

In [23]:
# Counter of processed seeds; it is appended to the names of the result files
i = 0

# Axes of the experiment.
# `seeds` and `datasets` are currently narrowed to a single value each;
# the complete sets are 12345, 424242, 777, 32768, 45234 and
# 'Simulation', 'German', 'Homecredit' respectively.
seeds = [12345]
datasets = ['Homecredit']
nvars = ['1', '2']
operations = ['OR', 'AND', 'XOR']
cases = ['ind', 'com']

# Loader for every "<dataset><nvar>V" combination
loadDatasets = dict(
    Simulation1V=simul1V,
    Simulation2V=simul2V,
    German1V=GermanDataset1V,
    German2V=GermanDataset2V,
    Homecredit1V=Homecredit1V,
    Homecredit2V=Homecredit2V,
)

measurement = 'bal_acc'
combination = list()

# Results of every run, keyed by the name of the result file
resultsDict = {}


for seed in seeds:
    i += 1
    for data in datasets:
        for nvar in nvars:

            dataset = data + nvar + 'V'
            
            if nvar == '1':
                # Arguments for the iteration
                argumentsLoadData = {
                    'seed': seed
                }
                nvar = 1

                # Load data
                data_train, data_val, data_test, \
                sensitive_attribute, privileged_groups, \
                unprivileged_groups = loadDatasets[dataset](**argumentsLoadData)

                for case in cases:
                    if case == 'ind': 

                        # Obtain benchmarks
                        modelsNames, modelsTrain, modelsArgs = ObtainPrelDataSingle()
                        modelsBenchmark = modelsNames


                        # Initialize dicts
                        methods = dict()

                        # Range of thresholds to evaluate our models
                        thresh_sweep = np.linspace(0.01, 1.0, 50)
                        metrics_sweep = dict()

                        # Store results from validation and test
                        metrics_best_thresh_validate = dict()
                        metrics_best_thresh_test = dict()

                        # Benchmarks
                        BenchmarkLogistic(data_train, data_val, data_test)
                        BenchmarkXGB(data_train, data_val, data_test)
                        
                        # Pre processing
                        for model in modelsNames:
                            PreprocRW(data_train, data_val, data_test, privileged_groups, unprivileged_groups, model, do_results = True)
                            PreprocDI(data_train, data_val, data_test, sensitive_attribute, repair_level, model, do_results = True)
                        
                        # In processing
                        for quality in quality_constraints_meta:
                            InprocMeta(data_train, data_val, data_test, sensitive_attribute, quality, tau = 0.8, do_results = True)
                        InprocPI(data_train, data_val, data_test, sensitive_attribute, eta = 50.0, do_results = True)
                        
                        tf.compat.v1.reset_default_graph()
                        sess = tf.compat.v1.Session()
                        InprocAdvs(data_train, data_val, data_test, privileged_groups, unprivileged_groups, do_results = True)
                        sess.close()
                        
                        # Post processing
                        for model in modelsNames:
                            PosprocPlatt(data_train, data_val, data_test, privileged_groups, model)
                            PosprocEqoddsLABELS(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model)
                            for quality in quality_constraints_eqodds:
                                PosprocEqoddsSCORES(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, quality)
                            for key_metric in fair_metrics_optrej:
                                PosprocReject(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, key_metric)

                        file = dataset + '_' + case + '_' + str(i)
                        
                        resultsDict[file] = dict()
                        resultsDict[file]['methods'] = methods
                        resultsDict[file]['best_thresh_test'] = pd.DataFrame(metrics_best_thresh_test).T
                        resultsDict[file]['metrics_sweep'] = metrics_sweep

                        with open('results/best/' + data + '/' + file + '_best.pickle', 'wb') as handle:
                            pickle.dump(resultsDict[file]['best_thresh_test'], handle, protocol=pickle.HIGHEST_PROTOCOL)
                        with open('results/sweep/' + data + '/' + file + '_sweep.pickle', 'wb') as handle:
                            pickle.dump(resultsDict[file]['metrics_sweep'], handle, protocol=pickle.HIGHEST_PROTOCOL)
        
                    elif case == 'com':
                        # Obtain benchmarks and in-processing models
                        modelsNames, modelsBenchmark, modelsPost, \
                        modelsTrain, modelsArgs = ObtainPrelDataMultiple(sensitive_attribute, privileged_groups, unprivileged_groups)

                        # Initialize dicts
                        methods = dict()

                        # Range of thresholds to evaluate our models
                        thresh_sweep = np.linspace(0.01, 1.0, 50)
                        metrics_sweep = dict()

                        # Store results from validation and test
                        metrics_best_thresh_validate = dict()
                        metrics_best_thresh_test = dict()

                        # Benchmarks
                        BenchmarkLogistic(data_train, data_val, data_test)
                        BenchmarkXGB(data_train, data_val, data_test)

                        # Pre processing + In processing
                        for model in modelsNames:
                            if model == 'adversarial':
                                tf.compat.v1.reset_default_graph()
                                sess = tf.compat.v1.Session()
                            PreprocRW(data_train, data_val, data_test, privileged_groups, unprivileged_groups, model, do_results = True)
                            if model == 'adversarial':
                                sess.close()
                                tf.compat.v1.reset_default_graph()
                                sess = tf.compat.v1.Session()
                            PreprocDI(data_train, data_val, data_test, sensitive_attribute, repair_level, model, do_results = True)
                            
                            if model == 'adversarial':
                                sess.close()

                        # Pre/In processing + Post processing
                        for quality in quality_constraints_meta:
                            InprocMeta(data_train, data_val, data_test, sensitive_attribute, quality, tau = 0.8, do_results = True)
                        InprocPI(data_train, data_val, data_test, sensitive_attribute, eta = 50.0, do_results = True)

                        tf.compat.v1.reset_default_graph()
                        sess = tf.compat.v1.Session()
                        InprocAdvs(data_train, data_val, data_test, privileged_groups, unprivileged_groups, do_results = True)

                        for model in modelsPost:
                            PosprocPlatt(data_train, data_val, data_test, privileged_groups, model)
                            PosprocEqoddsLABELS(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model)
                            for quality in quality_constraints_eqodds:
                                PosprocEqoddsSCORES(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, quality)
                            for key_metric in fair_metrics_optrej:
                                PosprocReject(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, key_metric)
                                
                        sess.close()
                        
                        file = dataset + '_' + case + '_' + str(i)

                        resultsDict[file] = dict()
                        resultsDict[file]['methods'] = methods
                        resultsDict[file]['best_thresh_test'] = pd.DataFrame(metrics_best_thresh_test).T
                        resultsDict[file]['metrics_sweep'] = metrics_sweep
                        

                        with open('results/best/' + data + '/' + file + '_best.pickle', 'wb') as handle:
                            pickle.dump(resultsDict[file]['best_thresh_test'], handle, protocol=pickle.HIGHEST_PROTOCOL)
                        with open('results/sweep/' + data + '/' + file + '_sweep.pickle', 'wb') as handle:
                            pickle.dump(resultsDict[file]['metrics_sweep'], handle, protocol=pickle.HIGHEST_PROTOCOL)

            

            elif nvar == '2':
                for operation in operations:
                        argumentsLoadData = {
                            'seed': seed,
                            'operation': operation
                        }
                        nvar = 2

                        resultsDict[dataset + '_' + operation] = dict()

                        data_train, data_val, data_test, \
                        sensitive_attribute, privileged_groups, unprivileged_groups, \
                        data_val_single, data_test_single = loadDatasets[dataset](**argumentsLoadData)
            
                        for case in cases:
                            if case == 'ind': 

                                # Initialize dicts
                                methods = dict()

                                # Obtain benchmarks
                                modelsNames, modelsTrain, modelsArgs = ObtainPrelDataSingle()
                                modelsBenchmark = modelsNames

                                # Range of thresholds to evaluate our models
                                thresh_sweep = np.linspace(0.01, 1.0, 50)
                                metrics_sweep = dict()

                                # Store results from validation and test
                                metrics_best_thresh_validate = dict()
                                metrics_best_thresh_test = dict()

                                # Benchmarks
                                BenchmarkLogistic(data_train, data_val, data_test)
                                BenchmarkXGB(data_train, data_val, data_test)
                                
                                # Pre processing
                                for model in modelsNames:
                                    PreprocRW(data_train, data_val, data_test, privileged_groups, unprivileged_groups, model, do_results = True)
                                    PreprocDI(data_train, data_val, data_test, sensitive_attribute, repair_level, model, do_results = True)
                                
                                # In processing
                                for quality in quality_constraints_meta:
                                    InprocMeta(data_train, data_val, data_test, sensitive_attribute, quality, tau = 0.8, do_results = True)
                                InprocPI(data_train, data_val, data_test, sensitive_attribute, eta = 50.0, do_results = True)
                                
                                tf.compat.v1.reset_default_graph()
                                sess = tf.compat.v1.Session()
                                InprocAdvs(data_train, data_val, data_test, privileged_groups, unprivileged_groups, do_results = True)
                                sess.close()
                                
                                # Post processing
                                for model in modelsNames:
                                    PosprocPlatt(data_train, data_val, data_test, privileged_groups, model)
                                    PosprocEqoddsLABELS(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model)
                                    for quality in quality_constraints_eqodds:
                                        PosprocEqoddsSCORES(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, quality)
                                    for key_metric in fair_metrics_optrej:
                                        PosprocReject(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, key_metric)

                                file =  dataset + '_' + operation + '_' + case + '_' + str(i)

                                resultsDict[file] = dict()
                                resultsDict[file]['methods'] = methods
                                resultsDict[file]['best_thresh_test'] = pd.DataFrame(metrics_best_thresh_test).T
                                resultsDict[file]['metrics_sweep'] = metrics_sweep

                                with open('results/best/' + data + '/' + file + '_best.pickle', 'wb') as handle:
                                    pickle.dump(resultsDict[file]['best_thresh_test'], handle, protocol=pickle.HIGHEST_PROTOCOL)
                                with open('results/sweep/' + data + '/' + file + '_sweep.pickle', 'wb') as handle:
                                    pickle.dump(resultsDict[file]['metrics_sweep'], handle, protocol=pickle.HIGHEST_PROTOCOL)
                
                            elif case == 'com':
                                # Obtain benchmarks and in proncessing models
                                modelsNames, modelsBenchmark, modelsPost, \
                                modelsTrain, modelsArgs = ObtainPrelDataMultiple(sensitive_attribute, privileged_groups, unprivileged_groups)

                                # Initialize dicts
                                methods = dict()

                                # Range of thresholds to evaluate our models
                                thresh_sweep = np.linspace(0.01, 1.0, 50)
                                metrics_sweep = dict()

                                # Store results from validation and test
                                metrics_best_thresh_validate = dict()
                                metrics_best_thresh_test = dict()

                                # Benchmarks
                                BenchmarkLogistic(data_train, data_val, data_test)
                                BenchmarkXGB(data_train, data_val, data_test)

                                # Pre processing + In processing
                                for model in modelsNames:
                                    if model == 'adversarial':
                                        tf.compat.v1.reset_default_graph()
                                        sess = tf.compat.v1.Session()
                                    PreprocRW(data_train, data_val, data_test, privileged_groups, unprivileged_groups, model, do_results = True)
                                    if model == 'adversarial':
                                        sess.close()
                                        tf.compat.v1.reset_default_graph()
                                        sess = tf.compat.v1.Session()
                                    PreprocDI(data_train, data_val, data_test, sensitive_attribute, repair_level, model, do_results = True)
                                    
                                    if model == 'adversarial':
                                        sess.close()

                                # Pre/In processing + Post processing
                                for quality in quality_constraints_meta:
                                    InprocMeta(data_train, data_val, data_test, sensitive_attribute, quality, tau = 0.8, do_results = True)
                                InprocPI(data_train, data_val, data_test, sensitive_attribute, eta = 50.0, do_results = True)

                                tf.compat.v1.reset_default_graph()
                                sess = tf.compat.v1.Session()
                                InprocAdvs(data_train, data_val, data_test, privileged_groups, unprivileged_groups, do_results = True)

                                for model in modelsPost:
                                    PosprocPlatt(data_train, data_val, data_test, privileged_groups, model)
                                    PosprocEqoddsLABELS(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model)
                                    for quality in quality_constraints_eqodds:
                                        PosprocEqoddsSCORES(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, quality)
                                    for key_metric in fair_metrics_optrej:
                                        PosprocReject(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, key_metric)
                                        
                                sess.close()

                                file = dataset + '_' + operation + '_' + case + '_' + str(i)
                                
                                resultsDict[file] = dict()
                                resultsDict[file]['methods'] = methods
                                resultsDict[file]['best_thresh_test'] = pd.DataFrame(metrics_best_thresh_test).T
                                resultsDict[file]['metrics_sweep'] = metrics_sweep
                            
                                with open('results/best/' + data + '/' + file + '_best.pickle', 'wb') as handle:
                                    pickle.dump(resultsDict[file]['best_thresh_test'], handle, protocol=pickle.HIGHEST_PROTOCOL)
                                with open('results/sweep/' + data + '/' + file + '_sweep.pickle', 'wb') as handle:
                                    pickle.dump(resultsDict[file]['metrics_sweep'], handle, protocol=pickle.HIGHEST_PROTOCOL)

  df['AGE'] = -df['DAYS_BIRTH'].astype('float') / 365
  out = random_state.multivariate_normal(mean, cov, size)
  prob_y_1 = (prob_1_1 + prob_1_0) / total
  prob_z_0 = (prob_m1_0 + prob_1_0) / total
  prob_z_1 = (prob_m1_1 + prob_1_1) / total
  out = random_state.multivariate_normal(mean, cov, size)
  prob_y_1 = (prob_1_1 + prob_1_0) / total
  prob_z_0 = (prob_m1_0 + prob_1_0) / total
  prob_z_1 = (prob_m1_1 + prob_1_1) / total
  out = random_state.multivariate_normal(mean, cov, size)
  prob_y_1 = (prob_1_1 + prob_1_0) / total
  prob_z_0 = (prob_m1_0 + prob_1_0) / total
  prob_z_1 = (prob_m1_1 + prob_1_1) / total
  out = random_state.multivariate_normal(mean, cov, size)
  prob_y_1 = (prob_1_1 + prob_1_0) / total
  prob_z_0 = (prob_m1_0 + prob_1_0) / total
  prob_z_1 = (prob_m1_1 + prob_1_1) / total
  out = random_state.multivariate_normal(mean, cov, size)
  prob_y_1 = (prob_1_1 + prob_1_0) / total
  prob_z_0 = (prob_m1_0 + prob_1_0) / total
  prob_z_1 = (prob_m1_1 + prob_1_1) / total
 

epoch 0; iter: 0; batch classifier loss: 19599.070312; batch adversarial loss: 0.478175
epoch 1; iter: 0; batch classifier loss: 7672.593262; batch adversarial loss: 0.637683
epoch 2; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 3; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 4; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 5; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 6; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 7; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 8; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 9; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 10; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 11; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 12; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 13

  self.model_params = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq)


epoch 0; iter: 0; batch classifier loss: 50881.707031; batch adversarial loss: 0.271140
epoch 1; iter: 0; batch classifier loss: 8102.904297; batch adversarial loss: 0.670772
epoch 2; iter: 0; batch classifier loss: 2803.181396; batch adversarial loss: 0.626958
epoch 3; iter: 0; batch classifier loss: 5260.230469; batch adversarial loss: 0.576814
epoch 4; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 5; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 6; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 7; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 8; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 9; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 10; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 11; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 12; iter: 0; batch classifier loss: nan; batch adve

  out = random_state.multivariate_normal(mean, cov, size)
  prob_y_1 = (prob_1_1 + prob_1_0) / total
  prob_z_0 = (prob_m1_0 + prob_1_0) / total
  prob_z_1 = (prob_m1_1 + prob_1_1) / total
  out = random_state.multivariate_normal(mean, cov, size)
  prob_y_1 = (prob_1_1 + prob_1_0) / total
  prob_z_0 = (prob_m1_0 + prob_1_0) / total
  prob_z_1 = (prob_m1_1 + prob_1_1) / total
  out = random_state.multivariate_normal(mean, cov, size)
  prob_y_1 = (prob_1_1 + prob_1_0) / total
  prob_z_0 = (prob_m1_0 + prob_1_0) / total
  prob_z_1 = (prob_m1_1 + prob_1_1) / total
  out = random_state.multivariate_normal(mean, cov, size)
  prob_y_1 = (prob_1_1 + prob_1_0) / total
  prob_z_0 = (prob_m1_0 + prob_1_0) / total
  prob_z_1 = (prob_m1_1 + prob_1_1) / total
  out = random_state.multivariate_normal(mean, cov, size)
  prob_y_1 = (prob_1_1 + prob_1_0) / total
  prob_z_0 = (prob_m1_0 + prob_1_0) / total
  prob_z_1 = (prob_m1_1 + prob_1_1) / total
  out = random_state.multivariate_normal(mean, cov, siz

epoch 0; iter: 0; batch classifier loss: 25287.775391; batch adversarial loss: 0.708744
epoch 1; iter: 0; batch classifier loss: 7809.575684; batch adversarial loss: 0.673612
epoch 2; iter: 0; batch classifier loss: 5610.402832; batch adversarial loss: 0.668349
epoch 3; iter: 0; batch classifier loss: 5290.039551; batch adversarial loss: 0.648790
epoch 4; iter: 0; batch classifier loss: 1486.434692; batch adversarial loss: 0.648592
epoch 5; iter: 0; batch classifier loss: 3229.215332; batch adversarial loss: 0.628925
epoch 6; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 7; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 8; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 9; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 10; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 11; iter: 0; batch classifier loss: nan; batch adversarial loss: nan
epoch 12; iter: 0; batch classi

  self.model_params = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq)


ValueError: Input X contains NaN.
LogisticRegression does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [1]:
# Display the training split; the name only exists once a data-loading cell has been run.
data_train

NameError: name 'data_train' is not defined

In [None]:
# Arguments forwarded as keyword arguments to the dataset loader
argumentsLoadData = {
    'seed': seed
}

# Suffix number of the loader key ('<data><nvar>V');
# presumably the number of sensitive attributes -- confirm against the loaders.
nvar = 1

datasets = ['Simulation', 'German', 'Homecredit']
data = 'Homecredit'

# nvar is an int: it has to be converted explicitly, str + int raises TypeError
dataset = data + str(nvar) + 'V'

# Load data (the loader is looked up by key in the notebook-level loadDatasets dict)
data_train, data_val, data_test, \
sensitive_attribute, privileged_groups, \
unprivileged_groups = loadDatasets[dataset](**argumentsLoadData)

In [None]:
# Single-stage ('ind') experiment: benchmarks, then each pre-, in- and post-processing
# method on its own, then the results are pickled under 'results/'.
# NOTE(review): the helper calls below are not handed the dicts created here, so they
# presumably read and fill them as notebook globals -- confirm against the helper definitions.
# `i`, `resultsDict`, `repair_level`, `quality_constraints_*` and `fair_metrics_optrej`
# are not assigned in this cell and must already exist.
case = 'ind'

# Obtain benchmarks
modelsNames, modelsTrain, modelsArgs = ObtainPrelDataSingle()
modelsBenchmark = modelsNames

# Initialize dicts
methods = dict()

# Range of thresholds to evaluate our models (50 evenly spaced values from 0.01 to 1.0)
thresh_sweep = np.linspace(0.01, 1.0, 50)
metrics_sweep = dict()

# Store results from validation and test
metrics_best_thresh_validate = dict()
metrics_best_thresh_test = dict()

# Benchmarks
BenchmarkLogistic(data_train, data_val, data_test)
BenchmarkXGB(data_train, data_val, data_test)

# Pre processing
for model in modelsNames:
    PreprocRW(data_train, data_val, data_test, privileged_groups, unprivileged_groups, model, do_results = True)
    PreprocDI(data_train, data_val, data_test, sensitive_attribute, repair_level, model, do_results = True)

# In processing
for quality in quality_constraints_meta:
    InprocMeta(data_train, data_val, data_test, sensitive_attribute, quality, tau = 0.8, do_results = True)
InprocPI(data_train, data_val, data_test, sensitive_attribute, eta = 50.0, do_results = True)

# Fresh TF1 graph and session around the adversarial model; `sess` is not passed to
# InprocAdvs, so it is presumably picked up as a global -- TODO confirm.
tf.compat.v1.reset_default_graph()
sess = tf.compat.v1.Session()
InprocAdvs(data_train, data_val, data_test, privileged_groups, unprivileged_groups, do_results = True)
sess.close()

# Post processing
for model in modelsNames:
    PosprocPlatt(data_train, data_val, data_test, privileged_groups, model)
    PosprocEqoddsLABELS(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model)
    for quality in quality_constraints_eqodds:
        PosprocEqoddsSCORES(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, quality)
    for key_metric in fair_metrics_optrej:
        PosprocReject(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, key_metric)
# Key and file stem of this run: '<dataset>_<case>_<i>'
file = dataset + '_' + case + '_' + str(i)

resultsDict[file] = dict()
resultsDict[file]['methods'] = methods
resultsDict[file]['best_thresh_test'] = pd.DataFrame(metrics_best_thresh_test).T
resultsDict[file]['metrics_sweep'] = metrics_sweep


# Only the best-threshold table and the sweep are written to disk; 'methods' stays in memory
with open('results/' + file + '.pickle', 'wb') as handle:
    pickle.dump(resultsDict[file]['best_thresh_test'], handle, protocol=pickle.HIGHEST_PROTOCOL)
with open('results/' + file + '_sweep.pickle', 'wb') as handle:
    pickle.dump(resultsDict[file]['metrics_sweep'], handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Combined ('com') experiment: pre-processors are run for every model in modelsNames,
# post-processors for every model in modelsPost, then the results are pickled under 'results/'.
# NOTE(review): the helper calls below are not handed the dicts created here, so they
# presumably read and fill them as notebook globals -- confirm against the helper definitions.
# `i`, `resultsDict`, `repair_level`, `quality_constraints_*` and `fair_metrics_optrej`
# are not assigned in this cell and must already exist.
case = 'com'

# Obtain benchmarks and in processing models
modelsNames, modelsBenchmark, modelsPost, \
modelsTrain, modelsArgs = ObtainPrelDataMultiple(sensitive_attribute, privileged_groups, unprivileged_groups)

# Initialize dicts
methods = dict()

# Range of thresholds to evaluate our models (50 evenly spaced values from 0.01 to 1.0)
thresh_sweep = np.linspace(0.01, 1.0, 50)
metrics_sweep = dict()

# Store results from validation and test
metrics_best_thresh_validate = dict()
metrics_best_thresh_test = dict()

# Benchmarks
BenchmarkLogistic(data_train, data_val, data_test)
BenchmarkXGB(data_train, data_val, data_test)

# Pre processing + In processing
for model in modelsNames:
    # The 'adversarial' model gets a fresh TF1 graph and session before each pre-processor
    if model == 'adversarial':
        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()
    PreprocRW(data_train, data_val, data_test, privileged_groups, unprivileged_groups, model, do_results = True)
    if model == 'adversarial':
        sess.close()
        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()
    PreprocDI(data_train, data_val, data_test, sensitive_attribute, repair_level, model, do_results = True)
    
    if model == 'adversarial':
        sess.close()

# Pre/In processing + Post processing
for quality in quality_constraints_meta:
    InprocMeta(data_train, data_val, data_test, sensitive_attribute, quality, tau = 0.8, do_results = True)
InprocPI(data_train, data_val, data_test, sensitive_attribute, eta = 50.0, do_results = True)

# This session is kept open across the whole post-processing loop and closed afterwards
tf.compat.v1.reset_default_graph()
sess = tf.compat.v1.Session()

InprocAdvs(data_train, data_val, data_test, privileged_groups, unprivileged_groups, do_results = True)

for model in modelsPost:
    PosprocPlatt(data_train, data_val, data_test, privileged_groups, model)
    PosprocEqoddsLABELS(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model)
    for quality in quality_constraints_eqodds:
        PosprocEqoddsSCORES(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, quality)
    for key_metric in fair_metrics_optrej:
        PosprocReject(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, key_metric)
        
sess.close()

# Save the file (key and file stem: '<dataset>_<case>_<i>')
file = dataset + '_' + case + '_' + str(i)

resultsDict[file] = dict()
resultsDict[file]['methods'] = methods
resultsDict[file]['best_thresh_test'] = pd.DataFrame(metrics_best_thresh_test).T
resultsDict[file]['metrics_sweep'] = metrics_sweep

# Only the best-threshold table and the sweep are written to disk; 'methods' stays in memory
with open('results/' + file + '.pickle', 'wb') as handle:
    pickle.dump(resultsDict[file]['best_thresh_test'], handle, protocol=pickle.HIGHEST_PROTOCOL)
with open('results/' + file + '_sweep.pickle', 'wb') as handle:
    pickle.dump(resultsDict[file]['metrics_sweep'], handle, protocol=pickle.HIGHEST_PROTOCOL)

In [13]:
# Available choices, kept for reference
datasets = ['Simulation', 'German', 'Homecredit']
operations = ['OR', 'AND', 'XOR']

# Selection for this run
data = 'Simulation'
nvar = 2
operation = 'XOR'

# Loader lookup, keyed by '<data><nvar>V'
loadDatasets = {
    'Simulation1V': simul1V,
    'Simulation2V': simul2V,
    'German1V': GermanDataset1V,
    'German2V': GermanDataset2V,
    'Homecredit1V': Homecredit1V,
    'Homecredit2V': Homecredit2V
}
dataset = ''.join((data, str(nvar), 'V'))

# Keyword arguments handed to the selected loader
argumentsLoadData = {'seed': seed, 'operation': operation}

# The loader selected here returns eight objects
(data_train, data_val, data_test,
 sensitive_attribute, privileged_groups, unprivileged_groups,
 data_val_single, data_test_single) = loadDatasets[dataset](**argumentsLoadData)


  df.loc[pos, label_name] = favorable_label
  df.loc[pos, label_name] = favorable_label


In [14]:
# Single-stage ('ind') experiment on the dataset loaded with a logical `operation`:
# benchmarks, then each pre-, in- and post-processing method on its own, then the results
# are pickled under 'results/best/<data>/' and 'results/sweep/<data>/'.
# NOTE(review): the helper calls below are not handed the dicts created here, so they
# presumably read and fill them as notebook globals -- confirm against the helper definitions.
# `i` and `resultsDict` are not assigned in this cell and must already exist.
case = 'ind'

# Initialize dicts
methods = dict()

# Obtain benchmarks
modelsNames, modelsTrain, modelsArgs = ObtainPrelDataSingle()
modelsBenchmark = modelsNames

# Range of thresholds to evaluate our models (50 evenly spaced values from 0.01 to 1.0)
thresh_sweep = np.linspace(0.01, 1.0, 50)
metrics_sweep = dict()

# Store results from validation and test
metrics_best_thresh_validate = dict()
metrics_best_thresh_test = dict()

# Benchmarks
BenchmarkLogistic(data_train, data_val, data_test)
BenchmarkXGB(data_train, data_val, data_test)

# Pre processing
for model in modelsNames:
    PreprocRW(data_train, data_val, data_test, privileged_groups, unprivileged_groups, model, do_results = True)
    PreprocDI(data_train, data_val, data_test, sensitive_attribute, repair_level, model, do_results = True)

# In processing
for quality in quality_constraints_meta:
    InprocMeta(data_train, data_val, data_test, sensitive_attribute, quality, tau = 0.8, do_results = True)
InprocPI(data_train, data_val, data_test, sensitive_attribute, eta = 50.0, do_results = True)
# Fresh TF1 graph and session around the adversarial model; `sess` is not passed to
# InprocAdvs, so it is presumably picked up as a global -- TODO confirm.
tf.compat.v1.reset_default_graph()
sess = tf.compat.v1.Session()
InprocAdvs(data_train, data_val, data_test, privileged_groups, unprivileged_groups, do_results = True)

sess.close()

# Post processing
for model in modelsNames:
    PosprocPlatt(data_train, data_val, data_test, privileged_groups, model)
    PosprocEqoddsLABELS(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model)
    for quality in quality_constraints_eqodds:
        PosprocEqoddsSCORES(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, quality)
    for key_metric in fair_metrics_optrej:
        PosprocReject(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, key_metric)

# Save the file (key and file stem: '<dataset>_<operation>_<case>_<i>')
file =  dataset + '_' + operation + '_' + case + '_' + str(i)

resultsDict[file] = dict()
resultsDict[file]['methods'] = methods
resultsDict[file]['best_thresh_test'] = pd.DataFrame(metrics_best_thresh_test).T
resultsDict[file]['metrics_sweep'] = metrics_sweep

# Only the best-threshold table and the sweep are written to disk; 'methods' stays in memory
with open('results/best/' + data + '/' + file + '_best.pickle', 'wb') as handle:
    pickle.dump(resultsDict[file]['best_thresh_test'], handle, protocol=pickle.HIGHEST_PROTOCOL)
with open('results/sweep/' + data + '/' + file + '_sweep.pickle', 'wb') as handle:
    pickle.dump(resultsDict[file]['metrics_sweep'], handle, protocol=pickle.HIGHEST_PROTOCOL)

KeyboardInterrupt: 

In [16]:
# Combined ('com') experiment on the 'Simulation' data, stored as run index 0:
# pre-processors are run for every model in modelsNames, post-processors for every model
# in modelsPost, then the results are pickled under 'results/best/<data>/' and
# 'results/sweep/<data>/'.
# NOTE(review): the helper calls below are not handed the dicts created here, so they
# presumably read and fill them as notebook globals -- confirm against the helper definitions.
# `resultsDict` is not assigned in this cell and must already exist.
case = 'com'
data = 'Simulation'
i = 0

# Obtain benchmarks and in processing models
modelsNames, modelsBenchmark, modelsPost, \
modelsTrain, modelsArgs = ObtainPrelDataMultiple(sensitive_attribute, privileged_groups, unprivileged_groups)

# Not used again inside this cell; possibly read as globals by the helpers -- TODO confirm
measurement = 'bal_acc'
combination = []

# Initialize dicts
methods = dict()

# Range of thresholds to evaluate our models (50 evenly spaced values from 0.01 to 1.0)
thresh_sweep = np.linspace(0.01, 1.0, 50)
metrics_sweep = dict()

# Store results from validation and test
metrics_best_thresh_validate = dict()
metrics_best_thresh_test = dict()

# Benchmarks
BenchmarkLogistic(data_train, data_val, data_test)
BenchmarkXGB(data_train, data_val, data_test)

# Pre processing + In processing
for model in modelsNames:
    # The 'adversarial' model gets a fresh TF1 graph and session before each pre-processor
    if model == 'adversarial':
        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()
    PreprocRW(data_train, data_val, data_test, privileged_groups, unprivileged_groups, model, do_results = True)
    if model == 'adversarial':
        sess.close()
        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()
    PreprocDI(data_train, data_val, data_test, sensitive_attribute, repair_level, model, do_results = True)
    
    if model == 'adversarial':
        sess.close()

# Pre/In processing + Post processing
for quality in quality_constraints_meta:
    InprocMeta(data_train, data_val, data_test, sensitive_attribute, quality, tau = 0.8, do_results = True)
InprocPI(data_train, data_val, data_test, sensitive_attribute, eta = 50.0, do_results = True)

# This session is kept open across the whole post-processing loop and closed afterwards
tf.compat.v1.reset_default_graph()
sess = tf.compat.v1.Session()
InprocAdvs(data_train, data_val, data_test, privileged_groups, unprivileged_groups, do_results = True)

for model in modelsPost:
    PosprocPlatt(data_train, data_val, data_test, privileged_groups, model)
    PosprocEqoddsLABELS(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model)
    for quality in quality_constraints_eqodds:
        PosprocEqoddsSCORES(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, quality)
    for key_metric in fair_metrics_optrej:
        PosprocReject(data_train, data_val, data_test, unprivileged_groups, privileged_groups, model, key_metric)
        
sess.close()

# Key and file stem of this run: '<dataset>_<operation>_<case>_<i>'
file = dataset + '_' + operation + '_' + case + '_' + str(i)

resultsDict[file] = dict()
resultsDict[file]['methods'] = methods
resultsDict[file]['best_thresh_test'] = pd.DataFrame(metrics_best_thresh_test).T
resultsDict[file]['metrics_sweep'] = metrics_sweep

# Only the best-threshold table and the sweep are written to disk; 'methods' stays in memory
with open('results/best/' + data + '/' + file + '_best.pickle', 'wb') as handle:
    pickle.dump(resultsDict[file]['best_thresh_test'], handle, protocol=pickle.HIGHEST_PROTOCOL)
with open('results/sweep/' + data + '/' + file + '_sweep.pickle', 'wb') as handle:
    pickle.dump(resultsDict[file]['metrics_sweep'], handle, protocol=pickle.HIGHEST_PROTOCOL)


epoch 0; iter: 0; batch classifier loss: 0.655120; batch adversarial loss: 0.734380
epoch 1; iter: 0; batch classifier loss: 0.499045; batch adversarial loss: 0.705493
epoch 2; iter: 0; batch classifier loss: 0.430724; batch adversarial loss: 0.805806
epoch 3; iter: 0; batch classifier loss: 0.461004; batch adversarial loss: 0.803801
epoch 4; iter: 0; batch classifier loss: 0.378912; batch adversarial loss: 0.884379
epoch 5; iter: 0; batch classifier loss: 0.439532; batch adversarial loss: 0.814672
epoch 6; iter: 0; batch classifier loss: 0.372998; batch adversarial loss: 0.860546
epoch 7; iter: 0; batch classifier loss: 0.439463; batch adversarial loss: 0.737609
epoch 8; iter: 0; batch classifier loss: 0.361792; batch adversarial loss: 0.773696
epoch 9; iter: 0; batch classifier loss: 0.355298; batch adversarial loss: 0.768550
epoch 10; iter: 0; batch classifier loss: 0.282246; batch adversarial loss: 0.739926
epoch 11; iter: 0; batch classifier loss: 0.356643; batch adversarial loss:

  self.model_params = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq)
  self.model_params = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq)
  self.model_params = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq)
  self.model_params = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq)
  self.model_params = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq)
  self.model_params = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq)


NameError: name 'i' is not defined

In [18]:
# Store the in-memory results of the last run under index 1,
# starting from a brand-new results registry.
i = 1
file = '_'.join([dataset, operation, case, str(i)])

resultsDict = {
    file: {
        'methods': methods,
        'best_thresh_test': pd.DataFrame(metrics_best_thresh_test).T,
        'metrics_sweep': metrics_sweep,
    }
}

# Best-threshold table first, then the threshold sweep
best_path = 'results/best/' + data + '/' + file + '_best.pickle'
with open(best_path, 'wb') as handle:
    pickle.dump(resultsDict[file]['best_thresh_test'], handle, protocol=pickle.HIGHEST_PROTOCOL)

sweep_path = 'results/sweep/' + data + '/' + file + '_sweep.pickle'
with open(sweep_path, 'wb') as handle:
    pickle.dump(resultsDict[file]['metrics_sweep'], handle, protocol=pickle.HIGHEST_PROTOCOL)

# Results

In [2]:

def load_results(best_dir = 'results/best', sweep_dir = 'results/sweep'):
    """Load every stored run from disk.

    Walks ``best_dir`` recursively, so both the flat layout
    (``results/best/<name>_best.pickle``) and the per-dataset layout written
    by the experiment cells (``results/best/<data>/<name>_best.pickle``) are
    found. The matching sweep is read from the same relative location under
    ``sweep_dir``. Entries that do not end in ``_best.pickle`` (hidden files,
    checkpoints, ...) are ignored.

    Parameters
    ----------
    best_dir : str
        Root directory holding the ``*_best.pickle`` files.
    sweep_dir : str
        Root directory holding the ``*_sweep.pickle`` files.

    Returns
    -------
    tuple of (dict, dict)
        ``(best, sweeps)``, both keyed by the run name (file name without the
        ``_best.pickle`` suffix). Both are empty when ``best_dir`` does not
        exist or holds no result file.

    Raises
    ------
    FileNotFoundError
        If a best-threshold file has no matching sweep file.
    """
    suffix = '_best.pickle'
    best = dict()
    sweeps = dict()

    for root, _subdirs, files in os.walk(best_dir):
        relative = os.path.relpath(root, best_dir)
        for fname in sorted(files):
            if not fname.endswith(suffix):
                continue
            name = fname[:-len(suffix)]

            # NOTE: pickle executes arbitrary code on load; only read files
            # produced by this notebook.
            with open(os.path.join(root, fname), 'rb') as handle:
                best[name] = pickle.load(handle)

            sweep_path = os.path.normpath(os.path.join(sweep_dir, relative, name + '_sweep.pickle'))
            with open(sweep_path, 'rb') as handle:
                sweeps[name] = pickle.load(handle)

    return best, sweeps


resultsDict, sweepsDict = load_results()

names = resultsDict.keys()

In [3]:
def tidy_dataset(dataset, baseline = 'logreg', exclude = ('best_threshold',)):
    """Express every metric as a percentage change with respect to a baseline row.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Table with one row per method and one column per metric. It is not modified.
    baseline : hashable, default 'logreg'
        Index label of the row every other row is compared against.
    exclude : iterable of column labels, default ('best_threshold',)
        Columns copied through unchanged.

    Returns
    -------
    pandas.DataFrame
        Deep copy of ``dataset`` in which each non-excluded column holds
        ``(value - baseline) / baseline * 100``. A baseline value of 0 yields
        inf/NaN for that column, as in any pandas division.

    Raises
    ------
    KeyError
        If ``baseline`` is not in the index.
    """
    new_table = dataset.copy(deep = True)

    subset = [column for column in dataset.columns if column not in exclude]
    reference = new_table.loc[baseline, subset]
    new_table.loc[:, subset] = (new_table.loc[:, subset] - reference)/reference*100
    return new_table

def compare_tables(dataset1, dataset2):
    """Return the percentage change of ``dataset2`` with respect to ``dataset1``.

    Parameters
    ----------
    dataset1 : pandas.DataFrame
        Reference table.
    dataset2 : pandas.DataFrame
        Table compared against the reference.

    Returns
    -------
    pandas.DataFrame
        ``(dataset2 - dataset1) / dataset1 * 100``, aligned on index and
        columns by pandas. Neither input is modified.
    """
    # The arithmetic already builds a new frame, so no defensive copy is needed
    return (dataset2 - dataset1)/dataset1*100



In [4]:
# One relative-change table per stored run
new_tables = {name: tidy_dataset(resultsDict[name]) for name in names}

    

In [None]:
# Show the whole table (no row or column limit), floats formatted with two decimals
with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'float_format', "{:,.2f}".format):  # more options can be specified also
    display(new_tables['German1V_com_1'])

In [None]:
import matplotlib.pyplot as plt
# Line plot of the 'separation' column of the relative-change table of run 'German1V_com_1'
plt.plot(new_tables['German1V_com_1']['separation'])

In [None]:
# Percentage change of the 'German2V_AND_com_1' results with respect to 'German1V_com_1'
tabla_comp = compare_tables(resultsDict['German1V_com_1'], resultsDict['German2V_AND_com_1'])
# Show the whole table (no row or column limit), floats formatted with two decimals
with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'float_format', "{:,.2f}".format):  # more options can be specified also
    display(tabla_comp)

In [None]:
# Inspect the 'acc' entry stored for method 'DI_pir' in the sweep of run 'German1V_com_1'
sweepsDict['German1V_com_1']['DI_pir']['acc']

In [None]:
pip install empirical-attainment-func

In [None]:
import matplotlib.pyplot as plt


sweep = sweepsDict['German2V_XOR_ind_1']
method = 'adversarial'
metric1 = sweep[method]['separation']
metric2 = sweep[method]['acc']

# Pool the sweep values of every method, metric by metric.
# An explicit membership test replaces the former bare `except`, so a missing
# metric or a type mismatch now surfaces instead of silently resetting the totals.
total_dict = dict()
for method in sweep.keys():
    for pooled_metric in ('separation', 'independence', 'sufficiency', 'acc'):
        values = sweep[method][pooled_metric]
        if pooled_metric in total_dict:
            # `+` builds a new object: with `+=` the first method's values, which are
            # shared with `sweep`, would be modified in place when they are mutable
            # (lists or arrays).
            total_dict[pooled_metric] = total_dict[pooled_metric] + values
        else:
            total_dict[pooled_metric] = values

def pareto_frontier(metric1, metric2):
    """Return the coordinates of the best ``metric2`` reached up to each ``metric1``.

    Parameters
    ----------
    metric1, metric2 : sequence of float
        Paired observations of equal length.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``metric1`` sorted in ascending order (duplicates kept) and, for each
        position, the running maximum of ``metric2`` over all points whose
        ``metric1`` is lower or equal. Pairs in which either value is NaN are
        dropped, since they cannot be ordered. Two empty arrays are returned
        for empty input.
    """
    xs = np.asarray(metric1, dtype = float)
    ys = np.asarray(metric2, dtype = float)

    valid = ~(np.isnan(xs) | np.isnan(ys))
    xs, ys = xs[valid], ys[valid]

    # Sort by metric1, ties by decreasing metric2: the first point of every tie
    # carries the tie's maximum, so the running maximum is right for all of them.
    order = np.lexsort((-ys, xs))
    return xs[order], np.maximum.accumulate(ys[order])


def plot_pareto(sweepDict, metric1_name, metric2_name):
    """Draw the trade-off frontier of two metrics as a step plot.

    Parameters
    ----------
    sweepDict : mapping
        Maps metric names to equally long sequences of values.
    metric1_name : str
        Key of the metric placed on the x axis.
    metric2_name : str
        Key of the metric placed on the y axis (the one being maximised).

    Returns
    -------
    tuple
        The matplotlib ``(fig, ax)`` pair holding the plot.

    Raises
    ------
    KeyError
        If either metric name is missing from ``sweepDict``.
    """
    xs, frontier = pareto_frontier(sweepDict[metric1_name], sweepDict[metric2_name])

    fig, ax = plt.subplots()
    ax.set_title('Trade offs')
    ax.set_xlabel(metric1_name)
    ax.set_ylabel(metric2_name)
    ax.step(xs, frontier)
    return fig, ax

#sweep[method]
# Frontier of 'acc' against 'separation' over the values pooled in total_dict
fig, ax = plot_pareto(total_dict, 'separation', 'acc')

In [None]:
# Pitfall check: a plain list compared with an int evaluates to the single bool False
# (no element-wise comparison), so np.where receives a scalar and finds no index.
np.where([1,2,3,1] == 1)

In [None]:
# Largest metric2 value among the entries whose metric1 value equals 0
max(np.array(metric2)[np.array(metric1) == 0])

In [None]:
# Inspect metric1 (assigned from the 'separation' sweep of the 'adversarial' method)
metric1

In [None]:
# Look up the sweep entry stored under the key 'DI_'
sweep['DI_']

# Multiprocessor

In [None]:
def openWrapper(fun):
    """Decorate ``fun`` so that it runs between a graph reset and a session close.

    Before every call the default TensorFlow graph is reset and a new
    ``tf.compat.v1.Session`` is opened; the session is closed when the call
    finishes, also when ``fun`` raises.

    Parameters
    ----------
    fun : callable
        Function to wrap.

    Returns
    -------
    callable
        Wrapper with the same signature that returns whatever ``fun`` returns.

    Notes
    -----
    The session is local to the wrapper and is not handed to ``fun``.
    NOTE(review): confirm how the wrapped functions are expected to reach it.
    """
    import functools

    @functools.wraps(fun)
    def sessWrap(*args, **kwargs):

        # Open
        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()

        try:
            # Propagate the result instead of discarding it
            return fun(*args, **kwargs)
        finally:
            # Release the session even if fun raised
            sess.close()

    return sessWrap



class Multiprocessor():
    """Bundle of an optional pre-, in- and post-processing stage.

    Every stage is a callable (for instance a processor class) or ``None``;
    a stage left as ``None`` is skipped by its method.

    Attributes set by the constructor
    ---------------------------------
    data : the ``dataset`` argument, as given.
    preproc, inproc, postproc : the stage callables, or None.
    results : the ``results`` argument, as given.
    isNN : bool, True when the in-processing stage is named like
        ``AdversarialDebiasing``.
    """

    def __init__(self, preproc = None, inproc = None, postproc = None, results = None, dataset = None):
        """Store the stages; none of them is called here."""
        self.data = dataset
        self.preproc = preproc
        self.inproc = inproc
        self.postproc = postproc
        self.results = results

        # inproc is optional: only look at its name when a stage was given
        self.isNN = (
            inproc is not None
            and getattr(inproc, '__name__', None) == AdversarialDebiasing.__name__
        )

    def obtain_data(self, **kwargs):
        """Record the name of the dataset callable in ``datasetName``.

        Does nothing when no dataset was given. ``kwargs`` is accepted but unused.
        """
        if self.data is None:
            return

        self.datasetName = self.data.__name__

    def Preprocess(self, **kwargs):
        """Call the pre-processing stage with ``kwargs``.

        Returns
        -------
        Whatever the stage returns, or None when there is no such stage.
        """
        if self.preproc is None:
            return None

        self.preprocName = self.preproc.__name__
        return self.preproc(**kwargs)

    def Inprocess(self, **kwargs):
        """Call the in-processing stage with ``kwargs``.

        Returns
        -------
        Whatever the stage returns, or None when there is no such stage.
        """
        if self.inproc is None:
            return None

        self.inprocName = self.inproc.__name__
        return self.inproc(**kwargs)

    def Postprocess(self, **kwargs):
        """Record the name of the post-processing stage in ``postprocName``.

        The stage itself is not called. ``kwargs`` is accepted but unused.
        """
        if self.postproc is None:
            return

        self.postprocName = self.postproc.__name__

    def Results(self, **kwargs):
        """Reset ``resultsDict`` to an empty dict. ``kwargs`` is accepted but unused."""
        self.resultsDict = dict()



        



In [None]:
# Reweighing as pre-processor, no in-processor, EqOddsPostprocessing as post-processor;
# `results` is not defined in this cell and must already exist.
Multiprocessor(Reweighing, None, EqOddsPostprocessing, results, GermanDataset1V)

In [None]:
def operation1(a, b, c):
    """Return ``a`` plus the quotient ``b / c``."""
    quotient = b / c
    return a + quotient

def operation2(a=1, b=2):
    """Return the quotient ``a / b`` (0.5 with the defaults)."""
    ratio = a / b
    return ratio

def operation3(func, **kwargs):
    """Call ``func`` with the given keyword arguments and return 1 plus its result."""
    outcome = func(**kwargs)
    return 1 + outcome

# Keyword arguments forwarded through operation3 to operation2
args = {'a': 1, 'b': 2}
operation3(operation2, **args)

In [None]:
T = 100000   # total number of items
n = T - 13   # items left once 13 of them are set apart
N = 1000     # number of draws without replacement

# p: probability that N draws without replacement out of T all fall among the n items.
# The loop runs over N (previously a hard-coded 1000) and uses its own variable so
# that the notebook-level `i` used to index result files is not overwritten.
p = 1
for draw in range(N):
    p *= (n - draw)/(T - draw)

# q: probability that at least one draw hits one of the 13 items set apart, in percent
q = 1 - p
print(q*100)

In [None]:
def cu