# Implementation of the best parameter settings for the COMPAS, Adult Credit and FICO Credit Score datasets

In [1]:
import os
import sys
import numpy as np
module_path = os.path.abspath(os.path.join('..'))

if module_path not in sys.path:
    sys.path.append(module_path)

from src.util import mean_difference, get_list_of_seeds
from src.feature_map import IdentityFeatureMap
from src.functions import cost_utility
from src.plotting import plot_mean, plot_median
from src.training import train
from src.training_evaluation import UTILITY, COVARIANCE_OF_DECISION_DP
from src.policy import LogisticPolicy
from src.distribution import GermanCreditDistribution, AdultCreditDistribution, COMPASDistribution, FICODistribution
from src.optimization import PenaltyOptimizationTarget

unable to import 'smart_open.gcs', disabling that module


## Fairness Functions

In [3]:
def calc_benefit(decisions, ips_weights):
    """Return the per-sample benefit: the decisions, optionally scaled by IPS weights.

    Args:
        decisions: Array of decisions.
        ips_weights: Weights multiplied element-wise onto the decisions, or
            None to leave the decisions unweighted.

    Returns:
        `decisions` itself when `ips_weights` is None, otherwise a new array
        holding `decisions * ips_weights`.
    """
    if ips_weights is None:
        return decisions

    # Multiply out-of-place: an in-place `*=` would overwrite the caller's
    # array (compounding the weights on every reuse) and fails with a casting
    # error when the decisions are integers and the weights are floats.
    return decisions * ips_weights


def calc_covariance(s, decisions, ips_weights):
    """Return the per-sample product of the centered, recoded sensitive attribute and the decisions.

    The sensitive attribute is recoded as ``1 - 2 * s`` and centered by its
    mean over axis 0. When `ips_weights` is given, the weights are applied
    both to the recoded attribute before taking that mean and to the
    decisions.

    Args:
        s: Array holding the sensitive attribute.
        decisions: Array of decisions (or any per-sample quantity).
        ips_weights: Optional weights; None disables weighting.

    Returns:
        Array of ``(recoded_s - mean) * decisions`` terms (not yet averaged).
    """
    signed_s = 1 - (2 * s)

    if ips_weights is None:
        center = np.mean(signed_s, axis=0)
        scaled_decisions = decisions
    else:
        center = np.mean(signed_s * ips_weights, axis=0)
        scaled_decisions = decisions * ips_weights

    return (signed_s - center) * scaled_decisions


def fairness_function_gradient(type, **fairness_kwargs):
    """Return the gradient of the selected fairness measure.

    The per-sample fairness term is multiplied with the policy's log-gradient
    and then reduced according to `type`.

    Args:
        type: One of "BD_DP", "BD_EOP", "COV_DP" or "COV_DP_DIST".
        **fairness_kwargs: Must contain the keys "policy", "x", "s", "y",
            "decisions" and "ips_weights".

    Returns:
        For "BD_DP": `mean_difference` of the gradient terms and `s`.
        For "BD_EOP": the same, restricted to the samples where ``y == 1``.
        For "COV_DP" and "COV_DP_DIST": the mean of the gradient terms over axis 0.

    Raises:
        KeyError: If one of the required keyword arguments is missing.
        ValueError: If `type` is not one of the supported names.
    """
    policy = fairness_kwargs["policy"]
    x = fairness_kwargs["x"]
    s = fairness_kwargs["s"]
    y = fairness_kwargs["y"]
    decisions = fairness_kwargs["decisions"]
    ips_weights = fairness_kwargs["ips_weights"]

    # Fail early with a clear message; otherwise an unknown type would only
    # surface later as an unbound `result` variable.
    if type not in ("BD_DP", "BD_EOP", "COV_DP", "COV_DP_DIST"):
        raise ValueError("Unknown fairness type: {!r}".format(type))

    if type == "BD_DP" or type == "BD_EOP":
        result = calc_benefit(decisions, ips_weights)
    elif type == "COV_DP":
        result = calc_covariance(s, decisions, ips_weights)
    else:  # "COV_DP_DIST"
        # Use the linear score phi . theta in place of the decisions.
        phi = policy.feature_map(policy._extract_features(x, s))
        distance = np.matmul(phi, policy.theta).reshape(-1, 1)
        result = calc_covariance(s, distance, ips_weights)

    log_gradient = policy.log_policy_gradient(x, s)
    grad = log_gradient * result

    if type == "BD_DP":
        return mean_difference(grad, s)
    if type == "BD_EOP":
        # NOTE(review): if y is 2-D, np.where returns (rows, cols) and the
        # indexing below picks single elements rather than whole rows of
        # `grad` - confirm the shape of y against the caller.
        y1_indices = np.where(y == 1)
        return mean_difference(grad[y1_indices], s[y1_indices])

    # "COV_DP" and "COV_DP_DIST" share the same reduction.
    return np.mean(grad, axis=0)


def fairness_function(type, **fairness_kwargs):
    """Evaluate the selected fairness measure.

    Args:
        type: One of "BD_DP", "BD_EOP", "COV_DP" or "COV_DP_DIST".
        **fairness_kwargs: Must contain the keys "x", "s", "y", "decisions",
            "ips_weights" and "policy". "x" and "policy" are only used for
            "COV_DP_DIST" and may be None otherwise.

    Returns:
        For "BD_DP": `mean_difference` of the benefit and `s`.
        For "BD_EOP": the same, restricted to the samples where ``y == 1``.
        For "COV_DP": the mean over axis 0 of the covariance terms of `s`
        and the decisions.
        For "COV_DP_DIST": the mean over axis 0 of the covariance terms of
        `s` and the policy's linear score.

    Raises:
        KeyError: If one of the required keyword arguments is missing.
        ValueError: If `type` is not one of the supported names.
    """
    x = fairness_kwargs["x"]
    s = fairness_kwargs["s"]
    y = fairness_kwargs["y"]
    decisions = fairness_kwargs["decisions"]
    ips_weights = fairness_kwargs["ips_weights"]
    policy = fairness_kwargs["policy"]

    if type == "BD_DP":
        benefit = calc_benefit(decisions, ips_weights)
        return mean_difference(benefit, s)
    if type == "COV_DP":
        covariance = calc_covariance(s, decisions, ips_weights)
        return np.mean(covariance, axis=0)
    if type == "COV_DP_DIST":
        # Use the linear score phi . theta in place of the decisions.
        phi = policy.feature_map(policy._extract_features(x, s))
        distance = np.matmul(phi, policy.theta).reshape(-1, 1)
        covariance = calc_covariance(s, distance, ips_weights)
        return np.mean(covariance, axis=0)
    if type == "BD_EOP":
        benefit = calc_benefit(decisions, ips_weights)
        # NOTE(review): if y is 2-D, np.where returns (rows, cols) index
        # arrays - confirm the shape of y against the caller.
        y1_indices = np.where(y == 1)
        return mean_difference(benefit[y1_indices], s[y1_indices])

    # Previously an unknown type fell through and silently returned None.
    raise ValueError("Unknown fairness type: {!r}".format(type))

# COMPAS

In [None]:
# COMPAS data source; the policy's parameter vector gets one entry per feature dimension.
bias = True
distribution = COMPASDistribution(bias=bias, test_percentage=0.2)
dim_theta = distribution.feature_dimension


def util_func(**util_params):
    """Evaluate `cost_utility` with a fixed cost factor of 0.5, forwarding all keyword arguments."""
    return cost_utility(cost_factor=0.5, **util_params)

# Experiment configuration for the COMPAS runs; handed to `train` in the cells below.
training_parameters = {
    # Policy model: class to instantiate and the arguments for it (presumably applied by `train`).
    'model':{
        'constructor': LogisticPolicy,
        'parameters': {
            # Start from an all-zero parameter vector with one entry per feature dimension.
            "theta": np.zeros((dim_theta)),
            "feature_map": IdentityFeatureMap(dim_theta),
            "use_sensitive_attributes": False
        }
    },
    'distribution': distribution,
    # Optimization target; the fairness function and its gradient are added by the experiment cells.
    'optimization_target': {
        'constructor': PenaltyOptimizationTarget,
        'parameters': {
            'utility_function': util_func
        }
    },
    # Optimizer settings (semantics are defined by the project's training code).
    'parameter_optimization': {
        'time_steps':200,
        'epochs': 50,
        'batch_size':256,
        'learning_rate': 0.1,
        'learn_on_entire_history': False,
        'clip_weights': True,
        'change_iterations': 3
    },
    'data': {
        'num_train_samples': 4096,
        'num_test_samples': 1024
    },
    # Additional evaluation measures, keyed by the names imported from src.training_evaluation.
    'evaluation': {
        # Mean utility of the decisions.
        UTILITY: {
            'measure_function': lambda s, y, decisions : np.mean(util_func(s=s,
                                                                           y=y,
                                                                           decisions=decisions)),
            'detailed': False
        },
        # Covariance-of-decision measure, evaluated without IPS weights; x and policy are not
        # needed for "COV_DP" and are therefore passed as None.
        COVARIANCE_OF_DECISION_DP: {
            'measure_function': lambda s, y, decisions : fairness_function(
                type="COV_DP",
                x=None,
                s=s,
                y=y,
                decisions=decisions,
                ips_weights=None,
                policy=None),
            'detailed': False
        }
    }
}

In [None]:
# Reuse the persisted seeds when they exist so repeated runs draw the same data;
# otherwise generate 200 training seeds and 1 test seed and persist them.
seeds_path = './cluster_experiments/seeds.npz'
if os.path.isfile(seeds_path):
    # np.load keeps the .npz archive open; the context manager closes it once
    # both arrays have been read into memory.
    with np.load(seeds_path) as seeds:
        training_parameters['data']["training_seeds"] = seeds["train"]
        training_parameters['data']["test_seed"] = seeds["test"]
else:
    train_seeds = get_list_of_seeds(200)
    test_seeds = get_list_of_seeds(1)
    training_parameters['data']["training_seeds"] = train_seeds
    training_parameters['data']["test_seed"] = test_seeds
    # np.savez does not create missing directories.
    os.makedirs(os.path.dirname(seeds_path), exist_ok=True)
    np.savez(seeds_path, train=train_seeds, test=test_seeds)

### No Fairness

In [None]:
# Baseline run: the benefit-difference (DP) penalty is registered, but the only
# fairness rate passed to `train` is 0.0.
training_parameters['optimization_target']['parameters']['fairness_function'] \
    = lambda **fp : fairness_function("BD_DP", **fp)
training_parameters['optimization_target']['parameters']['fairness_gradient_function'] \
    = lambda **fp : fairness_function_gradient("BD_DP", **fp)

training_parameters["save_path"] = "../res/local_experiments/COMPAS/NO_FAIRNESS"
statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True,
    fairness_rates=[0.0])

# Plot mean and median of the measures over the time steps (0 .. time_steps inclusive)
# and store the figures in the run directory returned by `train`.
plot_mean(x_values=range(training_parameters["parameter_optimization"]["time_steps"] + 1),
          x_label="Time steps",
          x_scale="linear",
          performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                statistics.accuracy()],
          fairness_measures=[statistics.demographic_parity(),
                             statistics.equality_of_opportunity(),
                             statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                              "Covariance of Decision (DP)")],
          file_path="{}/results_mean_time.png".format(run_path))
plot_median(x_values=range(training_parameters["parameter_optimization"]["time_steps"] + 1),
          x_label="Time steps",
          x_scale="linear",
          performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                statistics.accuracy()],
          fairness_measures=[statistics.demographic_parity(),
                             statistics.equality_of_opportunity(),
                             statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                              "Covariance of Decision (DP)")],
          file_path="{}/results_median_time.png".format(run_path))


### Benefit Difference: DP

In [None]:
# Penalize the benefit difference between groups (demographic parity variant).
training_parameters['optimization_target']['parameters']['fairness_function'] \
    = lambda **fp : fairness_function("BD_DP", **fp)
training_parameters['optimization_target']['parameters']['fairness_gradient_function'] \
    = lambda **fp : fairness_function_gradient("BD_DP", **fp)
# Earlier sweep ranges, left disabled:
# lambdas = np.linspace(0.00001, 0.0001, endpoint=True, num=10).tolist()
# lambdas = np.linspace(0.0001, 0.001, endpoint=True, num=10).tolist()
# lambdas.extend(np.linspace(0.001, 0.01, endpoint=True, num=10).tolist())
# lambdas.extend(np.linspace(0.01, 0.1, endpoint=True, num=10).tolist())
# lambdas = np.sort(np.unique(np.array(lambdas, dtype=float)))
# Active sweep: 10 logarithmically spaced penalty constants from 1e-5 to 1.0.
lambdas = np.geomspace(0.00001, 1.0, endpoint=True, num=10)

training_parameters["save_path"] = "../res/local_experiments/COMPAS_BEST/BD_DP"
statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=10,
    asynchronous=True,
    fairness_rates=lambdas)

# Plot mean and median of the measures against the penalty constant (log-scaled x axis).
plot_mean(x_values=lambdas,
          x_label="Penalty Constant",
          x_scale="log",
          performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                statistics.accuracy()],
          fairness_measures=[statistics.demographic_parity(),
                             statistics.equality_of_opportunity(),
                             statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                              "Covariance of Decision (DP)")],
          file_path="{}/results_mean.png".format(run_path))
plot_median(x_values=lambdas,
            x_label="Penalty Constant",
            x_scale="log",
            performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                    statistics.accuracy()],
            fairness_measures=[statistics.demographic_parity(),
                                 statistics.equality_of_opportunity(),
                                 statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                                  "Covariance of Decision (DP)")],
            file_path="{}/results_median.png".format(run_path))


### Benefit Difference: EOP

In [None]:
# Penalize the benefit difference between groups among samples with y == 1
# (equality of opportunity variant).
training_parameters['optimization_target']['parameters']['fairness_function'] \
    = lambda **fp : fairness_function("BD_EOP", **fp)
training_parameters['optimization_target']['parameters']['fairness_gradient_function'] \
    = lambda **fp : fairness_function_gradient("BD_EOP", **fp)
# Earlier sweep ranges, left disabled:
# lambdas = np.linspace(0.000001, 0.0001, endpoint=True, num=10).tolist()
# lambdas.extend(np.linspace(0.0001, 0.001, endpoint=True, num=10).tolist())
# lambdas.extend(np.linspace(0.001, 0.01, endpoint=True, num=10).tolist())
# lambdas = np.sort(np.unique(np.array(lambdas, dtype=float)))
# Active sweep: 10 logarithmically spaced penalty constants from 1e-5 to 1.0.
lambdas = np.geomspace(0.00001, 1.0, endpoint=True, num=10)

training_parameters["save_path"] = "../res/local_experiments/COMPAS_BEST/BD_EOP"
statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=15,
    asynchronous=True,
    fairness_rates=lambdas)

# Plot mean and median of the measures against the penalty constant (log-scaled x axis).
plot_mean(x_values=lambdas,
          x_label="Penalty Constant",
          x_scale="log",
          performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                statistics.accuracy()],
          fairness_measures=[statistics.demographic_parity(),
                             statistics.equality_of_opportunity(),
                             statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                              "Covariance of Decision (DP)")],
          file_path="{}/results_mean.png".format(run_path))
plot_median(x_values=lambdas,
            x_label="Penalty Constant",
            x_scale="log",
            performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                    statistics.accuracy()],
            fairness_measures=[statistics.demographic_parity(),
                                 statistics.equality_of_opportunity(),
                                 statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                                  "Covariance of Decision (DP)")],
            file_path="{}/results_median.png".format(run_path))


# Adult Credit Data

In [None]:
# Adult Credit data source; the policy's parameter vector gets one entry per feature dimension.
bias = True
distribution = AdultCreditDistribution(bias=bias, test_percentage=0.2)
dim_theta = distribution.feature_dimension


def util_func(**util_params):
    """Evaluate `cost_utility` with a fixed cost factor of 0.5, forwarding all keyword arguments."""
    return cost_utility(cost_factor=0.5, **util_params)

# Experiment configuration for the Adult Credit runs; handed to `train` in the cells below.
training_parameters = {
    # Policy model: class to instantiate and the arguments for it (presumably applied by `train`).
    'model':{
        'constructor': LogisticPolicy,
        'parameters': {
            # Start from an all-zero parameter vector with one entry per feature dimension.
            "theta": np.zeros((dim_theta)),
            "feature_map": IdentityFeatureMap(dim_theta),
            "use_sensitive_attributes": False
        }
    },
    'distribution': distribution,
    # Optimization target; the fairness function and its gradient are added by the experiment cells.
    'optimization_target': {
        'constructor': PenaltyOptimizationTarget,
        'parameters': {
            'utility_function': util_func
        }
    },
    # Optimizer settings (semantics are defined by the project's training code).
    'parameter_optimization': {
        'time_steps':200,
        'epochs': 50,
        'batch_size':512,
        'learning_rate': 0.05,
        'learn_on_entire_history': False,
        'clip_weights': True
    },
    'data': {
        'num_train_samples': 16384,
        'num_test_samples': 4096
    },
    # Additional evaluation measures, keyed by the names imported from src.training_evaluation.
    'evaluation': {
        # Mean utility of the decisions.
        UTILITY: {
            'measure_function': lambda s, y, decisions : np.mean(util_func(s=s,
                                                                           y=y,
                                                                           decisions=decisions)),
            'detailed': False
        },
        # Covariance-of-decision measure, evaluated without IPS weights; x and policy are not
        # needed for "COV_DP" and are therefore passed as None.
        COVARIANCE_OF_DECISION_DP: {
            'measure_function': lambda s, y, decisions : fairness_function(
                type="COV_DP",
                x=None,
                s=s,
                y=y,
                decisions=decisions,
                ips_weights=None,
                policy=None),
            'detailed': False
        }
    }
}

In [None]:
# Reuse the persisted seeds when they exist so repeated runs draw the same data;
# otherwise generate 200 training seeds and 1 test seed and persist them.
seeds_path = './cluster_experiments/seeds.npz'
if os.path.isfile(seeds_path):
    # np.load keeps the .npz archive open; the context manager closes it once
    # both arrays have been read into memory.
    with np.load(seeds_path) as seeds:
        training_parameters['data']["training_seeds"] = seeds["train"]
        training_parameters['data']["test_seed"] = seeds["test"]
else:
    train_seeds = get_list_of_seeds(200)
    test_seeds = get_list_of_seeds(1)
    training_parameters['data']["training_seeds"] = train_seeds
    training_parameters['data']["test_seed"] = test_seeds
    # np.savez does not create missing directories.
    os.makedirs(os.path.dirname(seeds_path), exist_ok=True)
    np.savez(seeds_path, train=train_seeds, test=test_seeds)

### No Fairness

In [None]:
# Baseline run: the benefit-difference (DP) penalty is registered, but the only
# fairness rate passed to `train` is 0.0.
training_parameters['optimization_target']['parameters']['fairness_function'] \
    = lambda **fp : fairness_function("BD_DP", **fp)
training_parameters['optimization_target']['parameters']['fairness_gradient_function'] \
    = lambda **fp : fairness_function_gradient("BD_DP", **fp)

training_parameters["save_path"] = "../res/local_experiments/ADULT/NO_FAIRNESS"
statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True,
    fairness_rates=[0.0])

# Plot mean and median of the measures over the time steps (0 .. time_steps inclusive)
# and store the figures in the run directory returned by `train`.
plot_mean(x_values=range(training_parameters["parameter_optimization"]["time_steps"] + 1),
          x_label="Time steps",
          x_scale="linear",
          performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                statistics.accuracy()],
          fairness_measures=[statistics.demographic_parity(),
                             statistics.equality_of_opportunity(),
                             statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                              "Covariance of Decision (DP)")],
          file_path="{}/results_mean_time.png".format(run_path))
plot_median(x_values=range(training_parameters["parameter_optimization"]["time_steps"] + 1),
          x_label="Time steps",
          x_scale="linear",
          performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                statistics.accuracy()],
          fairness_measures=[statistics.demographic_parity(),
                             statistics.equality_of_opportunity(),
                             statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                              "Covariance of Decision (DP)")],
          file_path="{}/results_median_time.png".format(run_path))


### Benefit Difference: DP

In [None]:
# Penalize the benefit difference between groups (demographic parity variant).
training_parameters['optimization_target']['parameters']['fairness_function'] \
    = lambda **fp : fairness_function("BD_DP", **fp)
training_parameters['optimization_target']['parameters']['fairness_gradient_function'] \
    = lambda **fp : fairness_function_gradient("BD_DP", **fp)

training_parameters["save_path"] = "../res/local_experiments/ADULT_BEST/BD_DP"
# Earlier sweep ranges, left disabled:
# lambdas = np.linspace(0.00001, 0.0001, endpoint=True, num=10).tolist()
# lambdas = np.linspace(0.0001, 0.001, endpoint=True, num=10).tolist()
# lambdas = np.linspace(0.001, 0.01, endpoint=True, num=10).tolist()
# lambdas = np.linspace(0.01, 0.1, endpoint=True, num=10).tolist()
# lambdas = np.sort(np.unique(np.array(lambdas, dtype=float)))
# Active sweep: 10 logarithmically spaced penalty constants from 1e-4 to 0.1.
lambdas = np.geomspace(0.0001, 0.1, endpoint=True, num=10)

statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=10,
    asynchronous=True,
    fairness_rates=lambdas)

# Plot mean and median of the measures against the penalty constant (log-scaled x axis).
plot_mean(x_values=lambdas,
          x_label="Penalty Constant",
          x_scale="log",
          performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                statistics.accuracy()],
          fairness_measures=[statistics.demographic_parity(),
                             statistics.equality_of_opportunity(),
                             statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                              "Covariance of Decision (DP)")],
          file_path="{}/results_mean.png".format(run_path))
plot_median(x_values=lambdas,
            x_label="Penalty Constant",
            x_scale="log",
            performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                    statistics.accuracy()],
            fairness_measures=[statistics.demographic_parity(),
                                 statistics.equality_of_opportunity(),
                                 statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                                  "Covariance of Decision (DP)")],
            file_path="{}/results_median.png".format(run_path))


### Benefit Difference: EOP

In [None]:
# Penalize the benefit difference between groups among samples with y == 1
# (equality of opportunity variant).
training_parameters['optimization_target']['parameters']['fairness_function'] \
    = lambda **fp : fairness_function("BD_EOP", **fp)
training_parameters['optimization_target']['parameters']['fairness_gradient_function'] \
    = lambda **fp : fairness_function_gradient("BD_EOP", **fp)

training_parameters["save_path"] = "../res/local_experiments/ADULT_BEST/BD_EOP"
# Sweep 10 linearly spaced penalty constants in [0.01, 0.1].
# NOTE(review): this cell used to assign three further ranges (1e-5..1e-4,
# 1e-4..1e-3, 1e-3..1e-2) that were each overwritten by the next assignment
# and never used; they were removed as dead stores. If a combined sweep over
# all four ranges was intended, build it with `lambdas.extend(...)` instead.
lambdas = np.linspace(0.01, 0.1, endpoint=True, num=10).tolist()
lambdas = np.sort(np.unique(np.array(lambdas, dtype=float)))

statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True,
    fairness_rates=lambdas)

# Plot mean and median of the measures against the penalty constant (log-scaled x axis).
plot_mean(x_values=lambdas,
          x_label="Penalty Constant",
          x_scale="log",
          performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                statistics.accuracy()],
          fairness_measures=[statistics.demographic_parity(),
                             statistics.equality_of_opportunity(),
                             statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                              "Covariance of Decision (DP)")],
          file_path="{}/results_mean.png".format(run_path))
plot_median(x_values=lambdas,
            x_label="Penalty Constant",
            x_scale="log",
            performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                    statistics.accuracy()],
            fairness_measures=[statistics.demographic_parity(),
                                 statistics.equality_of_opportunity(),
                                 statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                                  "Covariance of Decision (DP)")],
            file_path="{}/results_median.png".format(run_path))


### Covariance of decision: DP

In [None]:
# Penalize the covariance between the sensitive attribute and the decisions.
training_parameters['optimization_target']['parameters']['fairness_function'] \
    = lambda **fp : fairness_function("COV_DP", **fp)
training_parameters['optimization_target']['parameters']['fairness_gradient_function'] \
    = lambda **fp : fairness_function_gradient("COV_DP", **fp)

training_parameters["save_path"] = "../res/local_experiments/ADULT_BEST/COV_DP"
# Sweep 10 linearly spaced penalty constants in [0.01, 0.1].
# NOTE(review): this cell used to assign three further ranges (1e-5..1e-4,
# 1e-4..1e-3, 1e-3..1e-2) that were each overwritten by the next assignment
# and never used; they were removed as dead stores. If a combined sweep over
# all four ranges was intended, build it with `lambdas.extend(...)` instead.
lambdas = np.linspace(0.01, 0.1, endpoint=True, num=10).tolist()

statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True,
    fairness_rates=lambdas)

# Plot mean and median of the measures against the penalty constant (log-scaled x axis).
plot_mean(x_values=lambdas,
          x_label="Penalty Constant",
          x_scale="log",
          performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                statistics.accuracy()],
          fairness_measures=[statistics.demographic_parity(),
                             statistics.equality_of_opportunity(),
                             statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                              "Covariance of Decision (DP)")],
          file_path="{}/results_mean.png".format(run_path))
plot_median(x_values=lambdas,
            x_label="Penalty Constant",
            x_scale="log",
            performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                    statistics.accuracy()],
            fairness_measures=[statistics.demographic_parity(),
                                 statistics.equality_of_opportunity(),
                                 statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                                  "Covariance of Decision (DP)")],
            file_path="{}/results_median.png".format(run_path))

# FICO Credit Score Data

In [None]:
# FICO data source; the policy's parameter vector gets one entry per feature dimension.
bias = True
distribution = FICODistribution(bias=bias, fraction_protected=0.5)
dim_theta = distribution.feature_dimension


def util_func(**util_params):
    """Evaluate `cost_utility` with a fixed cost factor of 0.5, forwarding all keyword arguments."""
    return cost_utility(cost_factor=0.5, **util_params)

# Experiment configuration for the FICO runs; handed to `train` in the cells below.
training_parameters = {
    # Policy model: class to instantiate and the arguments for it (presumably applied by `train`).
    'model':{
        'constructor': LogisticPolicy,
        'parameters': {
            # Start from an all-zero parameter vector with one entry per feature dimension.
            "theta": np.zeros((dim_theta)),
            "feature_map": IdentityFeatureMap(dim_theta),
            "use_sensitive_attributes": False
        }
    },
    'distribution': distribution,
    # Optimization target; the fairness function and its gradient are added by the experiment cells.
    'optimization_target': {
        'constructor': PenaltyOptimizationTarget,
        'parameters': {
            'utility_function': util_func
        }
    },
    # Optimizer settings (semantics are defined by the project's training code).
    'parameter_optimization': {
        'time_steps':200,
        'epochs': 150,
        'batch_size':128,
        'learning_rate': 0.01,
        'learn_on_entire_history': False,
        'clip_weights': True
    },
    'data': {
        'num_train_samples': 4096,
        'num_test_samples': 1024,
        # Only this section sets 'fix_seeds'; its effect is defined by the project's training code.
        'fix_seeds': True
    },
    # Additional evaluation measures, keyed by the names imported from src.training_evaluation.
    'evaluation': {
        # Mean utility of the decisions.
        UTILITY: {
            'measure_function': lambda s, y, decisions : np.mean(util_func(s=s,
                                                                           y=y,
                                                                           decisions=decisions)),
            'detailed': False
        },
        # Covariance-of-decision measure, evaluated without IPS weights; x and policy are not
        # needed for "COV_DP" and are therefore passed as None.
        COVARIANCE_OF_DECISION_DP: {
            'measure_function': lambda s, y, decisions : fairness_function(
                type="COV_DP",
                x=None,
                s=s,
                y=y,
                decisions=decisions,
                ips_weights=None,
                policy=None),
            'detailed': False
        }
    }
}

In [None]:
# Reuse the persisted seeds when they exist so repeated runs draw the same data;
# otherwise generate 200 training seeds and 1 test seed and persist them.
seeds_path = './cluster_experiments/seeds.npz'
if os.path.isfile(seeds_path):
    # np.load keeps the .npz archive open; the context manager closes it once
    # both arrays have been read into memory.
    with np.load(seeds_path) as seeds:
        training_parameters['data']["training_seeds"] = seeds["train"]
        training_parameters['data']["test_seed"] = seeds["test"]
else:
    train_seeds = get_list_of_seeds(200)
    test_seeds = get_list_of_seeds(1)
    training_parameters['data']["training_seeds"] = train_seeds
    training_parameters['data']["test_seed"] = test_seeds
    # np.savez does not create missing directories.
    os.makedirs(os.path.dirname(seeds_path), exist_ok=True)
    np.savez(seeds_path, train=train_seeds, test=test_seeds)

### Benefit Difference: DP

In [None]:
# Penalize the benefit difference between groups (demographic parity variant).
training_parameters['optimization_target']['parameters']['fairness_function'] \
    = lambda **fp : fairness_function("BD_DP", **fp)
training_parameters['optimization_target']['parameters']['fairness_gradient_function'] \
    = lambda **fp : fairness_function_gradient("BD_DP", **fp)

training_parameters["save_path"] = "../res/local_experiments/FICO_BEST/BD_DP"
# Penalty constants: 10 linearly spaced values in each of [1e-4, 1e-3], [1e-3, 1e-2]
# and [1e-2, 1e-1]; np.unique drops the endpoints shared by neighbouring ranges.
lambdas = np.linspace(0.0001, 0.001, endpoint=True, num=10).tolist()
lambdas.extend(np.linspace(0.001, 0.01, endpoint=True, num=10).tolist())
lambdas.extend(np.linspace(0.01, 0.1, endpoint=True, num=10).tolist())
lambdas = np.sort(np.unique(np.array(lambdas, dtype=float)))

statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True,
    fairness_rates=lambdas)

# Plot mean and median of the measures against the penalty constant (log-scaled x axis).
plot_mean(x_values=lambdas,
          x_label="Penalty Constant",
          x_scale="log",
          performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                statistics.accuracy()],
          fairness_measures=[statistics.demographic_parity(),
                             statistics.equality_of_opportunity(),
                             statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                              "Covariance of Decision (DP)")],
          file_path="{}/results_mean.png".format(run_path))
plot_median(x_values=lambdas,
            x_label="Penalty Constant",
            x_scale="log",
            performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                    statistics.accuracy()],
            fairness_measures=[statistics.demographic_parity(),
                                 statistics.equality_of_opportunity(),
                                 statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                                  "Covariance of Decision (DP)")],
            file_path="{}/results_median.png".format(run_path))


### Benefit Difference: EOP

In [None]:
# Penalize the benefit difference between groups among samples with y == 1
# (equality of opportunity variant).
training_parameters['optimization_target']['parameters']['fairness_function'] \
    = lambda **fp : fairness_function("BD_EOP", **fp)
training_parameters['optimization_target']['parameters']['fairness_gradient_function'] \
    = lambda **fp : fairness_function_gradient("BD_EOP", **fp)

training_parameters["save_path"] = "../res/local_experiments/FICO_BEST/BD_EOP"
# Penalty constants: 10 linearly spaced values in each of [1e-3, 1e-2], [1e-2, 1e-1]
# and [1e-1, 1]; np.unique drops the endpoints shared by neighbouring ranges.
lambdas = np.linspace(0.001, 0.01, endpoint=True, num=10).tolist()
lambdas.extend(np.linspace(0.01, 0.1, endpoint=True, num=10).tolist())
lambdas.extend(np.linspace(0.1, 1.0, endpoint=True, num=10).tolist())
lambdas = np.sort(np.unique(np.array(lambdas, dtype=float)))

statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True,
    fairness_rates=lambdas)

# Plot mean and median of the measures against the penalty constant (log-scaled x axis).
plot_mean(x_values=lambdas,
          x_label="Penalty Constant",
          x_scale="log",
          performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                statistics.accuracy()],
          fairness_measures=[statistics.demographic_parity(),
                             statistics.equality_of_opportunity(),
                             statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                              "Covariance of Decision (DP)")],
          file_path="{}/results_mean.png".format(run_path))
plot_median(x_values=lambdas,
            x_label="Penalty Constant",
            x_scale="log",
            performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                    statistics.accuracy()],
            fairness_measures=[statistics.demographic_parity(),
                                 statistics.equality_of_opportunity(),
                                 statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                                  "Covariance of Decision (DP)")],
            file_path="{}/results_median.png".format(run_path))


### Covariance of decision: DP

In [None]:
# Penalize the covariance between the sensitive attribute and the decisions.
training_parameters['optimization_target']['parameters']['fairness_function'] = lambda **fp : fairness_function("COV_DP", **fp)
training_parameters['optimization_target']['parameters']['fairness_gradient_function'] = lambda **fp : fairness_function_gradient("COV_DP", **fp)

training_parameters["save_path"] = "../res/local_experiments/FICO_BEST/COV_DP"
# Penalty constants: 10 linearly spaced values in each of [1e-2, 1e-1], [1e-1, 1]
# and [1, 10]; np.unique drops the endpoints shared by neighbouring ranges.
lambdas = np.linspace(0.01, 0.1, endpoint=True, num=10).tolist()
lambdas.extend(np.linspace(0.1, 1.0, endpoint=True, num=10).tolist())
lambdas.extend(np.linspace(1.0, 10.0, endpoint=True, num=10).tolist())
lambdas = np.sort(np.unique(np.array(lambdas, dtype=float)))

statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True,
    fairness_rates=lambdas)

# Plot mean and median of the measures against the penalty constant (log-scaled x axis).
plot_mean(x_values=lambdas,
          x_label="Penalty Constant",
          x_scale="log",
          performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                statistics.accuracy()],
          fairness_measures=[statistics.demographic_parity(),
                             statistics.equality_of_opportunity(),
                             statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                              "Covariance of Decision (DP)")],
          file_path="{}/results_mean.png".format(run_path))
plot_median(x_values=lambdas,
            x_label="Penalty Constant",
            x_scale="log",
            performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                    statistics.accuracy()],
            fairness_measures=[statistics.demographic_parity(),
                                 statistics.equality_of_opportunity(),
                                 statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                                                  "Covariance of Decision (DP)")],
            file_path="{}/results_median.png".format(run_path))

# German Credit Data

In [None]:
# German Credit data source; the policy's parameter vector gets one entry per feature dimension.
bias = True
distribution = GermanCreditDistribution(bias=bias, test_percentage=0.3)
dim_theta = distribution.feature_dimension

# The cost_utility-based utility used in the other sections is disabled here;
# this section defines its own utility function (`wat`) below.
# def util_func(**util_params):
#     util = cost_utility(cost_factor=0.5, **util_params)
#     return util

def wat(**utility_parameters):
    """Return a per-sample utility with asymmetric penalties.

    Only the ``decisions`` and ``y`` keyword arguments are read; any other
    keyword is ignored.

    * where ``decisions == 1`` the utility is ``-5 * (1 - y)``
    * where ``decisions == 0`` the utility is ``-1 * y``
    * every other entry stays at zero

    Returns:
        An array with the shape and dtype of ``y``.
    """
    decisions = utility_parameters["decisions"]
    y = utility_parameters["y"]
    utility = np.zeros_like(y)

    # Entries decided with 1: charge 5 for every unit that y falls short of 1.
    positive_idx = np.nonzero(decisions == 1)
    utility[positive_idx] += -5 * (1 - y[positive_idx])

    # Entries decided with 0: charge y itself.
    negative_idx = np.nonzero(decisions == 0)
    utility[negative_idx] += -1 * y[negative_idx]

    return utility

# Configuration handed to `train` for the German Credit experiments.
#
# Sections:
#   model                  -- policy class plus its constructor arguments
#   distribution           -- data source the samples are drawn from
#   optimization_target    -- objective class; the fairness functions are
#                             added to its 'parameters' by the cells below
#   parameter_optimization -- optimizer hyper-parameters
#   data                   -- sample counts (seeds are added by the next cell)
#   evaluation             -- additional measures recorded during training
training_parameters = {
    'model': {
        'constructor': LogisticPolicy,
        'parameters': {
            # Start from an all-zero parameter vector.
            "theta": np.zeros(dim_theta),
            "feature_map": IdentityFeatureMap(dim_theta),
            "use_sensitive_attributes": False
        }
    },
    'distribution': distribution,
    'optimization_target': {
        'constructor': PenaltyOptimizationTarget,
        'parameters': {
            'utility_function': wat
        }
    },
    'parameter_optimization': {
        'time_steps': 200,
        'epochs': 50,
        'batch_size': 64,
        'learning_rate': 0.1,
        'learn_on_entire_history': False,
        'clip_weights': True
    },
    'data': {
        'num_train_samples': 512,
        'num_test_samples': 256
    },
    'evaluation': {
        UTILITY: {
            # Report utility with the same function the optimization target
            # uses. The previous `util_func` is commented out in this
            # section, so referencing it here either raised a NameError or
            # silently picked up a stale definition from an earlier cell.
            'measure_function': lambda s, y, decisions: np.mean(
                wat(s=s, y=y, decisions=decisions)),
            'detailed': False
        },
        COVARIANCE_OF_DECISION_DP: {
            # Covariance measure evaluated on the raw decisions: no policy,
            # no features and no IPS weights are supplied.
            'measure_function': lambda s, y, decisions: fairness_function(
                type="COV_DP",
                x=None,
                s=s,
                y=y,
                decisions=decisions,
                ips_weights=None,
                policy=None),
            'detailed': False
        }
    }
}

In [None]:
# Reuse the persisted seeds when the archive exists; otherwise draw fresh
# seeds and persist them so that later runs see the same values.
seeds_path = './cluster_experiments/seeds.npz'

if os.path.isfile(seeds_path):
    # np.load returns an NpzFile that keeps the archive open; the context
    # manager closes it once both arrays have been read into memory.
    with np.load(seeds_path) as seeds:
        training_parameters['data']["training_seeds"] = seeds["train"]
        training_parameters['data']["test_seed"] = seeds["test"]
else:
    train_seeds = get_list_of_seeds(200)
    test_seeds = get_list_of_seeds(1)
    training_parameters['data']["training_seeds"] = train_seeds
    training_parameters['data']["test_seed"] = test_seeds

    # Create the target directory first so np.savez cannot fail on a
    # missing folder.
    os.makedirs(os.path.dirname(seeds_path), exist_ok=True)
    np.savez(seeds_path, train=train_seeds, test=test_seeds)

### No Fairness

In [6]:
# Baseline run: the benefit-difference functions are installed, but the only
# fairness rate passed to `train` is 0.0.
target_parameters = training_parameters['optimization_target']['parameters']
target_parameters['fairness_function'] = (
    lambda **fairness_kwargs: fairness_function("BD_DP", **fairness_kwargs))
target_parameters['fairness_gradient_function'] = (
    lambda **fairness_kwargs: fairness_function_gradient("BD_DP", **fairness_kwargs))

training_parameters["save_path"] = "../res/local_experiments/GERMAN/NO_FAIRNESS"
statistics, model_parameters, run_path = train(
    training_parameters,
    fairness_rates=[0.0],
    iterations=1,
    asynchronous=True)

# Plot the measures over time steps: once through plot_mean, once through
# plot_median. Axis values and measures are rebuilt for every plot.
for draw, path_template in ((plot_mean, "{}/results_mean_time.png"),
                            (plot_median, "{}/results_median_time.png")):
    time_axis = range(training_parameters["parameter_optimization"]["time_steps"] + 1)
    performance_panels = [
        statistics.get_additonal_measure(UTILITY, "Utility"),
        statistics.accuracy(),
    ]
    fairness_panels = [
        statistics.demographic_parity(),
        statistics.equality_of_opportunity(),
        statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                         "Covariance of Decision (DP)"),
    ]
    draw(x_values=time_axis,
         x_label="Time steps",
         x_scale="linear",
         performance_measures=performance_panels,
         fairness_measures=fairness_panels,
         file_path=path_template.format(run_path))

## STARTED // LR = 0.1 // TS = 200 // E = 50 // BS = 64 // FR = 0.0 ##
[[1.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [5.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [5.]
 [5.]
 [1.]
 [0.]
 [1.]
 [5.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [5.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [5.]
 [0.]
 [1.]
 [0.]
 [1.]
 [5.]
 [0.]
 [0.]
 [0.]
 [5.]
 [1.]
 [0.]
 [0.]
 [0.]
 [5.]
 [0.]
 [0.]
 [5.]
 [5.]
 [1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [1.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [5.]
 [0.]
 [0.]
 [1.]
 [5.]
 [5.]
 [0.]
 [1.]
 [1.]
 [0.]
 [0.]
 [5.]
 [1.]
 [0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [1.]
 [5.]
 [1.]
 [1.]
 [5.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [5.]
 [0.]
 [0.]
 [1.]
 [5.]
 [5.]
 [0.]
 [1.]
 [0.]
 [5.]
 [0.]
 [0.]
 [5.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [5.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [0.]
 [0.]
 [5.]
 [0.]
 [1.]
 [0.]
 [1.]
 [5.]
 [0.]

Process ForkPoolWorker-16:
Process ForkPoolWorker-8:
Process ForkPoolWorker-4:
Process ForkPoolWorker-15:
Process ForkPoolWorker-14:
Process ForkPoolWorker-5:
Process ForkPoolWorker-3:
Process ForkPoolWorker-6:
Process ForkPoolWorker-1:
Process ForkPoolWorker-13:
Process ForkPoolWorker-2:
Traceback (most recent call last):
Process ForkPoolWorker-7:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/fkretschmar/miniconda3/envs/masters-thesis/lib/python3.7/site-packages/multiprocess/process.py", line 297, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/fkretschmar/miniconda3/envs/masters-thesis/lib/python3.7/site-packages/multiprocess/process.py", line 297, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/fkretschmar/miniconda3/envs/masters-thesis/lib/python3.7/site-packages/multiprocess/process.py", line 297, in _bootstrap
    

KeyboardInterrupt: 

### Benefit Difference: DP

In [None]:
# Benefit-difference (demographic parity) penalty: install the fairness
# function and its gradient on the optimization target.
target_parameters = training_parameters['optimization_target']['parameters']
target_parameters['fairness_function'] = (
    lambda **fairness_kwargs: fairness_function("BD_DP", **fairness_kwargs))
target_parameters['fairness_gradient_function'] = (
    lambda **fairness_kwargs: fairness_function_gradient("BD_DP", **fairness_kwargs))

training_parameters["save_path"] = "../res/local_experiments/GERMAN/BD_DP"

# Ten geometrically spaced penalty constants between 1e-9 and 1e-3.
lambdas = np.geomspace(1e-9, 1e-3, num=10)

statistics, model_parameters, run_path = train(
    training_parameters,
    fairness_rates=lambdas,
    iterations=5,
    asynchronous=True)

# Plot the measures over the penalty constants: once through plot_mean, once
# through plot_median. The measures are rebuilt for every plot.
for draw, path_template in ((plot_mean, "{}/results_mean.png"),
                            (plot_median, "{}/results_median.png")):
    performance_panels = [
        statistics.get_additonal_measure(UTILITY, "Utility"),
        statistics.accuracy(),
    ]
    fairness_panels = [
        statistics.demographic_parity(),
        statistics.equality_of_opportunity(),
        statistics.get_additonal_measure(COVARIANCE_OF_DECISION_DP,
                                         "Covariance of Decision (DP)"),
    ]
    draw(x_values=lambdas,
         x_label="Penalty Constant",
         x_scale="log",
         performance_measures=performance_panels,
         fairness_measures=fairness_panels,
         file_path=path_template.format(run_path))