# Implementation of the best parameter settings for the COMPAS, Adult Credit and FICO Credit score datasets

In [1]:
import os
import sys
import numpy as np
import torch
module_path = os.path.abspath(os.path.join('..'))

if module_path not in sys.path:
    sys.path.append(module_path)

from src.util import mean_difference, mean, get_list_of_seeds
from src.functions import cost_utility, cost_utility_probability
from src.plotting import plot_mean, plot_median
from src.training import train
from src.training_evaluation import UTILITY, COVARIANCE_OF_DECISION_DP
from src.policy import NeuralNetworkPolicy
from src.distribution import COMPASDistribution
from src.optimization import PenaltyOptimizationTarget, LagrangianOptimizationTarget

unable to import 'smart_open.gcs', disabling that module


## Fairness Functions

In [2]:
def calc_benefit(decisions, ips_weights):
    """Return the benefit values derived from ``decisions``.

    Args:
        decisions: Decision values.
        ips_weights: Optional weights multiplied onto ``decisions``. Pass
            ``None`` to skip the re-weighting. (Presumably inverse propensity
            scores, going by the name -- confirm against the caller.)

    Returns:
        ``decisions * ips_weights`` when weights are given, otherwise the
        ``decisions`` object itself, unchanged.
    """
    if ips_weights is None:
        return decisions

    return decisions * ips_weights

def calc_covariance(s, decisions, ips_weights):
    """Return the per-sample products of centred ``1 - 2 * s`` and decisions.

    The attribute ``s`` is first mapped to ``1 - 2 * s`` (so a 0/1 encoded
    attribute would become +1/-1 -- the encoding is an assumption, confirm
    against the caller). The mapped values are centred around their mean
    along axis 0 and multiplied with the decisions.

    Args:
        s: Attribute values.
        decisions: Decision values.
        ips_weights: Optional weights. When given, they are multiplied onto
            both the mapped attribute (before taking the mean) and the
            decisions. ``None`` disables the re-weighting.

    Returns:
        ``(mapped_s - centre) * d`` where ``d`` is the (optionally weighted)
        decision term; the result is not averaged here.
    """
    mapped_s = 1 - (2 * s)

    if ips_weights is None:
        centre = mean(mapped_s, axis=0)
        decision_term = decisions
    else:
        centre = mean(mapped_s * ips_weights, axis=0)
        decision_term = decisions * ips_weights

    return (mapped_s - centre) * decision_term

def fairness_function(type=None, **fairness_kwargs):
    """Evaluate the fairness measure selected by ``type``.

    Args:
        type: Name of the measure: ``"BD_DP"``, ``"COV_DP"`` or ``"BD_EOP"``.
            (The parameter shadows the builtin ``type``; the name is kept
            because callers pass it by keyword.)
        **fairness_kwargs: Data the measure is computed from:
            ``"s"`` is always required; ``"ips_weights"`` is optional;
            ``"decision_probabilities"`` is read when no weights are given
            (missing or ``None``), otherwise ``"decisions"`` is read;
            ``"y"`` is only required for ``"BD_EOP"``.

    Returns:
        * ``"BD_DP"``: ``mean_difference`` of the benefit with respect to ``s``.
        * ``"COV_DP"``: mean along axis 0 of ``calc_covariance``.
        * ``"BD_EOP"``: like ``"BD_DP"`` but restricted to the samples where
          ``y.squeeze() == 1``.

    Raises:
        KeyError: If an entry required for the selected measure is missing.
        ValueError: If ``type`` is not one of the supported measure names.
    """
    s = fairness_kwargs["s"]
    ips_weights = fairness_kwargs.get("ips_weights")

    # Without weights the measure works on probabilities, with weights on the
    # decisions themselves.
    if ips_weights is None:
        decisions = fairness_kwargs["decision_probabilities"]
    else:
        decisions = fairness_kwargs["decisions"]

    if type == "BD_DP":
        benefit = calc_benefit(decisions, ips_weights)
        return mean_difference(benefit, s)

    if type == "COV_DP":
        covariance = calc_covariance(s, decisions, ips_weights)
        return mean(covariance, axis=0)

    if type == "BD_EOP":
        # Only this measure needs the labels, so they are looked up here
        # instead of being demanded for every measure.
        y = fairness_kwargs["y"]
        benefit = calc_benefit(decisions, ips_weights)
        y1_indices = np.where(y.squeeze() == 1)
        return mean_difference(benefit[y1_indices], s[y1_indices])

    # Fail loudly instead of silently handing ``None`` to the optimizer.
    raise ValueError(
        "Unknown fairness type {!r}; expected 'BD_DP', 'COV_DP' or 'BD_EOP'."
        .format(type))

def eval_covariance_of_decision(**fairness_params):
    """Evaluation measure: mean unweighted covariance term of the decisions.

    Args:
        **fairness_params: Must contain ``"s"`` and ``"decisions"``; any other
            entries are ignored.

    Returns:
        The mean along axis 0 of ``calc_covariance(s, decisions, None)``.
    """
    unweighted_covariance = calc_covariance(
        fairness_params["s"],
        fairness_params["decisions"],
        None)
    return mean(unweighted_covariance, axis=0)

def eval_utility(**util_params):
    """Evaluation measure: ``cost_utility`` with the cost factor fixed to 0.5.

    Args:
        **util_params: Forwarded unchanged to ``cost_utility``.

    Returns:
        Whatever ``cost_utility`` returns for these arguments.
    """
    fixed_cost_factor = 0.5
    return cost_utility(cost_factor=fixed_cost_factor, **util_params)

def no_fairness(**fairness_kwargs):
    """Placeholder fairness measure that ignores its input.

    Args:
        **fairness_kwargs: Accepted for signature compatibility and ignored.

    Returns:
        Always ``0.0``.
    """
    constant_value = 0.0
    return constant_value

def utility(**util_params):
    """Training utility: ``cost_utility_probability`` with cost factor 0.5.

    Args:
        **util_params: Forwarded unchanged to ``cost_utility_probability``.

    Returns:
        Whatever ``cost_utility_probability`` returns for these arguments.
    """
    fixed_cost_factor = 0.5
    return cost_utility_probability(cost_factor=fixed_cost_factor, **util_params)

def covariance_of_decision(**fairness_params):
    """Fairness measure ``"COV_DP"``, evaluated through ``fairness_function``.

    Args:
        **fairness_params: Forwarded unchanged to ``fairness_function``.

    Returns:
        The result of ``fairness_function`` for the ``"COV_DP"`` measure.
    """
    measure_name = "COV_DP"
    return fairness_function(type=measure_name, **fairness_params)

def benefit_difference_dp(**fairness_params):
    """Fairness measure ``"BD_DP"``, evaluated through ``fairness_function``.

    Args:
        **fairness_params: Forwarded unchanged to ``fairness_function``.

    Returns:
        The result of ``fairness_function`` for the ``"BD_DP"`` measure.
    """
    measure_name = "BD_DP"
    return fairness_function(type=measure_name, **fairness_params)

def benefit_difference_eop(**fairness_params):
    """Fairness measure ``"BD_EOP"``, evaluated through ``fairness_function``.

    Args:
        **fairness_params: Forwarded unchanged to ``fairness_function``.

    Returns:
        The result of ``fairness_function`` for the ``"BD_EOP"`` measure.
    """
    measure_name = "BD_EOP"
    return fairness_function(type=measure_name, **fairness_params)

# COMPAS

In [3]:
# Data source for all COMPAS experiments below; `bias` is forwarded to the
# distribution and 20% of the data is requested as the test split.
bias = True
distribution = COMPASDistribution(bias=bias, test_percentage=0.2)
dim_theta = distribution.feature_dimension

# Shared configuration handed to `train`. The cells below add the
# "optimization_target" and "save_path" entries before each run.
training_parameters = dict(
    # NOTE(review): the meaning of the second positional argument (False) is
    # not visible here -- check the NeuralNetworkPolicy signature.
    model=NeuralNetworkPolicy(dim_theta, False),
    distribution=distribution,
    parameter_optimization=dict(
        time_steps=200,
        epochs=50,
        batch_size=128,
        learning_rate=0.01,
        learn_on_entire_history=False,
        clip_weights=True,
    ),
    data=dict(
        num_train_samples=4096,
        num_test_samples=1024,
    ),
    # Additional measures recorded during evaluation, keyed by the constants
    # imported from src.training_evaluation.
    evaluation={
        UTILITY: dict(
            measure_function=eval_utility,
            detailed=False,
        ),
        COVARIANCE_OF_DECISION_DP: dict(
            measure_function=eval_covariance_of_decision,
            detailed=False,
        ),
    },
)

# Reuse the stored seeds when the file exists; otherwise draw new seeds and
# persist them so that later runs pick up the same ones.
seeds_path = './cluster_experiments/seeds.npz'

if os.path.isfile(seeds_path):
    # np.load keeps the .npz archive open until it is closed, so both arrays
    # are read inside a context manager.
    with np.load(seeds_path) as seeds:
        train_seeds = seeds["train"]
        test_seeds = seeds["test"]
else:
    train_seeds = get_list_of_seeds(200)
    test_seeds = get_list_of_seeds(1)
    # np.savez does not create missing directories.
    os.makedirs(os.path.dirname(seeds_path), exist_ok=True)
    np.savez(seeds_path, train=train_seeds, test=test_seeds)

training_parameters['data']["training_seeds"] = train_seeds
training_parameters['data']["test_seed"] = test_seeds

### No Fairness

In [None]:
# Baseline run: the fairness term is the constant `no_fairness` measure and a
# single fairness rate of 0.0 is trained for 10 iterations.
# NOTE(review): the meaning of the first argument (0.0) of
# PenaltyOptimizationTarget is not visible here -- confirm in src.optimization.
no_fairness_target = PenaltyOptimizationTarget(0.0, utility, no_fairness)
training_parameters["optimization_target"] = no_fairness_target
training_parameters["save_path"] = "../res/local_experiments/COMPAS_NN/NO_FAIRNESS"

statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=10,
    asynchronous=False,
    fairness_rates=[0.0])

# With a single fairness rate the results are plotted over the time steps
# (0 .. time_steps inclusive).
time_axis = range(training_parameters["parameter_optimization"]["time_steps"] + 1)
mean_plot_path = "{}/results_mean_time.png".format(run_path)

plot_mean(
    x_values=time_axis,
    x_label="Time steps",
    x_scale="linear",
    performance_measures=[
        statistics.get_additonal_measure(UTILITY, "Utility"),
        statistics.demographic_parity(),
        statistics.equality_of_opportunity(),
    ],
    fairness_measures=[],
    file_path=mean_plot_path)

median_plot_path = "{}/results_median_time.png".format(run_path)

plot_median(
    x_values=time_axis,
    x_label="Time steps",
    x_scale="linear",
    performance_measures=[
        statistics.get_additonal_measure(UTILITY, "Utility"),
        statistics.demographic_parity(),
        statistics.equality_of_opportunity(),
    ],
    fairness_measures=[],
    file_path=median_plot_path)


### Benefit Difference: DP

In [None]:
# Penalty-based run with the "BD_DP" fairness measure, swept over ten
# penalty constants spaced geometrically between 0.001 and 100 (inclusive).
bd_dp_target = PenaltyOptimizationTarget(0.0, utility, benefit_difference_dp)
training_parameters["optimization_target"] = bd_dp_target

lambdas = np.geomspace(0.001, 100.0, num=10, endpoint=True)

training_parameters["save_path"] = "../res/local_experiments/COMPAS_NN/BD_DP"

statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=10,
    asynchronous=True,
    fairness_rates=lambdas)

# One point per penalty constant, shown on a logarithmic x-axis.
mean_plot_path = "{}/results_mean.png".format(run_path)

plot_mean(
    x_values=lambdas,
    x_label="Penalty Constant",
    x_scale="log",
    performance_measures=[
        statistics.get_additonal_measure(UTILITY, "Utility"),
        statistics.demographic_parity(),
        statistics.equality_of_opportunity(),
    ],
    fairness_measures=[],
    file_path=mean_plot_path)

median_plot_path = "{}/results_median.png".format(run_path)

plot_median(
    x_values=lambdas,
    x_label="Penalty Constant",
    x_scale="log",
    performance_measures=[
        statistics.get_additonal_measure(UTILITY, "Utility"),
        statistics.demographic_parity(),
        statistics.equality_of_opportunity(),
    ],
    fairness_measures=[],
    file_path=median_plot_path)

### Benefit Difference Dual Gradient

In [4]:
# Dual-gradient (Lagrangian) run: a single fairness rate of 0.0 is passed to
# `train`, for 5 iterations.
# NOTE(review): the heading and the save path say "BD_DP", but the fairness
# measure used here is benefit_difference_eop -- confirm which one is intended.
training_parameters["optimization_target"] = \
    LagrangianOptimizationTarget(0.0, utility, benefit_difference_eop)

# Settings for the Lagrangian optimization.
training_parameters["lagrangian_optimization"] = {
    "epochs": 10,
    "batch_size": 4096,
    "learning_rate": 1.0
}

training_parameters["save_path"] = "../res/local_experiments/COMPAS_NN/BD_DP_DG"
statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=5,
    asynchronous=False,
    fairness_rates=[0.0])

# This run uses one fairness rate, so the results are plotted over the time
# steps (as in the other single-rate experiment) rather than over `lambdas`,
# which belongs to the penalty sweep and may not even be defined when this
# cell is executed on its own.
time_axis = range(training_parameters["parameter_optimization"]["time_steps"] + 1)

plot_mean(x_values=time_axis,
          x_label="Time steps",
          x_scale="linear",
          performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                statistics.demographic_parity(),
                                statistics.equality_of_opportunity()],
          fairness_measures=[],
          file_path="{}/results_mean.png".format(run_path))
plot_median(x_values=time_axis,
            x_label="Time steps",
            x_scale="linear",
            performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                                  statistics.demographic_parity(),
                                  statistics.equality_of_opportunity()],
            fairness_measures=[],
            file_path="{}/results_median.png".format(run_path))

## STARTED // LR = 0.01 // TS = 200 // E = 50 // BS = 128 // FR = 0.0 ##


KeyboardInterrupt: 