# Implementation of the best parameter settings for the COMPAS, Adult Credit and FICO Credit Score datasets

In [2]:
import os
import sys
import numpy as np
import torch
# Put the parent of the current working directory on the import path (once)
# so that the `src.*` imports below can be resolved.
module_path = os.path.abspath(os.pardir)

if module_path not in sys.path:
    sys.path.append(module_path)

from src.util import mean_difference, get_list_of_seeds
from src.functions import cost_utility, cost_utility_probability
from src.plotting import plot_mean, plot_median
from src.training import train
from src.training_evaluation import UTILITY, COVARIANCE_OF_DECISION_DP
from src.policy import NeuralNetworkPolicy
from src.distribution import COMPASDistribution
from src.optimization import PenaltyOptimizationTarget

unable to import 'smart_open.gcs', disabling that module


## Fairness Functions

In [3]:
def calc_benefit(decisions, ips_weights=None):
    """Return the per-sample benefit derived from ``decisions``.

    Args:
        decisions: Decisions or decision probabilities. Must support
            elementwise multiplication when ``ips_weights`` is given.
        ips_weights: Optional weights multiplied elementwise onto
            ``decisions``. ``None`` (the default) leaves the decisions
            unweighted.
            NOTE(review): presumably inverse-propensity-score weights,
            judging by the name -- confirm.

    Returns:
        ``decisions`` itself when no weights are given, otherwise the new
        object ``decisions * ips_weights``.
    """
    if ips_weights is None:
        return decisions
    # Multiply out of place so the caller's array/tensor is never mutated.
    return decisions * ips_weights

def calc_covariance(s, decisions):
    """Return the per-sample covariance terms between ``s`` and ``decisions``.

    ``s`` is first recoded as ``1 - 2 * s`` (so 0 becomes 1 and 1 becomes -1),
    then centred by its mean along axis 0 and multiplied elementwise with
    ``decisions``. The result is not reduced; averaging is left to the caller.

    Args:
        s: Array of attribute values.
            NOTE(review): presumably binary 0/1 -- confirm.
        decisions: Array of decisions (or decision probabilities) that
            broadcasts against ``s``.

    Returns:
        Array of the elementwise products ``(recoded_s - mean) * decisions``.
    """
    recoded_s = 1 - (2 * s)
    centred_s = recoded_s - np.mean(recoded_s, axis=0)
    return centred_s * decisions

def fairness_function(type, **fairness_kwargs):
    """Evaluate the fairness measure selected by ``type``.

    Args:
        type: Name of the measure, one of ``"BD_DP"``, ``"COV_DP"`` or
            ``"BD_EOP"``. (The name shadows the builtin but is kept because
            callers pass it by keyword.)
        **fairness_kwargs: Must contain the keys ``"s"``, ``"y"``,
            ``"decision_probabilities"`` and ``"ips_weights"``. Any further
            keys (e.g. ``"x"`` or ``"policy"``) are accepted and ignored.

    Returns:
        * ``"BD_DP"``: ``mean_difference`` of the benefit and ``s``.
        * ``"COV_DP"``: mean over axis 0 of ``calc_covariance(s, ...)``.
        * ``"BD_EOP"``: ``mean_difference`` of the (optionally weighted)
          benefit and ``s``, restricted to the samples where ``y == 1``.

    Raises:
        KeyError: If one of the required keys is missing.
        ValueError: If ``type`` is not one of the supported measure names.
    """
    s = fairness_kwargs["s"]
    y = fairness_kwargs["y"]
    decision_probabilities = fairness_kwargs["decision_probabilities"]
    ips_weights = fairness_kwargs["ips_weights"]

    if type == "BD_DP":
        benefit = calc_benefit(decision_probabilities)
        return mean_difference(benefit, s)

    if type == "COV_DP":
        covariance = calc_covariance(s, decision_probabilities)
        return np.mean(covariance, axis=0)

    if type == "BD_EOP":
        # calc_benefit is called with the decisions only, exactly as for
        # BD_DP; the optional weights are applied here (out of place) so this
        # branch does not depend on calc_benefit accepting a second argument.
        benefit = calc_benefit(decision_probabilities)
        if ips_weights is not None:
            benefit = benefit * ips_weights
        y1_indices = np.where(y == 1)
        return mean_difference(benefit[y1_indices], s[y1_indices])

    # Fail loudly instead of silently returning None for a misspelled name.
    raise ValueError(
        "Unknown fairness function type {!r}; expected 'BD_DP', 'COV_DP' "
        "or 'BD_EOP'".format(type))

# COMPAS

In [4]:
# COMPAS distribution (bias=True, test_percentage=0.2); the policy network is
# sized from the distribution's feature dimension.
bias = True
distribution = COMPASDistribution(bias=bias, test_percentage=0.2)
dim_theta = distribution.feature_dimension

# Configuration dictionary handed to `train`. It is assembled section by
# section; the experiment cells further down add "optimization_target" and
# "save_path" before each run.
training_parameters = {
    'model': NeuralNetworkPolicy(dim_theta, False),
    'distribution': distribution,
}

# Optimisation settings.
training_parameters['parameter_optimization'] = {
    'time_steps': 200,
    'epochs': 50,
    'batch_size': 128,
    'learning_rate': 0.1,
    'learn_on_entire_history': False,
    'clip_weights': True,
}

# Sample counts and seeding.
training_parameters['data'] = {
    'num_train_samples': 4096,
    'num_test_samples': 1024,
    'fix_seeds': True,
}

# Additional measures evaluated during training; each measure function is
# called with (s, y, decisions).
training_parameters['evaluation'] = {
    # Mean cost-sensitive utility with cost factor 0.5.
    UTILITY: {
        'measure_function': lambda s, y, decisions: np.mean(
            cost_utility(cost_factor=0.5, y=y, decisions=decisions)),
        'detailed': False,
    },
    # "COV_DP" measure computed by fairness_function on the decisions.
    COVARIANCE_OF_DECISION_DP: {
        'measure_function': lambda s, y, decisions: fairness_function(
            type="COV_DP",
            x=None,
            s=s,
            y=y,
            decision_probabilities=decisions,
            ips_weights=None,
            policy=None),
        'detailed': False,
    },
}

# if os.path.isfile('./cluster_experiments/seeds.npz'):
#     seeds = np.load('./cluster_experiments/seeds.npz')
#     training_parameters['data']["training_seeds"] = seeds["train"]
#     training_parameters['data']["test_seed"] = seeds["test"]
# else:
#     seeds = {}
#     train_seeds = get_list_of_seeds(200)
#     test_seeds = get_list_of_seeds(1)
#     training_parameters['data']["training_seeds"] = train_seeds
#     training_parameters['data']["test_seed"] = test_seeds
#     np.savez('./cluster_experiments/seeds.npz', train=train_seeds, test=test_seeds)

### No Fairness

In [8]:
# Baseline run: utility is the mean cost-sensitive utility (cost factor 0.5)
# and the fairness term is the "BD_DP" measure.
# NOTE(review): this is the baseline without fairness, yet
# fairness_rates=[10.0] is passed to `train`; how that interacts with the 0.0
# given to PenaltyOptimizationTarget is not visible from here -- confirm.
training_parameters["optimization_target"] = PenaltyOptimizationTarget(
    0.0,
    lambda **util_params: torch.mean(cost_utility_probability(cost_factor=0.5, **util_params)),
    lambda **util_params: fairness_function("BD_DP", **util_params))
training_parameters["save_path"] = "../res/local_experiments/COMPAS_NN/NO_FAIRNESS"

statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=5,
    asynchronous=False,
    fairness_rates=[10.0])

# Plot mean first, then median, over the time steps (0..time_steps inclusive);
# both plots use identical arguments apart from the output file.
for plot_function, path_template in ((plot_mean, "{}/results_mean_time.png"),
                                     (plot_median, "{}/results_median_time.png")):
    plot_function(
        x_values=range(training_parameters["parameter_optimization"]["time_steps"] + 1),
        x_label="Time steps",
        x_scale="linear",
        performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                              statistics.demographic_parity(),
                              statistics.equality_of_opportunity()],
        fairness_measures=[],
        file_path=path_template.format(run_path))


## STARTED // LR = 0.1 // TS = 200 // E = 50 // BS = 128 // FR = 10.0 ##
## ENDED // LR = 0.1 // TS = 200 // E = 50 // BS = 128 // FR = 10.0 ##


### Benefit Difference: DP

In [None]:
# Benefit-difference (demographic parity) experiment: same utility as the
# baseline, with the "BD_DP" measure as fairness term.
training_parameters["optimization_target"] = PenaltyOptimizationTarget(
    0.0,
    lambda **util_params: torch.mean(cost_utility_probability(cost_factor=0.5, **util_params)),
    lambda **util_params: fairness_function("BD_DP", **util_params))

# Earlier piecewise-linear grid, kept for reference:
# lambdas = np.linspace(0.0001, 0.001, endpoint=True, num=10).tolist()
# lambdas.extend(np.linspace(0.001, 0.01, endpoint=True, num=10).tolist())
# lambdas.extend(np.linspace(0.01, 0.1, endpoint=True, num=10).tolist())
# lambdas.extend(np.linspace(0.1, 1.0, endpoint=True, num=10).tolist())
# lambdas = np.sort(np.unique(np.array(lambdas, dtype=float)))

# 15 geometrically spaced values from 1e-6 to 1.0 (inclusive), passed to
# `train` as the fairness rates and used as the x-axis of the plots.
lambdas = np.geomspace(0.000001, 1.0, endpoint=True, num=15)

training_parameters["save_path"] = "../res/local_experiments/COMPAS_NN/BD_DP"
statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True,
    fairness_rates=lambdas)

# Plot mean first, then median, against the penalty constant on a log axis;
# both plots use identical arguments apart from the output file.
for plot_function, path_template in ((plot_mean, "{}/results_mean.png"),
                                     (plot_median, "{}/results_median.png")):
    plot_function(
        x_values=lambdas,
        x_label="Penalty Constant",
        x_scale="log",
        performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                              statistics.demographic_parity(),
                              statistics.equality_of_opportunity()],
        fairness_measures=[],
        file_path=path_template.format(run_path))


### Benefit Difference: EOP

In [None]:
# Benefit-difference (equality of opportunity) experiment: same utility as the
# baseline, with the "BD_EOP" measure as fairness term.
training_parameters["optimization_target"] = PenaltyOptimizationTarget(
    0.0,
    lambda **util_params: torch.mean(cost_utility_probability(cost_factor=0.5, **util_params)),
    lambda **util_params: fairness_function("BD_EOP", **util_params))

# Earlier piecewise-linear grid, kept for reference:
# lambdas = np.linspace(0.0001, 0.001, endpoint=True, num=10).tolist()
# lambdas.extend(np.linspace(0.001, 0.01, endpoint=True, num=10).tolist())
# lambdas.extend(np.linspace(0.01, 0.1, endpoint=True, num=10).tolist())
# lambdas.extend(np.linspace(0.1, 1.0, endpoint=True, num=10).tolist())
# lambdas = np.sort(np.unique(np.array(lambdas, dtype=float)))

# 15 geometrically spaced values from 1e-6 to 1.0 (inclusive), passed to
# `train` as the fairness rates and used as the x-axis of the plots.
lambdas = np.geomspace(0.000001, 1.0, endpoint=True, num=15)

training_parameters["save_path"] = "../res/local_experiments/COMPAS_NN/BD_EOP"
statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True,
    fairness_rates=lambdas)

# Plot mean first, then median, against the penalty constant on a log axis;
# both plots use identical arguments apart from the output file.
for plot_function, path_template in ((plot_mean, "{}/results_mean.png"),
                                     (plot_median, "{}/results_median.png")):
    plot_function(
        x_values=lambdas,
        x_label="Penalty Constant",
        x_scale="log",
        performance_measures=[statistics.get_additonal_measure(UTILITY, "Utility"),
                              statistics.demographic_parity(),
                              statistics.equality_of_opportunity()],
        fairness_measures=[],
        file_path=path_template.format(run_path))