# Implementation of the best parameter settings for the COMPAS, Adult Credit and FICO Credit score datasets

In [1]:
import os
import sys
import numpy as np
module_path = os.path.abspath(os.path.join('..'))

if module_path not in sys.path:
    sys.path.append(module_path)

from src.util import mean_difference
from src.feature_map import IdentityFeatureMap
from src.functions import cost_utility
from src.plotting import plot_mean, plot_median
from src.training import train
from src.training_evaluation import Statistics
from src.policy import LogisticPolicy
from src.distribution import AdultCreditDistribution, COMPASDistribution
from src.optimization import PenaltyOptimizationTarget

unable to import 'smart_open.gcs', disabling that module


## Fairness Functions

In [2]:
def calc_benefit(decisions, ips_weights):
    """Return the benefit of each decision, optionally re-weighted.

    Args:
        decisions: Array of decisions.
        ips_weights: Per-sample weights that broadcast against ``decisions``
            (presumably inverse propensity scores, going by the name), or
            ``None`` to leave the decisions unweighted.

    Returns:
        ``decisions`` itself when ``ips_weights`` is ``None``; otherwise a
        new array holding ``decisions * ips_weights``.
    """
    if ips_weights is None:
        return decisions

    # Multiply out of place. The previous ``decisions *= ips_weights``
    # overwrote the caller's array, so any later use of the same decisions
    # saw already-weighted values, and it raised a casting error when
    # integer decisions were combined with float weights.
    return decisions * ips_weights


def calc_covariance(s, decisions, ips_weights):
    """Return the per-sample product of the centred, recoded ``s`` and the decisions.

    ``s`` is first recoded as ``1 - 2 * s`` (so 0 becomes 1 and 1 becomes -1).
    The recoded values are centred on their mean along axis 0 and multiplied
    element-wise with the decisions. When ``ips_weights`` is given, both the
    mean and the decisions are computed on weighted values.

    Args:
        s: Array of sensitive-attribute values.
        decisions: Array that broadcasts against ``s``.
        ips_weights: Optional per-sample weights, or ``None``.

    Returns:
        Array of per-sample terms; the caller is responsible for averaging.
    """
    signed_s = 1 - (2 * s)
    weighted = ips_weights is not None

    # Only the mean is taken over weighted values; the centred term below
    # still uses the unweighted recoding.
    centre = np.mean(signed_s * ips_weights if weighted else signed_s, axis=0)
    scaled_decisions = decisions * ips_weights if weighted else decisions

    return (signed_s - centre) * scaled_decisions


def fairness_function_gradient(type, **fairness_kwargs):
    """Return the gradient term of the selected fairness measure.

    The policy's log-gradient is multiplied by a per-sample signal chosen by
    ``type`` and then aggregated:

    * ``"BD_DP"``: benefit, aggregated with ``mean_difference`` over ``s``.
    * ``"BD_EOP"``: benefit, aggregated with ``mean_difference`` restricted
      to the samples where ``y == 1``.
    * ``"COV_DP"``: covariance term of the decisions, averaged over axis 0.
    * ``"COV_DP_DIST"``: covariance term of ``phi @ theta`` (the linear score
      of the policy), averaged over axis 0.

    Args:
        type: One of the four identifiers above. The name shadows the
            builtin but is kept because it is part of the call interface.
        **fairness_kwargs: Must contain ``policy``, ``x``, ``s``, ``y``,
            ``decisions`` and ``ips_weights``.

    Returns:
        The aggregated gradient for the requested measure.

    Raises:
        ValueError: If ``type`` is not one of the supported identifiers.
        KeyError: If a required entry is missing from ``fairness_kwargs``.
    """
    supported_types = ("BD_DP", "BD_EOP", "COV_DP", "COV_DP_DIST")
    if type not in supported_types:
        # Fail fast with a clear message; previously an unknown type only
        # surfaced later as an unrelated unbound-variable error.
        raise ValueError(
            "Unknown fairness type {!r}; expected one of {}".format(
                type, supported_types))

    policy = fairness_kwargs["policy"]
    x = fairness_kwargs["x"]
    s = fairness_kwargs["s"]
    y = fairness_kwargs["y"]
    decisions = fairness_kwargs["decisions"]
    ips_weights = fairness_kwargs["ips_weights"]

    if type == "BD_DP" or type == "BD_EOP":
        result = calc_benefit(decisions, ips_weights)
    elif type == "COV_DP":
        result = calc_covariance(s, decisions, ips_weights)
    else:  # "COV_DP_DIST"
        phi = policy.feature_map(policy._extract_features(x, s))
        # Column vector of linear scores, one row per sample.
        distance = np.matmul(phi, policy.theta).reshape(-1, 1)
        result = calc_covariance(s, distance, ips_weights)

    log_gradient = policy.log_policy_gradient(x, s)
    grad = log_gradient * result

    if type == "BD_DP":
        return mean_difference(grad, s)
    if type == "BD_EOP":
        # NOTE(review): if y is 2-D (n, 1), np.where returns (rows, cols) and
        # grad[y1_indices] selects only column 0 of grad -- confirm the
        # shapes of y and grad against the caller.
        y1_indices = np.where(y == 1)
        return mean_difference(grad[y1_indices], s[y1_indices])

    # "COV_DP" and "COV_DP_DIST" share the same aggregation.
    return np.mean(grad, axis=0)


def fairness_function(type, **fairness_kwargs):
    """Evaluate the selected fairness measure.

    * ``"BD_DP"``: ``mean_difference`` of the benefit over ``s``.
    * ``"BD_EOP"``: ``mean_difference`` of the benefit over ``s``,
      restricted to the samples where ``y == 1``.
    * ``"COV_DP"``: mean over axis 0 of the covariance term of the decisions.
    * ``"COV_DP_DIST"``: mean over axis 0 of the covariance term of
      ``phi @ theta`` (the linear score of the policy).

    Args:
        type: One of the four identifiers above. The name shadows the
            builtin but is kept because it is part of the call interface.
        **fairness_kwargs: Must contain ``x``, ``s``, ``y``, ``decisions``,
            ``ips_weights`` and ``policy``.

    Returns:
        The value of the requested fairness measure.

    Raises:
        ValueError: If ``type`` is not one of the supported identifiers.
        KeyError: If a required entry is missing from ``fairness_kwargs``.
    """
    supported_types = ("BD_DP", "BD_EOP", "COV_DP", "COV_DP_DIST")
    if type not in supported_types:
        # Previously an unknown type fell through every branch and silently
        # returned None, which only failed later inside the optimizer.
        raise ValueError(
            "Unknown fairness type {!r}; expected one of {}".format(
                type, supported_types))

    x = fairness_kwargs["x"]
    s = fairness_kwargs["s"]
    y = fairness_kwargs["y"]
    decisions = fairness_kwargs["decisions"]
    ips_weights = fairness_kwargs["ips_weights"]
    policy = fairness_kwargs["policy"]

    if type == "BD_DP":
        benefit = calc_benefit(decisions, ips_weights)
        return mean_difference(benefit, s)

    if type == "COV_DP":
        covariance = calc_covariance(s, decisions, ips_weights)
        return np.mean(covariance, axis=0)

    if type == "COV_DP_DIST":
        phi = policy.feature_map(policy._extract_features(x, s))
        # Column vector of linear scores, one row per sample.
        distance = np.matmul(phi, policy.theta).reshape(-1, 1)
        covariance = calc_covariance(s, distance, ips_weights)
        return np.mean(covariance, axis=0)

    # Remaining case: "BD_EOP".
    benefit = calc_benefit(decisions, ips_weights)
    y1_indices = np.where(y == 1)
    return mean_difference(benefit[y1_indices], s[y1_indices])

# COMPAS

In [3]:
# Build the COMPAS distribution with the `bias` option switched on and
# test_percentage=0.2; theta is sized from the distribution's feature_dim.
bias = True
distribution = COMPASDistribution(test_percentage=0.2, bias=bias)
dim_theta = distribution.feature_dim

def util_func(**util_params):
    """Evaluate ``cost_utility`` with the cost factor fixed at 0.5.

    All keyword arguments are forwarded to ``cost_utility`` unchanged.
    """
    return cost_utility(cost_factor=0.5, **util_params)

# Full training configuration for the COMPAS experiments. The fairness
# function, its gradient and the save path are filled in per experiment
# by the cells that follow.
training_parameters = dict(
    # Logistic policy on the raw features, initialised at theta = 0.
    model=dict(
        constructor=LogisticPolicy,
        parameters=dict(
            theta=np.zeros(dim_theta),
            feature_map=IdentityFeatureMap(dim_theta),
            use_sensitive_attributes=False,
        ),
    ),
    distribution=distribution,
    optimization_target=dict(
        constructor=PenaltyOptimizationTarget,
        parameters=dict(
            utility_function=util_func,
        ),
    ),
    parameter_optimization=dict(
        time_steps=200,
        epochs=40,
        batch_size=128,
        learning_rate=0.01,
        learn_on_entire_history=False,
    ),
    data=dict(
        num_train_samples=4096,
        num_test_samples=1024,
        fix_seeds=True,
    ),
)

### Benefit Difference: DP

In [4]:
# Plug the "BD_DP" fairness measure and its gradient into the optimization target.
training_parameters['optimization_target']['parameters'].update({
    'fairness_function':
        lambda **fp: fairness_function("BD_DP", **fp),
    'fairness_gradient_function':
        lambda **fp: fairness_function_gradient("BD_DP", **fp),
})

# Earlier hand-picked grid, kept for reference:
# lambdas = [0.00005, 0.00006, 0.00007, 0.00008, 0.00009,
#            0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007, 0.0008, 0.0009,
#            0.001, 0.002, 0.003]
# Ten fairness rates, log-spaced from 1e-6 to 1e-1 (both ends included).
lambdas = np.logspace(-6, -1, num=10, endpoint=True, base=10)

training_parameters["save_path"] = "../res/local_experiments/COMPAS_BEST/BD_DP"
statistics, model_parameters, run_path = train(
    training_parameters,
    fairness_rates=lambdas,
    iterations=15,
    asynchronous=True,
)

# Save mean and median summaries into the run directory returned by train().
plot_mean(
    statistics=statistics,
    performance_measures=[Statistics.UTILITY, Statistics.ACCURACY],
    fairness_measures=[Statistics.DEMOGRAPHIC_PARITY,
                       Statistics.EQUALITY_OF_OPPORTUNITY],
    file_path="{}/results_mean_time.png".format(run_path),
)
plot_median(
    statistics=statistics,
    performance_measures=[Statistics.UTILITY, Statistics.ACCURACY],
    fairness_measures=[Statistics.DEMOGRAPHIC_PARITY,
                       Statistics.EQUALITY_OF_OPPORTUNITY],
    file_path="{}/results_median_time.png".format(run_path),
)


## STARTED // LR = 0.01 // TS = 200 // E = 40 // BS = 128 // FR = 1e-06 ##
## ENDED // LR = 0.01 // TS = 200 // E = 40 // BS = 128 // FR = 1e-06 ##
## STARTED // LR = 0.01 // TS = 200 // E = 40 // BS = 128 // FR = 3.5938136638046257e-06 ##
## ENDED // LR = 0.01 // TS = 200 // E = 40 // BS = 128 // FR = 3.5938136638046257e-06 ##
## STARTED // LR = 0.01 // TS = 200 // E = 40 // BS = 128 // FR = 1.2915496650148827e-05 ##
## ENDED // LR = 0.01 // TS = 200 // E = 40 // BS = 128 // FR = 1.2915496650148827e-05 ##
## STARTED // LR = 0.01 // TS = 200 // E = 40 // BS = 128 // FR = 4.641588833612782e-05 ##
## ENDED // LR = 0.01 // TS = 200 // E = 40 // BS = 128 // FR = 4.641588833612782e-05 ##
## STARTED // LR = 0.01 // TS = 200 // E = 40 // BS = 128 // FR = 0.0001668100537200059 ##
## ENDED // LR = 0.01 // TS = 200 // E = 40 // BS = 128 // FR = 0.0001668100537200059 ##
## STARTED // LR = 0.01 // TS = 200 // E = 40 // BS = 128 // FR = 0.0005994842503189409 ##
## ENDED // LR = 0.01 // TS = 200 // 

### Benefit Difference: EOP

In [None]:
# Plug the "BD_EOP" fairness measure and its gradient into the optimization target.
training_parameters['optimization_target']['parameters'].update({
    'fairness_function':
        lambda **fp: fairness_function("BD_EOP", **fp),
    'fairness_gradient_function':
        lambda **fp: fairness_function_gradient("BD_EOP", **fp),
})

# Earlier hand-picked grid, kept for reference:
# lambdas = [0.00001, 0.00002, 0.00003, 0.00004, 0.00005, 0.00006, 0.00007, 0.00008, 0.00009,
#            0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007, 0.0008]
# Ten fairness rates, log-spaced from 1e-6 to 1e-1 (both ends included).
lambdas = np.logspace(-6, -1, num=10, endpoint=True, base=10)

training_parameters["save_path"] = "../res/local_experiments/COMPAS_BEST/BD_EOP"
statistics, model_parameters, run_path = train(
    training_parameters,
    fairness_rates=lambdas,
    iterations=15,
    asynchronous=True,
)

# Save mean and median summaries into the run directory returned by train().
plot_mean(
    statistics=statistics,
    performance_measures=[Statistics.UTILITY, Statistics.ACCURACY],
    fairness_measures=[Statistics.DEMOGRAPHIC_PARITY,
                       Statistics.EQUALITY_OF_OPPORTUNITY],
    file_path="{}/results_mean_time.png".format(run_path),
)
plot_median(
    statistics=statistics,
    performance_measures=[Statistics.UTILITY, Statistics.ACCURACY],
    fairness_measures=[Statistics.DEMOGRAPHIC_PARITY,
                       Statistics.EQUALITY_OF_OPPORTUNITY],
    file_path="{}/results_median_time.png".format(run_path),
)

### Covariance of decision: DP

In [None]:
# Plug the "COV_DP" fairness measure and its gradient into the optimization target.
training_parameters['optimization_target']['parameters'].update({
    'fairness_function':
        lambda **fp: fairness_function("COV_DP", **fp),
    'fairness_gradient_function':
        lambda **fp: fairness_function_gradient("COV_DP", **fp),
})

# Earlier hand-picked grid, kept for reference:
# lambdas = [0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007, 0.0008, 0.0009,
#            0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009,
#            0.01]
# Ten fairness rates, log-spaced from 1e-6 to 1e-1 (both ends included).
lambdas = np.logspace(-6, -1, num=10, endpoint=True, base=10)

training_parameters["save_path"] = "../res/local_experiments/COMPAS_BEST/COV_DP"
statistics, model_parameters, run_path = train(
    training_parameters,
    fairness_rates=lambdas,
    iterations=15,
    asynchronous=True,
)

# Save mean and median summaries into the run directory returned by train().
plot_mean(
    statistics=statistics,
    performance_measures=[Statistics.UTILITY, Statistics.ACCURACY],
    fairness_measures=[Statistics.DEMOGRAPHIC_PARITY,
                       Statistics.EQUALITY_OF_OPPORTUNITY],
    file_path="{}/results_mean_time.png".format(run_path),
)
plot_median(
    statistics=statistics,
    performance_measures=[Statistics.UTILITY, Statistics.ACCURACY],
    fairness_measures=[Statistics.DEMOGRAPHIC_PARITY,
                       Statistics.EQUALITY_OF_OPPORTUNITY],
    file_path="{}/results_median_time.png".format(run_path),
)

# Adult Credit Data

In [None]:
# Build the Adult Credit distribution with the `bias` option switched on and
# test_percentage=0.2; theta is sized from the distribution's feature_dim.
bias = True
distribution = AdultCreditDistribution(test_percentage=0.2, bias=bias)
dim_theta = distribution.feature_dim

def util_func(**util_params):
    """Evaluate ``cost_utility`` with the cost factor fixed at 0.5.

    All keyword arguments are forwarded to ``cost_utility`` unchanged.
    """
    return cost_utility(cost_factor=0.5, **util_params)

# Full training configuration for the Adult Credit experiments. The fairness
# function, its gradient and the save path are filled in per experiment
# by the cells that follow.
# NOTE(review): this layout differs from the COMPAS configuration earlier in
# the notebook (a 'test' section instead of 'data'; 'decay_rate',
# 'decay_step', 'num_batches' and 'fix_seeds' under 'parameter_optimization')
# -- confirm it matches what train() currently expects.
training_parameters = dict(
    # Logistic policy on the raw features, initialised at theta = 0.
    model=dict(
        constructor=LogisticPolicy,
        parameters=dict(
            theta=np.zeros(dim_theta),
            feature_map=IdentityFeatureMap(dim_theta),
            use_sensitive_attributes=False,
        ),
    ),
    distribution=distribution,
    optimization_target=dict(
        constructor=PenaltyOptimizationTarget,
        parameters=dict(
            utility_function=util_func,
        ),
    ),
    parameter_optimization=dict(
        time_steps=200,
        epochs=60,
        batch_size=64,
        learning_rate=0.05,
        decay_rate=1,
        decay_step=10000,
        num_batches=256,
        fix_seeds=True,
    ),
    test=dict(
        num_samples=1000000,
    ),
)

### Benefit Difference: DP

In [None]:
# Plug the "BD_DP" fairness measure and its gradient into the optimization target.
training_parameters['optimization_target']['parameters']['fairness_function'] = lambda **fp : fairness_function("BD_DP", **fp)
training_parameters['optimization_target']['parameters']['fairness_gradient_function'] = lambda **fp : fairness_function_gradient("BD_DP", **fp)

training_parameters["save_path"] = "../res/local_experiments/ADULT_BEST/BD_DP"
# Hand-picked grid of fairness rates for this experiment.
lambdas = [0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007, 0.0008, 0.0009,
           0.001, 0.002, 0.003, 0.004]

statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True,
    fairness_rates=lambdas)

# Use the same keyword form as every other plot call in this notebook.
# The former two-argument positional calls passed the output path in the
# second position and never specified which measures to plot.
plot_mean(statistics=statistics,
          performance_measures=[Statistics.UTILITY, Statistics.ACCURACY],
          fairness_measures=[Statistics.DEMOGRAPHIC_PARITY, Statistics.EQUALITY_OF_OPPORTUNITY],
          file_path="{}/results_mean_time.png".format(run_path))
plot_median(statistics=statistics,
          performance_measures=[Statistics.UTILITY, Statistics.ACCURACY],
          fairness_measures=[Statistics.DEMOGRAPHIC_PARITY, Statistics.EQUALITY_OF_OPPORTUNITY],
          file_path="{}/results_median_time.png".format(run_path))


### Benefit Difference: EOP

In [None]:
# Plug the "BD_EOP" fairness measure and its gradient into the optimization target.
training_parameters['optimization_target']['parameters'].update({
    'fairness_function':
        lambda **fp: fairness_function("BD_EOP", **fp),
    'fairness_gradient_function':
        lambda **fp: fairness_function_gradient("BD_EOP", **fp),
})

training_parameters["save_path"] = "../res/local_experiments/ADULT_BEST/BD_EOP"
# Hand-picked grid of fairness rates for this experiment.
lambdas = [
    0.0001, 0.0002, 0.0003, 0.0004, 0.0005,
    0.0006, 0.0007, 0.0008, 0.0009, 0.001,
]

statistics, model_parameters, run_path = train(
    training_parameters,
    fairness_rates=lambdas,
    iterations=30,
    asynchronous=True,
)

# Save mean and median summaries into the run directory returned by train().
plot_mean(
    statistics=statistics,
    performance_measures=[Statistics.UTILITY, Statistics.ACCURACY],
    fairness_measures=[Statistics.DEMOGRAPHIC_PARITY,
                       Statistics.EQUALITY_OF_OPPORTUNITY],
    file_path="{}/results_mean_time.png".format(run_path),
)
plot_median(
    statistics=statistics,
    performance_measures=[Statistics.UTILITY, Statistics.ACCURACY],
    fairness_measures=[Statistics.DEMOGRAPHIC_PARITY,
                       Statistics.EQUALITY_OF_OPPORTUNITY],
    file_path="{}/results_median_time.png".format(run_path),
)


### Covariance of decision: DP

In [None]:
# Plug the "COV_DP" fairness measure and its gradient into the optimization target.
training_parameters['optimization_target']['parameters'].update({
    'fairness_function':
        lambda **fp: fairness_function("COV_DP", **fp),
    'fairness_gradient_function':
        lambda **fp: fairness_function_gradient("COV_DP", **fp),
})

training_parameters["save_path"] = "../res/local_experiments/ADULT_BEST/COV_DP"
# Hand-picked grid of fairness rates for this experiment.
lambdas = [
    0.0001, 0.0002, 0.0003, 0.0004, 0.0005,
    0.0006, 0.0007, 0.0008, 0.0009, 0.001,
]

statistics, model_parameters, run_path = train(
    training_parameters,
    fairness_rates=lambdas,
    iterations=30,
    asynchronous=True,
)

# Save mean and median summaries into the run directory returned by train().
plot_mean(
    statistics=statistics,
    performance_measures=[Statistics.UTILITY, Statistics.ACCURACY],
    fairness_measures=[Statistics.DEMOGRAPHIC_PARITY,
                       Statistics.EQUALITY_OF_OPPORTUNITY],
    file_path="{}/results_mean_time.png".format(run_path),
)
plot_median(
    statistics=statistics,
    performance_measures=[Statistics.UTILITY, Statistics.ACCURACY],
    fairness_measures=[Statistics.DEMOGRAPHIC_PARITY,
                       Statistics.EQUALITY_OF_OPPORTUNITY],
    file_path="{}/results_median_time.png".format(run_path),
)

# plt.plot(statistics.results[Statistics.X_VALUES], statistics.performance(Statistics.ACCURACY, Statistics.MEDIAN))
# plt.savefig("{}/accuracy.png".format(run_path))
