## Implementation of the dual gradient method for COMPAS and Adult Credit

In [1]:
import os
import sys
import numpy as np
module_path = os.path.abspath(os.path.join('..'))

if module_path not in sys.path:
    sys.path.append(module_path)

from src.util import mean_difference
from src.feature_map import IdentityFeatureMap
from src.functions import cost_utility
from src.plotting import plot_mean, plot_median
from src.training import train
from src.policy import LogisticPolicy
from src.training_evaluation import Statistics
from src.distribution import AdultCreditDistribution, COMPASDistribution, FICODistribution
from src.optimization import PenaltyOptimizationTarget, LagrangianOptimizationTarget

unable to import 'smart_open.gcs', disabling that module


## Fairness Functions

In [2]:
def calc_benefit(decisions, ips_weights):
    """Return the decisions, scaled by ``ips_weights`` when weights are given.

    Args:
        decisions: Array of decisions.
        ips_weights: Weights multiplied element-wise with ``decisions``, or
            ``None`` to leave the decisions unweighted.

    Returns:
        ``decisions * ips_weights`` as a new array when ``ips_weights`` is not
        ``None``; otherwise ``decisions`` itself, unchanged.
    """
    if ips_weights is None:
        return decisions

    # Build a new array instead of using ``*=``: the in-place form would
    # overwrite the caller's ``decisions`` array, so every later use of the
    # same array would see the weights already applied.
    return decisions * ips_weights


def calc_covariance(s, decisions, ips_weights):
    """Return the per-sample product of the centred, re-coded ``s`` and the decisions.

    ``s`` is first re-coded as ``1 - 2 * s``. When ``ips_weights`` is given,
    the centring mean is taken over the re-coded values times the weights and
    the decisions are multiplied by the weights; otherwise both are used
    unweighted.

    Args:
        s: Array that is re-coded as ``1 - 2 * s``.
        decisions: Array multiplied with the centred, re-coded ``s``.
        ips_weights: Optional weights, or ``None``.

    Returns:
        ``(new_s - mean) * d``, where ``mean`` is taken along axis 0.
    """
    signed_s = 1 - (2 * s)
    weighted = ips_weights is not None

    center = np.mean(signed_s * ips_weights if weighted else signed_s, axis=0)
    scaled_decisions = decisions * ips_weights if weighted else decisions

    return (signed_s - center) * scaled_decisions


def fairness_function_gradient(type, **fairness_kwargs):
    """Compute the gradient of the selected fairness measure.

    A per-sample fairness term is multiplied with the value returned by
    ``policy.log_policy_gradient(x, s)`` and then aggregated according to
    ``type``.

    Args:
        type: Fairness measure identifier, one of ``"BD_DP"``, ``"BD_EOP"``,
            ``"COV_DP"`` or ``"COV_DP_DIST"``. (The name shadows the builtin
            but is kept so existing callers keep working.)
        **fairness_kwargs: Must contain the keys ``policy``, ``x``, ``s``,
            ``y``, ``decisions`` and ``ips_weights`` (the latter may be
            ``None``).

    Returns:
        ``mean_difference(grad, s)`` for ``"BD_DP"``; the same call restricted
        to the entries where ``y == 1`` for ``"BD_EOP"``; and
        ``np.mean(grad, axis=0)`` for both covariance types.

    Raises:
        ValueError: If ``type`` is not one of the supported identifiers.
        KeyError: If a required key is missing from ``fairness_kwargs``.
    """
    # Fail early with a clear message; otherwise an unsupported type would
    # only surface later as an unbound local variable.
    if type not in ("BD_DP", "BD_EOP", "COV_DP", "COV_DP_DIST"):
        raise ValueError(
            "Unknown fairness function type: {!r}".format(type))

    policy = fairness_kwargs["policy"]
    x = fairness_kwargs["x"]
    s = fairness_kwargs["s"]
    y = fairness_kwargs["y"]
    decisions = fairness_kwargs["decisions"]
    ips_weights = fairness_kwargs["ips_weights"]

    if type in ("BD_DP", "BD_EOP"):
        result = calc_benefit(decisions, ips_weights)
    elif type == "COV_DP":
        result = calc_covariance(s, decisions, ips_weights)
    else:  # "COV_DP_DIST"
        # Use the linear score phi . theta in place of the decisions.
        phi = policy.feature_map(policy._extract_features(x, s))
        distance = np.matmul(phi, policy.theta).reshape(-1, 1)
        result = calc_covariance(s, distance, ips_weights)

    log_gradient = policy.log_policy_gradient(x, s)
    grad = log_gradient * result

    if type == "BD_DP":
        return mean_difference(grad, s)
    if type == "BD_EOP":
        # Only entries with y == 1 enter the difference.
        y1_indices = np.where(y == 1)
        return mean_difference(grad[y1_indices], s[y1_indices])

    # "COV_DP" and "COV_DP_DIST" share the same aggregation.
    return np.mean(grad, axis=0)


def fairness_function(type, **fairness_kwargs):
    """Evaluate the selected fairness measure.

    Args:
        type: Fairness measure identifier, one of ``"BD_DP"``, ``"BD_EOP"``,
            ``"COV_DP"`` or ``"COV_DP_DIST"``. (The name shadows the builtin
            but is kept so existing callers keep working.)
        **fairness_kwargs: Must contain the keys ``x``, ``s``, ``y``,
            ``decisions``, ``ips_weights`` and ``policy`` (``ips_weights``
            may be ``None``).

    Returns:
        ``mean_difference(benefit, s)`` for ``"BD_DP"``; the same call
        restricted to the entries where ``y == 1`` for ``"BD_EOP"``; and the
        mean along axis 0 of ``calc_covariance(...)`` for the covariance
        types (computed on ``decisions`` for ``"COV_DP"`` and on the linear
        score ``phi . theta`` for ``"COV_DP_DIST"``).

    Raises:
        ValueError: If ``type`` is not one of the supported identifiers.
        KeyError: If a required key is missing from ``fairness_kwargs``.
    """
    # Reject unsupported identifiers explicitly instead of silently
    # returning None.
    if type not in ("BD_DP", "BD_EOP", "COV_DP", "COV_DP_DIST"):
        raise ValueError(
            "Unknown fairness function type: {!r}".format(type))

    x = fairness_kwargs["x"]
    s = fairness_kwargs["s"]
    y = fairness_kwargs["y"]
    decisions = fairness_kwargs["decisions"]
    ips_weights = fairness_kwargs["ips_weights"]
    policy = fairness_kwargs["policy"]

    if type == "BD_DP":
        benefit = calc_benefit(decisions, ips_weights)
        return mean_difference(benefit, s)

    if type == "BD_EOP":
        benefit = calc_benefit(decisions, ips_weights)
        # Only entries with y == 1 enter the difference.
        y1_indices = np.where(y == 1)
        return mean_difference(benefit[y1_indices], s[y1_indices])

    if type == "COV_DP":
        covariance = calc_covariance(s, decisions, ips_weights)
        return np.mean(covariance, axis=0)

    # "COV_DP_DIST": use the linear score phi . theta in place of decisions.
    phi = policy.feature_map(policy._extract_features(x, s))
    distance = np.matmul(phi, policy.theta).reshape(-1, 1)
    covariance = calc_covariance(s, distance, ips_weights)
    return np.mean(covariance, axis=0)

# COMPAS

In [3]:
# COMPAS data set, constructed with bias=True and test_percentage=0.2.
bias = True
distribution = COMPASDistribution(test_percentage=0.2, bias=bias)
dim_theta = distribution.feature_dim


def util_func(**util_params):
    """Forward all keyword arguments to ``cost_utility`` with ``cost_factor=0.5``."""
    return cost_utility(cost_factor=0.5, **util_params)


# Full configuration passed to ``train``. The fairness function and its
# gradient are added to the optimization target parameters by later cells.
training_parameters = {
    "model": {
        "constructor": LogisticPolicy,
        "parameters": {
            # theta starts as a zero vector of length dim_theta.
            "theta": np.zeros(dim_theta),
            "feature_map": IdentityFeatureMap(dim_theta),
            "use_sensitive_attributes": False,
        },
    },
    "distribution": distribution,
    "optimization_target": {
        "constructor": LagrangianOptimizationTarget,
        "parameters": {
            "utility_function": util_func,
        },
    },
    "parameter_optimization": {
        "batch_size": 128,
        "epochs": 60,
        "fix_seeds": True,
        "learning_rate": 0.01,
        "learn_on_entire_history": False,
        "num_batches": 32,
        "time_steps": 200,
    },
    "lagrangian_optimization": {
        "batch_size": 128,
        "epochs": 10,
        "learning_rate": 0.1,
        "num_batches": 32,
    },
    "test": {
        "num_samples": 10000,
    },
}

### Benefit Difference: DP

In [4]:
# Register the "BD_DP" fairness function and its gradient on the
# optimization target parameters.
training_parameters["optimization_target"]["parameters"].update(
    fairness_function=lambda **fp: fairness_function("BD_DP", **fp),
    fairness_gradient_function=lambda **fp: fairness_function_gradient("BD_DP", **fp),
)
training_parameters["save_path"] = "../res/local_experiments/DUAL_GRADIENT/COMPAS/BD_DP"

# Train with iterations=10 and asynchronous=True.
statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=10,
    asynchronous=True,
)

# Plot the statistics; the target file names are built from run_path.
plot_mean(
    statistics,
    "{}/results_mean_time.png".format(run_path),
    model_parameters=model_parameters,
)
plot_median(
    statistics,
    "{}/results_median_time.png".format(run_path),
    model_parameters=model_parameters,
)


## STARTED // LR = 0.01 // TS = 200 // E = 60 // BS = 128 // NB = 32 // FR = 0.0 ##
## ENDED // LR = 0.01 // TS = 200 // E = 60 // BS = 128 // NB = 32 // FR = 0.0 ##


Process ForkPoolWorker-6:
Process ForkPoolWorker-9:
Process ForkPoolWorker-1:
Process ForkPoolWorker-8:
Process ForkPoolWorker-5:
Process ForkPoolWorker-14:
Process ForkPoolWorker-2:
Process ForkPoolWorker-16:
Process ForkPoolWorker-3:
Process ForkPoolWorker-7:
Process ForkPoolWorker-15:
Process ForkPoolWorker-4:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/fkretschmar/miniconda3/envs/masters-thesis/lib/python3.7/site-packages/multiprocess/process.py", line 297, in _bootstrap
    self.run()
  File "/home/fkretschmar/miniconda3/envs/masters-thesis/lib/python3.7/site-packages/multiprocess/process.py", line 297, in _bootstrap
    self.run()
  File "/home/fkretschmar/miniconda3/envs/masters-thesis/lib/python3.7

### Benefit Difference: EOP

In [None]:
# Register the "BD_EOP" fairness function and its gradient on the
# optimization target parameters.
training_parameters["optimization_target"]["parameters"].update(
    fairness_function=lambda **fp: fairness_function("BD_EOP", **fp),
    fairness_gradient_function=lambda **fp: fairness_function_gradient("BD_EOP", **fp),
)
training_parameters["save_path"] = "../res/local_experiments/DUAL_GRADIENT/COMPAS/BD_EOP"

# Train with iterations=30 and asynchronous=True.
statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True,
)

# Plot the statistics; the target file names are built from run_path.
plot_mean(
    statistics,
    "{}/results_mean_time.png".format(run_path),
    model_parameters=model_parameters,
)
plot_median(
    statistics,
    "{}/results_median_time.png".format(run_path),
    model_parameters=model_parameters,
)

### Covariance of decision: DP

In [None]:
# Register the "COV_DP" fairness function and its gradient on the
# optimization target parameters.
training_parameters["optimization_target"]["parameters"].update(
    fairness_function=lambda **fp: fairness_function("COV_DP", **fp),
    fairness_gradient_function=lambda **fp: fairness_function_gradient("COV_DP", **fp),
)
training_parameters["save_path"] = "../res/local_experiments/DUAL_GRADIENT/COMPAS/COV_DP"

# Train with iterations=30 and asynchronous=True.
statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True,
)

# Plot the statistics; the target file names are built from run_path.
plot_mean(
    statistics,
    "{}/results_mean_time.png".format(run_path),
    model_parameters=model_parameters,
)
plot_median(
    statistics,
    "{}/results_median_time.png".format(run_path),
    model_parameters=model_parameters,
)

### Covariance of distance: DP

In [5]:
# Register the "COV_DP_DIST" fairness function and its gradient on the
# optimization target parameters.
training_parameters['optimization_target']['parameters']['fairness_function'] = lambda **fp : fairness_function("COV_DP_DIST", **fp)
training_parameters['optimization_target']['parameters']['fairness_gradient_function'] = lambda **fp : fairness_function_gradient("COV_DP_DIST", **fp)

# Dedicated save path so these results are not stored alongside the
# "COV_DP" (covariance of decision) experiment.
training_parameters["save_path"] = "../res/local_experiments/DUAL_GRADIENT/COMPAS/COV_DP_DIST"

# Train with iterations=30 and asynchronous=True.
statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True)

# Plot the statistics; the target file names are built from run_path.
plot_mean(statistics, "{}/results_mean_time.png".format(run_path), model_parameters=model_parameters)
plot_median(statistics, "{}/results_median_time.png".format(run_path), model_parameters=model_parameters)

## STARTED // LR = 0.01 // TS = 200 // E = 60 // BS = 128 // NB = 32 // FR = 0.0 ##


ValueError: p < 0, p > 1 or p contains NaNs

# Adult Credit Data

In [None]:
# Adult Credit data set, constructed with bias=True and test_percentage=0.2.
bias = True
distribution = AdultCreditDistribution(test_percentage=0.2, bias=bias)
dim_theta = distribution.feature_dim


def util_func(**util_params):
    """Forward all keyword arguments to ``cost_utility`` with ``cost_factor=0.5``."""
    return cost_utility(cost_factor=0.5, **util_params)


# Full configuration passed to ``train``. The fairness function and its
# gradient are added to the optimization target parameters by later cells.
training_parameters = {
    "model": {
        "constructor": LogisticPolicy,
        "parameters": {
            # theta starts as a zero vector of length dim_theta.
            "theta": np.zeros(dim_theta),
            "feature_map": IdentityFeatureMap(dim_theta),
            "use_sensitive_attributes": False,
        },
    },
    "distribution": distribution,
    "optimization_target": {
        "constructor": LagrangianOptimizationTarget,
        "parameters": {
            "utility_function": util_func,
        },
    },
    "parameter_optimization": {
        "time_steps": 200,
        "epochs": 60,
        "batch_size": 64,
        "learning_rate": 0.05,
        "decay_rate": 1,
        "decay_step": 10000,
        "num_batches": 256,
        "fix_seeds": True,
    },
    "lagrangian_optimization": {
        "batch_size": 64,
        "epochs": 10,
        "learning_rate": 0.00001,
        "num_batches": 256,
    },
    "test": {
        "num_samples": 1000000,
    },
}

### Benefit Difference: DP

In [None]:
# Register the "BD_DP" fairness function and its gradient on the
# optimization target parameters.
training_parameters["optimization_target"]["parameters"].update(
    fairness_function=lambda **fp: fairness_function("BD_DP", **fp),
    fairness_gradient_function=lambda **fp: fairness_function_gradient("BD_DP", **fp),
)
training_parameters["save_path"] = "../res/local_experiments/DUAL_GRADIENT/ADULT/BD_DP"

# Train with iterations=30 and asynchronous=True.
statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True,
)

# Plot the statistics; the target file names are built from run_path.
plot_mean(
    statistics,
    "{}/results_mean_time.png".format(run_path),
    model_parameters=model_parameters,
)
plot_median(
    statistics,
    "{}/results_median_time.png".format(run_path),
    model_parameters=model_parameters,
)


### Benefit Difference: EOP

In [None]:
# Register the "BD_EOP" fairness function and its gradient on the
# optimization target parameters.
training_parameters["optimization_target"]["parameters"].update(
    fairness_function=lambda **fp: fairness_function("BD_EOP", **fp),
    fairness_gradient_function=lambda **fp: fairness_function_gradient("BD_EOP", **fp),
)
training_parameters["save_path"] = "../res/local_experiments/DUAL_GRADIENT/ADULT/BD_EOP"

# Train with iterations=30 and asynchronous=True.
statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True,
)

# Plot the statistics; the target file names are built from run_path.
plot_mean(
    statistics,
    "{}/results_mean_time.png".format(run_path),
    model_parameters=model_parameters,
)
plot_median(
    statistics,
    "{}/results_median_time.png".format(run_path),
    model_parameters=model_parameters,
)


### Covariance of decision: DP

In [None]:
# Register the "COV_DP" fairness function and its gradient on the
# optimization target parameters.
training_parameters["optimization_target"]["parameters"].update(
    fairness_function=lambda **fp: fairness_function("COV_DP", **fp),
    fairness_gradient_function=lambda **fp: fairness_function_gradient("COV_DP", **fp),
)
training_parameters["save_path"] = "../res/local_experiments/DUAL_GRADIENT/ADULT/COV_DP"

# Train with iterations=30 and asynchronous=True.
statistics, model_parameters, run_path = train(
    training_parameters,
    iterations=30,
    asynchronous=True,
)

# Plot the statistics; the target file names are built from run_path.
plot_mean(
    statistics,
    "{}/results_mean_time.png".format(run_path),
    model_parameters=model_parameters,
)
plot_median(
    statistics,
    "{}/results_median_time.png".format(run_path),
    model_parameters=model_parameters,
)
