# Implementation of "Fair Decisions Despite Imperfect Predictions" using the original fairness constraint

In [1]:
import numpy as np
from src.consequential_learning import collect_data, train
from src.feature_map import IdentityFeatureMap

## The parameters used by the original authors  
Note: Learning rate decay is not yet implemented

In [2]:
# Base configuration for the training run; the whole dict is later unpacked
# into keyword arguments of train().
training_parameters = {
    'dim_x': 1,
    'dim_s': 1,
    'time_steps': 200,
    'batch_size': 512,
    'num_iterations': 32,
    # NOTE(review): the notebook text says learning rate decay is not yet
    # implemented, so decay_rate / decay_step may currently be unused —
    # TODO confirm against train().
    'learning_parameters': {
        'learning_rate': 0.5,
        'decay_rate': 0.8,
        'decay_step': 30
    },
    'fairness_rate': 0,
    'cost_factor': 0.55,
    'fraction_protected': 0.3
}
# Derived entries, computed from the base configuration so they cannot drift
# out of sync with it.
# dim_theta is the sum of the dim_x and dim_s entries.
training_parameters['dim_theta'] = sum(
    training_parameters[name] for name in ('dim_x', 'dim_s')
)
# The feature map is constructed with dim_theta as its only argument.
training_parameters['feature_map'] = IdentityFeatureMap(
    training_parameters['dim_theta']
)
# num_decisions is the product of batch_size and num_iterations.
training_parameters['num_decisions'] = (
    training_parameters['batch_size'] * training_parameters['num_iterations']
)

## Definition of the fairness function  
The original fairness constraint is defined as the difference between the benefits $b_{P}^s$ for the two values of the protected attribute. The function below computes either the fairness function or its derivative, selected by the parameter `gradient` (`True`/`False`).

In [3]:
def benefit_function(x_s, s, sample_theta, policy, gradient):
    """Return the batch-averaged benefit of ``policy``, or its gradient term.

    Args:
        x_s: Feature rows of the samples; only ``x_s.shape[0]`` (the number of
            samples) is read here, the rest is forwarded to ``policy``.
        s: Protected-attribute values, forwarded to ``policy`` unchanged.
        sample_theta: Forwarded to
            ``policy.calculate_ips_weights_and_log_gradient``.
        policy: Object that is callable as ``policy(x_s, s)`` and provides
            ``calculate_ips_weights_and_log_gradient(x_s, s, sample_theta)``
            returning ``(ips_weight, phi, log_gradient_denominator)``.
        gradient: If truthy return the gradient term, otherwise the benefit.

    Returns:
        ``sum(ips_weight * decision, axis=0) / n`` when ``gradient`` is falsy,
        else ``sum((ips_weight / log_gradient_denominator) * decision * phi,
        axis=0) / n`` with ``n = x_s.shape[0]``.
        (Assumes the policy outputs broadcast against the ``(n, 1)`` decision
        column — TODO confirm against the policy implementation.)
    """
    weights, features, denominator = policy.calculate_ips_weights_and_log_gradient(
        x_s, s, sample_theta
    )
    # Decisions become a column vector so they broadcast row-wise.
    decisions = policy(x_s, s).reshape(-1, 1)
    num_samples = x_s.shape[0]

    if not gradient:
        weighted_decisions = weights * decisions
        return weighted_decisions.sum(axis=0) / num_samples

    scaled_weights = weights / denominator
    per_sample_gradient = scaled_weights * decisions * features
    return per_sample_gradient.sum(axis=0) / num_samples

def fairness_function(**fairness_kwargs):
    """Return the benefit of the ``s == 0`` group minus that of the ``s == 1`` group.

    Expected keyword arguments (all required):
        x: Sample features, indexed by row.
        s: Protected-attribute values; rows are split on ``s == 0`` / ``s == 1``.
        sample_theta: Forwarded to ``benefit_function``.
        policy: Forwarded to ``benefit_function``.
        gradient: Forwarded to ``benefit_function``; selects between the
            benefit difference and the difference of its gradient terms.

    Returns:
        ``benefit_function`` evaluated on the ``s == 0`` rows minus
        ``benefit_function`` evaluated on the ``s == 1`` rows.
    """
    x, s, sample_theta, policy, gradient = (
        fairness_kwargs[key]
        for key in ('x', 's', 'sample_theta', 'policy', 'gradient')
    )

    # Column of all row indices; masking it with a comparison on s yields the
    # indices of the rows that belong to one protected group.
    row_indices = np.arange(s.shape[0]).reshape(-1, 1)

    def group_benefit(group_value):
        members = row_indices[s == group_value]
        return benefit_function(x[members], s[members], sample_theta, policy, gradient)

    return group_benefit(0) - group_benefit(1)


## Train the model

In [4]:
# Run training with the configuration dict unpacked into keyword arguments and
# the benefit-difference fairness function defined in this notebook.
# NOTE(review): fairness_rate is 0 in the configuration, so presumably the
# fairness term carries no weight in this run — confirm against train().
# The recorded output shows one "Time step N: Utility ..." line per time step.
train(**training_parameters, fairness_function=fairness_function)

Time step 0: Utility -0.022609756097560992
Time step 1: Utility 0.04822194548221944
Time step 2: Utility 0.053966611479028676
Time step 3: Utility 0.059231196997080474
Time step 4: Utility 0.04743006752101417
Time step 5: Utility 0.06667124982813143
Time step 6: Utility 0.05536303630363034
Time step 7: Utility 0.05888642043876699
Time step 8: Utility 0.06499999999999997
Time step 9: Utility 0.06821736785329016
Time step 10: Utility 0.05416897506925206
Time step 11: Utility 0.0656658862415645
Time step 12: Utility 0.06253435953820778
Time step 13: Utility 0.06563822525597267
Time step 14: Utility 0.06591445632551032
Time step 15: Utility 0.0662212425953988
Time step 16: Utility 0.07600730124391561
Time step 17: Utility 0.07501724851662755
Time step 18: Utility 0.06472237417943105
Time step 19: Utility 0.07572171295662879
Time step 20: Utility 0.0751299704931853
Time step 21: Utility 0.07048143053645115
Time step 22: Utility 0.06324553023065373
Time step 23: Utility 0.07640043614556358
T