In [1]:
import numpy as np
from src.consequential_learning import collect_data, train
from src.feature_map import IdentityFeatureMap

In [2]:
# Experiment configuration. Every constant below is forwarded positionally to
# the `train(...)` call later in this notebook. The per-constant notes are
# inferred from the names only.
# TODO(review): confirm the meanings against src.consequential_learning.train.

# --- Problem dimensions ---
DIM_S = 1  # presumably the width of the sensitive attribute `s`
DIM_X = 1  # presumably the width of the remaining features `x`
DIM_THETA = DIM_X + DIM_S  # also the argument given to IdentityFeatureMap

# --- Training schedule ---
T = 100  # presumably the number of time steps (the log prints "Time step k")
NUM_ITERATIONS = 200
NUM_DECISIONS = 2000
BATCH_SIZE = 128

# --- Optimisation and trade-off weights ---
LEARNING_RATE = 0.01
FAIRNESS_RATE = 0.3
COST_FACTOR = 0.6
FRACTION_PROTECTED = 0.3

In [3]:
def benefit_function(x_s, s, sample_theta, policy, gradient):
    """Return the weighted benefit of `policy` on a batch, or its gradient term.

    Args:
        x_s: Batch handed to `policy`; its first dimension (`x_s.shape[0]`)
            is used as the normaliser.
        s: Handed to `policy` alongside `x_s`.
        sample_theta: Forwarded to
            `policy.calculate_ips_weights_and_log_gradient`.
        policy: Object callable as `policy(x_s, s)` that also provides
            `calculate_ips_weights_and_log_gradient(x_s, s, sample_theta)`
            returning a 3-tuple (weight, phi, log-gradient denominator).
        gradient: Truthy selects the gradient expression, falsy the benefit.

    Returns:
        The sum over axis 0 of the per-row terms, divided by `x_s.shape[0]`.
        Per-row term in benefit mode: `weight * decision`.
        Per-row term in gradient mode: `(weight / denominator) * decision * phi`.
    """
    # The weights are requested before the decisions, in that order.
    weights, features, denominator = policy.calculate_ips_weights_and_log_gradient(
        x_s, s, sample_theta
    )
    # Decisions are reshaped into a single column so they multiply row-wise.
    decisions = policy(x_s, s).reshape(-1, 1)

    if not gradient:
        total_benefit = (weights * decisions).sum(axis=0)
        return total_benefit / x_s.shape[0]

    rescaled_weights = weights / denominator
    total_gradient = (rescaled_weights * decisions * features).sum(axis=0)
    # NOTE(review): a batch with zero rows would divide by 0 here (likely NaN
    # for NumPy floats) - confirm callers never pass an empty group.
    return total_gradient / x_s.shape[0]

def fairness_function(**fairness_kwargs):
    """Return `benefit_function` on the s == 0 rows minus that on the s == 1 rows.

    Required keyword arguments (a missing one raises KeyError):
        x: Array indexed by row number to pick each group's rows.
        s: Array compared against 0 and 1 to form the two groups.
        sample_theta, policy, gradient: Forwarded unchanged to
            `benefit_function` for both groups.

    Returns:
        `benefit_function(group 0) - benefit_function(group 1)`; whether this
        is a benefit or a gradient depends on `gradient`.
    """
    x, s, sample_theta, policy, gradient = (
        fairness_kwargs[key]
        for key in ('x', 's', 'sample_theta', 'policy', 'gradient')
    )

    # Column of row numbers 0..n-1; masking it with `s == group` yields the
    # row numbers that belong to that group.
    # NOTE(review): assumes `s` has shape (n, 1) so the mask lines up with
    # this column - confirm against the caller.
    row_numbers = np.arange(s.shape[0]).reshape(-1, 1)
    group_0_rows = row_numbers[s == 0]
    group_1_rows = row_numbers[s == 1]

    # Group 0 is evaluated before group 1, as in the subtraction's operand order.
    group_0_benefit = benefit_function(
        x[group_0_rows], s[group_0_rows], sample_theta, policy, gradient
    )
    group_1_benefit = benefit_function(
        x[group_1_rows], s[group_1_rows], sample_theta, policy, gradient
    )
    return group_0_benefit - group_1_benefit


In [5]:
# Start training with the notebook's configuration. All arguments are passed
# positionally, so this order must stay in sync with the signature of `train`.
train(
    DIM_S,
    DIM_X,
    DIM_THETA,
    COST_FACTOR,
    NUM_DECISIONS,
    FRACTION_PROTECTED,
    LEARNING_RATE,
    FAIRNESS_RATE,
    BATCH_SIZE,
    NUM_ITERATIONS,
    T,
    fairness_function,
    IdentityFeatureMap(DIM_THETA),
)

Utility: 0.02517349196564475
Time step 0
Utility: -0.013136246024852598
Time step 1
Utility: 0.029557910703052796
Time step 2
Utility: 0.008851785893786717
Time step 3
Utility: 0.016877557981868227
Time step 4
Utility: -0.0005801483028071713
Time step 5
Utility: 0.01073817163785595
Time step 6
Utility: 0.015782005806215752
Time step 7
Utility: 0.025828615851821723
Time step 8
Utility: 0.03380080086715627
Time step 9
Utility: 0.03249025069103156
Time step 10
Utility: -0.0016682654815615675
Time step 11
Utility: 0.02021633919648297
Time step 12
Utility: 0.006098017956788644
Time step 13
Utility: 0.025208950253252153
Time step 14
Utility: 0.021447722684614354
Time step 15
Utility: 0.02087229193359588
Time step 16
Utility: 0.023241208837287725
Time step 17
Utility: 0.04842762022285576
Time step 18
Utility: 0.03668088203089144
Time step 19
Utility: 0.021394097345027934
Time step 20
Utility: 0.028103547636419927
Time step 21
Utility: 0.045908457199162186
Time step 22
Utility: 0.0470412953151

KeyboardInterrupt: 