In [6]:
import numpy as np
from src.consequential_learning import collect_data, train
from src.feature_map import IdentityFeatureMap

In [7]:
# --- Dimensions ---------------------------------------------------------
DIM_S = 1
DIM_X = 1
# Combined size of the two dimensions above; handed to IdentityFeatureMap.
DIM_THETA = DIM_X + DIM_S

# --- Run sizes (all forwarded positionally to train) --------------------
# NOTE(review): exact meaning of each value is defined by train(); the names
# suggest a horizon, per-step sample count, optimizer steps and minibatch
# size respectively - confirm against src.consequential_learning.
T = 100
NUM_DECISIONS = 500
NUM_ITERATIONS = 200
BATCH_SIZE = 128

# --- Optimisation hyperparameters ---------------------------------------
LEARNING_RATE = 0.01
FAIRNESS_RATE = 0.5

# --- Problem setup -------------------------------------------------------
COST_FACTOR = 0.6
FRACTION_PROTECTED = 0.3

In [8]:
def benefit_function(x_s, s, sample_theta, policy, gradient):
    """Average the weighted policy decisions (or their gradient) over a batch.

    Args:
        x_s: Batch of samples; only ``x_s.shape[0]`` is read here (as the
            averaging denominator), the rest is forwarded to ``policy``.
        s: Forwarded unchanged to ``policy``.
        sample_theta: Forwarded unchanged to
            ``policy.calculate_ips_weights_and_log_gradient``.
        policy: Object that is callable as ``policy(x_s, s)`` and provides
            ``calculate_ips_weights_and_log_gradient(x_s, s, sample_theta)``
            returning a 3-tuple ``(ips_weight, phi, log_gradient_denominator)``.
        gradient: If truthy, return the gradient estimate; otherwise return
            the benefit value itself.

    Returns:
        The column-wise sum of the per-sample terms divided by
        ``x_s.shape[0]``.
    """
    weights, features, denominator = policy.calculate_ips_weights_and_log_gradient(
        x_s, s, sample_theta
    )
    # Decisions are turned into a column so they broadcast row-wise.
    # NOTE(review): assumes the weights/denominator are per-row columns too - confirm.
    decisions = policy(x_s, s).reshape(-1, 1)
    num_samples = x_s.shape[0]

    if not gradient:
        weighted_decisions = weights * decisions
        return weighted_decisions.sum(axis=0) / num_samples

    scaled_weights = weights / denominator
    gradient_terms = scaled_weights * decisions * features
    return gradient_terms.sum(axis=0) / num_samples

def fairness_function(**fairness_kwargs):
    """Return the benefit of the ``s == 0`` rows minus that of the ``s == 1`` rows.

    Required keyword arguments (a missing one raises ``KeyError``):
        x: Samples, indexed row-wise with integer index arrays.
        s: Group values compared element-wise against 0 and 1.
        sample_theta: Forwarded unchanged to ``benefit_function``.
        policy: Forwarded unchanged to ``benefit_function``.
        gradient: Forwarded unchanged to ``benefit_function``.

    Returns:
        ``benefit_function`` evaluated on the ``s == 0`` subset minus
        ``benefit_function`` evaluated on the ``s == 1`` subset.
    """
    samples, groups, theta, policy, want_gradient = [
        fairness_kwargs[key]
        for key in ('x', 's', 'sample_theta', 'policy', 'gradient')
    ]

    # Column of row numbers 0..n-1; masking it with (s == value) picks the
    # rows belonging to that group.
    # NOTE(review): assumes s has shape (n, 1) so each selection is a 1-D
    # array of row numbers - confirm against the caller.
    row_numbers = np.arange(groups.shape[0]).reshape(-1, 1)
    rows_per_group = [row_numbers[groups == value] for value in (0, 1)]

    group_benefits = [
        benefit_function(samples[rows], groups[rows], theta, policy, want_gradient)
        for rows in rows_per_group
    ]
    return group_benefits[0] - group_benefits[1]


In [9]:
# Start training. Every argument is positional, so the order below must
# match the signature of train() in src.consequential_learning.
train(
    DIM_S,
    DIM_X,
    COST_FACTOR,
    NUM_DECISIONS,
    FRACTION_PROTECTED,
    LEARNING_RATE,
    FAIRNESS_RATE,
    BATCH_SIZE,
    NUM_ITERATIONS,
    T,
    fairness_function,
    IdentityFeatureMap(DIM_THETA),
)

Iteration 0
Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Iteration 11
Iteration 12
Iteration 13
Iteration 14
Iteration 15
Iteration 16
Iteration 17
Iteration 18
Iteration 19
Iteration 20
Iteration 21
Iteration 22
Iteration 23
Iteration 24


KeyboardInterrupt: 