In [1]:
import numpy as np
from classes import Group, Environment, PredictiveModel, DecisionModel
import tqdm
import fico
import distribution_to_loans_outcomes as dlo
import pandas as pd
import time
import loan_lending_experiment
import loan_lending_fairness_metric
import pickle
import matplotlib.pyplot as plt

import os
import sys
sys.path.insert(0, os.getcwd() + '/../')
import vanilla_monte_carlo

In [2]:
# Directory handed to fico.get_FICO_data as its data_dir.
DATA_DIR = './data/'
all_cdfs, performance, totals = fico.get_FICO_data(
    data_dir=DATA_DIR,
    do_convert_percentiles=False,
)

# Only the White and Black columns are used from here on.
cdfs = all_cdfs[["White","Black"]]

# Naming convention for the rest of the notebook: group A = Black, group B = White.
cdf_A, cdf_B = cdfs['Black'].values, cdfs['White'].values
repay_A, repay_B = performance['Black'], performance['White']

# The index of the CDF table is used as the score axis (under two names).
scores = scores_repay = cdfs.index
scores_list = scores.tolist()

# to populate group distributions
def get_pmf(cdf):
    """Convert a cumulative distribution into its probability mass function.

    Parameters
    ----------
    cdf : 1-D array-like of numbers
        Cumulative values, one per score bin, in bin order.

    Returns
    -------
    numpy.ndarray of float
        Array of the same length as ``cdf`` whose first entry is ``cdf[0]``
        and whose entry ``k`` (k >= 1) is ``cdf[k] - cdf[k-1]``. An empty
        input yields an empty array.
    """
    cdf = np.asarray(cdf, dtype=float)
    if cdf.size == 0:
        return np.zeros(0)
    # Prepending 0 makes the first difference equal to cdf[0] itself.
    return np.diff(cdf, prepend=0.0)

# to get loan repay probabilities for a given score
# loan_repaid_probs = [lambda i: repay_A[scores[scores.get_loc(i,method='nearest')]], 
#                      lambda i: repay_B[scores[scores.get_loc(i,method='nearest')]]]
def get_repay_A(i):
    """Return group A's repayment value at the tabulated score nearest to ``i``.

    ``i`` is matched against the module-level ``scores`` index; the label found
    there is then used to look up ``repay_A``.
    """
    # Index.get_loc(..., method='nearest') was deprecated in pandas 1.4 and
    # removed in 2.0; get_indexer performs the same nearest-label lookup.
    nearest_pos = scores.get_indexer([i], method='nearest')[0]
    return repay_A[scores[nearest_pos]]
def get_repay_B(i):
    """Return group B's repayment value at the tabulated score nearest to ``i``.

    ``i`` is matched against the module-level ``scores`` index; the label found
    there is then used to look up ``repay_B``.
    """
    # Index.get_loc(..., method='nearest') was deprecated in pandas 1.4 and
    # removed in 2.0; get_indexer performs the same nearest-label lookup.
    nearest_pos = scores.get_indexer([i], method='nearest')[0]
    return repay_B[scores[nearest_pos]]
# Per-group repayment lookup functions, in [group A, group B] order.
# These are the functions exported by loan_lending_experiment, not the
# get_repay_A/get_repay_B defined in this cell.
# NOTE(review): presumably the module-level versions are used so they can be
# shipped to the workers started by run_testing(n_threads=...) — confirm.
loan_repaid_probs = [loan_lending_experiment.get_repay_A, loan_lending_experiment.get_repay_B]

# Problem dimensions: two groups, one bin per tabulated score.
N_groups = 2
N_scores = cdf_B.size

# Probability mass function of each group, stacked as rows [A, B].
pi_A, pi_B = (get_pmf(group_cdf) for group_cdf in (cdf_A, cdf_B))
pis = np.stack((pi_A, pi_B), axis=0)

# Head-counts of the two groups ([Black, White]) and their share of the total.
group_ratio = np.array([totals[group] for group in ("Black", "White")])
group_size_ratio = group_ratio / group_ratio.sum()

print(totals)

# Utility values per loan outcome, stored as [default, repaid].
utility_repaid, utility_default = 1, -5
util_repay = [utility_default, utility_repaid]

# Score change per loan outcome, stored as [repaid, default].
score_change_repay, score_change_default = 13.5, -27
score_change = [score_change_repay, score_change_default]

# Initial head-count per score bin for each group: the group's pmf scaled by
# its total and cast to int.
people_black, people_white = (
    (pmf * totals[group]).astype(int)
    for pmf, group in ((pi_A, "Black"), (pi_B, "White"))
)
people_distribution = [people_black, people_white]

# Repayment tables copied into plain arrays, in [Black, White] order.
repay_black, repay_white = (np.array(repay) for repay in (repay_A, repay_B))
repay_probs = [repay_black, repay_white]

# Sampling rate and number of time steps.
sample_rate = 0.1
n_time_steps = 20

{'Asian': 7906, 'White': 133165, 'Hispanic': 14702, 'Black': 18274}


In [3]:
# Fixed inputs that are passed unchanged to the Monte Carlo tester.
init_params = dict(
    repay_repaid_probs_func=loan_repaid_probs,
    repay_probs=repay_probs,
    people_dist=people_distribution,
    scores_list=scores_list,
)

# Candidate values for each experiment parameter; the full grid is the
# cross product of these lists.
potential_fairness_requirements = [0.3]
potential_sample_rate_mean = [0.05]
potential_sample_rate_std = [0.01]
potential_score_change_repay = [8, 12, 16, 20]
potential_score_change_default = [-16, -24, -32, -40]
potential_score_update_mode = ['equal', 'small_var', 'large_var']
potential_agent = ['eqopp', 'maxprof']
potential_utility_default = [*range(-10, -2)]

# Enumerate every combination of the candidate lists as plain tuples
#   (fairness_requirement, sample_rate_mean, sample_rate_std,
#    score_change_repay, score_change_default, utility_default,
#    score_update_mode, agent).
# The nesting order below (outermost = slowest varying) reproduces the row
# order of the former np.array(np.meshgrid(...)).T.reshape(...) grid, so the
# configuration ids and the index slices used when running the batches keep
# referring to the same configurations.
# Building tuples keeps every value in its native type. Stacking numeric and
# string axes into one array coerced all values to strings, which then had to
# be parsed back with .astype on every element.
all_params = [
    (fairness_requirement, sample_rate_mean, sample_rate_std,
     score_change_repay, score_change_default, utility_default,
     score_update_mode, agent)
    for agent in potential_agent
    for score_update_mode in potential_score_update_mode
    for utility_default in potential_utility_default
    for score_change_default in potential_score_change_default
    for score_change_repay in potential_score_change_repay
    for sample_rate_std in potential_sample_rate_std
    for fairness_requirement in potential_fairness_requirements
    for sample_rate_mean in potential_sample_rate_mean
]

# One [id, configuration] entry per parameter combination.
all_configs = []
n_time_steps = 20
utility_repaid = 1
# `config_id` rather than `id`, so the builtin id() is not shadowed.
for config_id, params in enumerate(all_params):
    (fairness_requirement, sample_rate_mean, sample_rate_std,
     score_change_repay, score_change_default, utility_default,
     score_update_mode, agent) = params
    score_change = [score_change_repay, score_change_default]  # [repaid, default]
    util_repay = [utility_default, utility_repaid]             # [default, repaid]
    demo_parity = loan_lending_fairness_metric.DemographicParity(fairness_requirement)
    bank_utility = loan_lending_fairness_metric.BankAgentUtility(utility_default)
    configuration = loan_lending_experiment.LoanLendingConfiguration(
        score_change,
        score_update_mode,
        util_repay,
        sample_rate_mean,
        sample_rate_std,
        demo_parity,
        bank_utility,
        agent,
        n_time_steps,
    )
    all_configs.append([config_id, configuration])

In [19]:
# Number of generated configurations (one entry is appended per parameter combination).
len(all_configs)

768

In [29]:
# Show ten consecutive configurations starting at index 252; each entry is
# [id, configuration], so element 1 is the configuration itself.
for config_index in range(252, 262):
    config_entry = all_configs[config_index]
    print(config_entry[1])

score_update_params: [8, -40]
score_update_mode: small_var
util_repay_params: [-3, 1]
sample_ratio_mean: 0.05
sample_ratio_std: 0.01
fairness_requirement: Demographic Parity: 0.3
utility_metric: Bank Agent Real Utility:-3
agent: eqopp
n_time_steps: 20

score_update_params: [12, -40]
score_update_mode: small_var
util_repay_params: [-3, 1]
sample_ratio_mean: 0.05
sample_ratio_std: 0.01
fairness_requirement: Demographic Parity: 0.3
utility_metric: Bank Agent Real Utility:-3
agent: eqopp
n_time_steps: 20

score_update_params: [16, -40]
score_update_mode: small_var
util_repay_params: [-3, 1]
sample_ratio_mean: 0.05
sample_ratio_std: 0.01
fairness_requirement: Demographic Parity: 0.3
utility_metric: Bank Agent Real Utility:-3
agent: eqopp
n_time_steps: 20

score_update_params: [20, -40]
score_update_mode: small_var
util_repay_params: [-3, 1]
sample_ratio_mean: 0.05
sample_ratio_std: 0.01
fairness_requirement: Demographic Parity: 0.3
utility_metric: Bank Agent Real Utility:-3
agent: eqopp
n_t

In [20]:
# Run vanilla Monte Carlo testing over all configurations in three batches.
if __name__ == '__main__':
    stop_criteria = (loan_lending_experiment.MaxUnfairnessStopCriteria, 0.02)

    def _new_tester(configs):
        """Build a tester for ``configs`` with the shared init params, simulation class and stop criteria."""
        return vanilla_monte_carlo.VanillaMonteCarloTesting(
            configs,
            init_params,
            loan_lending_experiment.LoanLendingMonteCarloSimulation,
            stop_criteria,
        )

    # Batch 0: configurations [0, 252), t_limit of 60.
    vanilla_mc_test = _new_tester(all_configs[:252])
    results0 = vanilla_mc_test.run_testing(n_threads=12, t_limit=60)

    # Batch 1: configurations [252, 504), t_limit of 240.
    vanilla_mc_test = _new_tester(all_configs[252:504])
    results1 = vanilla_mc_test.run_testing(n_threads=12, t_limit=240)

    # Batch 2: the remaining configurations, t_limit of 240.
    vanilla_mc_test = _new_tester(all_configs[504:])
    results2 = vanilla_mc_test.run_testing(n_threads=12, t_limit=240)



Total time used: 1548.7129244804382 seconds
Total time used: 2283.3472759723663 seconds
Total time used: 2356.602772474289 seconds


In [22]:
# Combine the three batch results in batch order using `+`
# (assumes run_testing returns a list, so this is concatenation — TODO confirm).
results = results0 + results1 + results2
# Total number of results across the three batches.
len(results)

768

In [23]:
# Persist the combined results of the three batches.
results = results0 + results1 + results2
RESULTS_PATH = 'simulation_results/results768_w_utility.pkl'
# Create the output directory first: otherwise open() raises FileNotFoundError
# and the results of the long simulation run are not written.
os.makedirs(os.path.dirname(RESULTS_PATH), exist_ok=True)
with open(RESULTS_PATH, 'wb') as f:
    pickle.dump(results, f)