In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import pickle
import os 
from glob import glob
import matplotlib as mpl 
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from scipy.stats import permutation_test
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from methods import * 

%load_ext autoreload
%autoreload 2

## Load data, train model

In [None]:
# Spreadsheet hosted on the UCI archive; it is fetched over the network on every run.
data_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls"

# header=1 takes the column labels from the second row of the sheet.
raw_sheet = pd.read_excel(io=data_url, header=1)

# Drop the ID column and shorten/normalise two column names.
column_renames = {"PAY_0": "PAY_1", "default payment next month": "default"}
dataset = raw_sheet.drop(columns=["ID"]).rename(columns=column_renames)

# Reproducible training sample: 10,000 distinct row positions.
np.random.seed(42)
n_rows = dataset.shape[0]
train_inds = np.random.choice(n_rows, 10000, replace=False)
df_train = dataset.iloc[train_inds]

# Separate the target column from the feature columns.
y_train = df_train['default']
X_train = df_train.drop(columns='default')

In [None]:
# Fit the classifier on the training sample.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Hold out every row that was not drawn for training. `train_inds` holds row
# *positions* (they are fed to `dataset.iloc` when building `df_train`), so the
# complement is taken by position as well. Matching positions against index
# labels is only correct while the index is the default 0..n-1 range.
is_test_row = np.ones(len(dataset), dtype=bool)
is_test_row[train_inds] = False
df_test = dataset.iloc[is_test_row]

# Restrict to actual non-defaulters (default == 0) and split them into two
# groups by EDUCATION code: codes <= 1 versus codes >= 3.
is_non_default = df_test.default == 0
df1 = df_test[(df_test.EDUCATION <= 1) & is_non_default]
df2 = df_test[(df_test.EDUCATION >= 3) & is_non_default]

# Feature matrices for the two groups (target column removed).
X1 = df1.drop(columns='default')
X2 = df2.drop(columns='default')
X1.shape, X2.shape

In [None]:
# Check that equality of opportunity is not met: X1 and X2 contain only
# non-defaulters, so the two mean predictions are compared across groups.
y1, y2 = (clf.predict(group_features) for group_features in (X1, X2))
y1.mean(), y2.mean()



In [None]:
# Simulate a fair model: shift group 1's predictions by the gap between the
# group means so that both groups end up with the same mean.
# NOTE(review): after the shift z1 is presumably no longer 0/1-valued — confirm
# the test procedures accept real-valued inputs.
mean_y1, mean_y2 = np.mean(y1), np.mean(y2)
z1 = y1 + mean_y2 - mean_y1
z2 = y2  # group 2 is left untouched (same array object as y2)
print(z1.mean(), z2.mean())

## Experiments

In [None]:
# Null and alternative runs, without Bonferroni correction.
# (The permutation tests may take a while.)

alphas = np.linspace(0.005, 0.1, 20)  # 20 evenly spaced values from 0.005 to 0.1
iters = 10

# Alternative: the raw predictions (y1, y2); only the first return value is kept.
betting_tau, _ = betting_experiment(y1, y2, alphas, iters)
# Null: the mean-matched predictions (z1, z2); only the second return value is kept.
_, betting_fdr = betting_experiment(z1, z2, alphas, iters)

# Sequential permutation tests for each k, alternative first and then null.
perm_runs = {}
for perm_k in (250, 500, 1000):
    tau_k, _ = seq_perm_test_experiment(y1, y2, alphas, iters, k=perm_k, bonferroni=False)
    _, fdr_k = seq_perm_test_experiment(z1, z2, alphas, iters, k=perm_k, bonferroni=False)
    perm_runs[perm_k] = (tau_k, fdr_k)

# Expose the per-k results under their individual names.
perm_250_tau, perm_250_fdr = perm_runs[250]
perm_500_tau, perm_500_fdr = perm_runs[500]
perm_1000_tau, perm_1000_fdr = perm_runs[1000]

# Persist everything once all experiments have finished.
for result_name, result_value in [
    ('betting_loan_tau', betting_tau),
    ('betting_loan_fdr', betting_fdr),
    ('perm_250_loan_tau', perm_250_tau),
    ('perm_250_loan_fdr', perm_250_fdr),
    ('perm_500_loan_tau', perm_500_tau),
    ('perm_500_loan_fdr', perm_500_fdr),
    ('perm_1000_loan_tau', perm_1000_tau),
    ('perm_1000_loan_fdr', perm_1000_fdr),
]:
    save_results(result_name, result_value)



In [None]:
## Null and alternative runs, using Bonferroni correction

alphas = np.linspace(0.005, 0.1, 20)  # 20 evenly spaced values from 0.005 to 0.1
iters = 10

# The betting experiment takes no Bonferroni flag; it is simply run again here.
# NOTE(review): its results are saved under the same names as in the
# non-Bonferroni cell — presumably overwriting them; confirm against save_results.
betting_tau, _ = betting_experiment(y1, y2, alphas, iters)  # Alternative
_, betting_fdr = betting_experiment(z1, z2, alphas, iters)  # Null

# Sequential permutation tests for each k, alternative first and then null.
permb_runs = {}
for perm_k in (250, 500, 1000):
    tau_k, _ = seq_perm_test_experiment(y1, y2, alphas, iters, k=perm_k, bonferroni=True)
    _, fdr_k = seq_perm_test_experiment(z1, z2, alphas, iters, k=perm_k, bonferroni=True)
    permb_runs[perm_k] = (tau_k, fdr_k)

# Expose the per-k results under their individual names.
permb_250_tau, permb_250_fdr = permb_runs[250]
permb_500_tau, permb_500_fdr = permb_runs[500]
permb_1000_tau, permb_1000_fdr = permb_runs[1000]

# Persist everything once all experiments have finished.
for result_name, result_value in [
    ('betting_loan_tau', betting_tau),
    ('betting_loan_fdr', betting_fdr),
    ('permb_250_loan_tau', permb_250_tau),
    ('permb_250_loan_fdr', permb_250_fdr),
    ('permb_500_loan_tau', permb_500_tau),
    ('permb_500_loan_fdr', permb_500_fdr),
    ('permb_1000_loan_tau', permb_1000_tau),
    ('permb_1000_loan_fdr', permb_1000_fdr),
]:
    save_results(result_name, result_value)