In [1]:
import copy

import numpy as np
import pandas as pd
import statsmodels.api as sm
import xgboost as xgb
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix,roc_auc_score,accuracy_score
from sklearn.model_selection import train_test_split

# Wrapper around statsmodels OLS exposing a scikit-learn-style fit/predict pair
# (the original author notes it is more stable than scikit-learn's version).
class SM_LinearRegression():
    """Ordinary least squares with an intercept, backed by ``sm.OLS``.

    The intercept is modelled by appending a column of ones as the LAST
    column of the design matrix, both when fitting and when predicting.
    """

    def __init__(self):
        # Fitted statsmodels results object; None until fit() has been called.
        self.LRFit = None

    @staticmethod
    def _with_intercept(X):
        """Return X as a float 2-D array with a trailing column of ones.

        The explicit float cast matters when X comes from a DataFrame mixing
        integer and boolean columns: ``.values`` is then an object array.
        """
        X = np.asarray(X, dtype=float)
        return np.hstack([X, np.ones((X.shape[0], 1))])

    def fit(self, X, y):
        """Fit OLS of ``y`` on ``X`` plus an intercept column.

        Parameters
        ----------
        X : 2-D array-like of shape (n_samples, n_features)
        y : 1-D array-like of length n_samples

        Returns
        -------
        self, so that calls can be chained as with scikit-learn estimators.
        """
        y = np.asarray(y, dtype=float)
        # hasconst=True tells statsmodels the design already has a constant.
        self.LRFit = sm.OLS(y, self._with_intercept(X), hasconst=True).fit()
        return self

    def predict(self,X):
        """Return the fitted linear predictions for the rows of ``X``.

        Raises
        ------
        AttributeError
            If called before ``fit``.
        """
        if self.LRFit is None:
            raise AttributeError(
                "SM_LinearRegression is not fitted yet; call fit(X, y) before predict()."
            )
        return self.LRFit.predict(self._with_intercept(X))
    
def to_binary(y):
    """Binarise scores relative to the largest score in ``y``.

    Every score is divided by ``max(y)``; results strictly greater than 0.5
    become 1, everything else becomes 0.

    Parameters
    ----------
    y : 1-D array-like of numbers (e.g. the output of ``model.predict``).

    Returns
    -------
    numpy.ndarray of ints (0/1) with the same length as ``y``. An empty
    input yields an empty array.
    """
    scores = np.asarray(y, dtype=float)
    if scores.size == 0:
        return np.array([], dtype=int)
    # Compute the maximum once instead of once per element.
    peak = scores.max()
    # A peak of 0 yields nan/-inf ratios, which compare False and map to 0;
    # silence the floating-point warnings for that case.
    with np.errstate(divide='ignore', invalid='ignore'):
        scaled = scores / peak
    return (scaled > .5).astype(int)

In [2]:
# Candidate estimators; each one is constructed with a fixed seed where the
# estimator accepts one.
classifiers = []
classifiers.append(SM_LinearRegression())
classifiers.append(RandomForestClassifier(random_state=42))
classifiers.append(LGBMClassifier(seed = 42))
classifiers.append(xgb.XGBClassifier(objective = 'binary:logistic', seed = 42))

# The estimator used by every modelling cell below: index 0 is the
# statsmodels OLS wrapper. Change the index to try another candidate.
classifier = classifiers[0]

### COMPAS model

In [3]:
# Build a reduced extract of the COMPAS data and write it to disk.
df = pd.read_csv('compas-scores-two-years.csv')
print(df.shape)

# Keep only the two race groups.
race_mask = df['race'].isin(['African-American', 'Caucasian'])
df = df.loc[race_mask]
feats = df.columns.tolist()
print(df.shape)

# For each juvenile-history column, drop rows whose value occurs fewer than
# 10 times in the (progressively filtered) frame.
for e in feats:
    if not e.startswith('juv_'):
        continue
    value_freq = df[e].value_counts()
    frequent_values = value_freq[value_freq >= 10].index
    df = df.loc[df[e].isin(frequent_values)]
    print(df.shape)

# Keep the five columns of interest and save them without the index.
short_columns = ["race","sex","age_cat","c_charge_degree","two_year_recid"]
df = df[short_columns]
df.to_csv('compas-scores-two-years_short.csv',index=None)

(7214, 53)
(6150, 53)
(6139, 53)
(6118, 53)
(6113, 53)


In [4]:
# Reload the full COMPAS file and apply the race / rare-juvenile-count
# filters while keeping every column (the next cell reads `decile_score`).
df = pd.read_csv('compas-scores-two-years.csv')
df = df.loc[df['race'].isin(['African-American', 'Caucasian'])]
feats = df.columns.tolist()
print(df.shape)

for e in feats:
    if not e.startswith('juv_'):
        continue
    # Values seen at least 10 times in this column survive the filter.
    value_freq = df[e].value_counts()
    frequent_values = value_freq[value_freq >= 10].index
    df = df.loc[df[e].isin(frequent_values)]
    print(df.shape)

(6150, 53)
(6139, 53)
(6118, 53)
(6113, 53)


In [5]:
# Evaluate the COMPAS decile score as a classifier of two-year recidivism,
# separately for Black and White defendants, then report the rate gaps.
#
# NOTE(review): the flag is named "med_or_high" but the cut-off is 6; COMPAS
# bands are commonly described as Low 1-4 / Medium 5-7 / High 8-10 — confirm
# that 6 (rather than 5) is intended.
threshold = 6

group_frames = {}
group_rates = {}
for label, race_value, lead in (("Black", "African-American", ""),
                                ("White", "Caucasian", "\n")):
    subset = df.loc[df['race'] == race_value].copy()
    subset['is_med_or_high_risk'] = subset['decile_score'].ge(threshold).astype(int)
    actual = subset['two_year_recid']
    flagged = subset['is_med_or_high_risk']

    # For binary labels the flattened confusion matrix is tn, fp, fn, tp.
    tn, fp, fn, tp = confusion_matrix(actual, flagged).ravel()
    fpr = fp/(fp+tn)
    fnr = fn/(fn+tp)

    print(f"{lead}False positive rate ({label})      : ", fpr)
    print(f"False negative rate ({label})      : ", fnr)
    print(f"Accuracy ({label})      :  ", accuracy_score(actual, flagged) )
    # NOTE(review): roc_auc_score receives the thresholded 0/1 flag, so this
    # number equals balanced accuracy, not a ranking AUC over decile_score.
    print(f"AUC ({label})      :  ", roc_auc_score(actual, flagged) )

    group_frames[label] = subset
    group_rates[label] = (fpr, fnr)

df_black, df_white = group_frames["Black"], group_frames["White"]
fpr_black, fnr_black = group_rates["Black"]
fpr_white, fnr_white = group_rates["White"]

print('\nFalse positive rate Gap:', abs(fpr_black-fpr_white))
print('False negative rate Gap:', abs(fnr_black-fnr_white))

False positive rate (Black)      :  0.3417085427135678
False negative rate (Black)      :  0.3757995735607676
Accuracy (Black)      :   0.6408508317425688
AUC (Black)      :   0.6412459418628323

False positive rate (White)      :  0.14555256064690028
False negative rate (White)      :  0.5945945945945946
Accuracy (White)      :   0.6778413736713
AUC (White)      :   0.6299264223792527

False positive rate Gap: 0.19615598206666754
False negative rate Gap: 0.21879502103382703


### Model with all features including 'race' (or unfair model)

In [6]:
# Load the raw COMPAS file and keep only the modelling columns.
df = pd.read_csv('compas-scores-two-years.csv')
print(df.shape)
feats = ['race', 'sex', 'age_cat',  'juv_fel_count', 'juv_misd_count', 'juv_other_count', 'priors_count', 'c_charge_degree', 'two_year_recid']
df = df[ feats ]
df = df[ df['race'].isin(['African-American', 'Caucasian']) ]
print(df.shape)

# Drop rows whose juvenile counts take a value seen fewer than 10 times.
for e in feats:
    if e.startswith('juv_'):
        value_freq = df[e].value_counts()
        df = df[ df[e].isin(value_freq[value_freq >= 10].index) ]
        print(df.shape)

# One-hot encode every categorical column (first level dropped), keeping
# `priors_count` numeric. dtype=int keeps the dummies numeric: recent pandas
# versions default to bool, and mixing bool with int columns turns `.values`
# into an object array that numeric estimators cannot consume.
categorical_cols = ['race', 'sex', 'age_cat', 'juv_fel_count',
                    'juv_misd_count', 'juv_other_count', 'c_charge_degree']
data_model = pd.concat(
    [df[ ['priors_count','two_year_recid'] ]]
    + [pd.get_dummies(df[col], drop_first = True, prefix = col, dtype = int)
       for col in categorical_cols],
    axis = 1)
print(data_model.shape)

## Train/Test Split
target_col = 'two_year_recid'
X_train, X_test, y_train, y_test = train_test_split(data_model.drop([target_col], axis=1),
                                                    data_model[target_col],
                                                    stratify = data_model[target_col],
                                                    random_state=42, test_size=0.2)

# Fit a private copy of the shared estimator: binding `classifier` directly
# would let any later cell that re-fits it silently replace this model.
model_w_race = copy.deepcopy(classifier)
model_w_race.fit(X_train.values, y_train)

# Per-group evaluation (race_Caucasian == 0 -> Black, == 1 -> White).
# NOTE(review): to_binary() thresholds relative to the maximum prediction
# within each group, so the two groups may be cut at different scores.
group_rates = {}
for label, race_flag, lead in (("Black", 0, ""), ("White", 1, "\n")):
    in_group = X_test['race_Caucasian'] == race_flag
    X_test_ = X_test[ in_group ]
    y_test_ = y_test[ in_group ]
    y_pred_ = to_binary(model_w_race.predict(X_test_.values))

    # For binary labels the flattened confusion matrix is tn, fp, fn, tp.
    tn, fp, fn, tp = confusion_matrix(y_test_, y_pred_).ravel()
    fpr = fp/(fp+tn)
    fnr = fn/(fn+tp)

    print(f"{lead}False positive rate ({label})      : ", fpr)
    print(f"False negative rate ({label})      : ", fnr)
    print(f"Accuracy ({label})      :  ", accuracy_score(y_test_, y_pred_) )
    # NOTE(review): computed on binarised predictions, so this equals
    # balanced accuracy rather than a ranking AUC.
    print(f"AUC ({label})      :  ", roc_auc_score(y_test_, y_pred_) )

    group_rates[label] = (fpr, fnr)

fpr_black, fnr_black = group_rates["Black"]
fpr_white, fnr_white = group_rates["White"]

print('\nFalse positive rate Gap:', abs(fpr_black-fpr_white))
print('False negative rate Gap:', abs(fnr_black-fnr_white))


(7214, 53)
(6150, 9)
(6139, 9)
(6118, 9)
(6113, 9)
(6113, 18)
False positive rate (Black)      :  0.11428571428571428
False negative rate (Black)      :  0.6406685236768802
Accuracy (Black)      :   0.6191819464033851
AUC (Black)      :   0.6225228810187027

False positive rate (White)      :  0.19672131147540983
False negative rate (White)      :  0.631578947368421
Accuracy (White)      :   0.6264591439688716
AUC (White)      :   0.5858498705780846

False positive rate Gap: 0.08243559718969555
False negative rate Gap: 0.00908957630845919


### Model without 'race' (or unaware model)

In [7]:
## Train/Test Split
# Same split parameters (random_state, stratification, test size) as the
# model that includes race.
target_col = 'two_year_recid'
X_train, X_test, y_train, y_test = train_test_split(data_model.drop([target_col], axis=1),
                                                    data_model[target_col],
                                                    stratify = data_model[target_col],
                                                    random_state=42, test_size=0.2)

# Remove the race indicator from the model inputs; X_test keeps it so the
# predictions can still be grouped by race afterwards.
cols = [c for c in X_train.columns if c != 'race_Caucasian']
X_train_wo_race = X_train[ cols ]
X_test_wo_race = X_test[ cols ]

# Fit a private copy of the shared estimator: binding `classifier` directly
# would make this model and every other model in the notebook the same
# object, each fit overwriting the previous one.
model_wo_race = copy.deepcopy(classifier)
model_wo_race.fit(X_train_wo_race.values, y_train)

# Per-group evaluation (race_Caucasian == 0 -> Black, == 1 -> White).
# NOTE(review): to_binary() thresholds relative to the maximum prediction
# within each group, so the two groups may be cut at different scores.
group_rates = {}
for label, race_flag, lead in (("Black", 0, ""), ("White", 1, "\n")):
    in_group = X_test['race_Caucasian'] == race_flag
    X_test_ = X_test_wo_race[ in_group ]
    y_test_ = y_test[ in_group ]
    y_pred_ = to_binary(model_wo_race.predict(X_test_.values))

    # For binary labels the flattened confusion matrix is tn, fp, fn, tp.
    tn, fp, fn, tp = confusion_matrix(y_test_, y_pred_).ravel()
    fpr = fp/(fp+tn)
    fnr = fn/(fn+tp)

    print(f"{lead}False positive rate ({label})      : ", fpr)
    print(f"False negative rate ({label})      : ", fnr)
    print(f"Accuracy ({label})      :  ", accuracy_score(y_test_, y_pred_) )
    # NOTE(review): computed on binarised predictions, so this equals
    # balanced accuracy rather than a ranking AUC.
    print(f"AUC ({label})      :  ", roc_auc_score(y_test_, y_pred_) )

    group_rates[label] = (fpr, fnr)

fpr_black, fnr_black = group_rates["Black"]
fpr_white, fnr_white = group_rates["White"]

print('\nFalse positive rate Gap:', abs(fpr_black-fpr_white))
print('False negative rate Gap:', abs(fnr_black-fnr_white))

False positive rate (Black)      :  0.10571428571428572
False negative rate (Black)      :  0.6462395543175488
Accuracy (Black)      :   0.6205923836389281
AUC (Black)      :   0.6240230799840827

False positive rate (White)      :  0.20327868852459016
False negative rate (White)      :  0.6267942583732058
Accuracy (White)      :   0.6245136186770428
AUC (White)      :   0.5849635265511021

False positive rate Gap: 0.09756440281030444
False negative rate Gap: 0.019445295944342966


### Counterfactual Fairness

In [8]:
# reference: https://github.com/fiorenza2/CFFair_Emulate
## Train/Test Split
target_col = 'two_year_recid'
X_train, X_test, y_train, y_test = train_test_split(data_model.drop([target_col], axis=1),
                                                    data_model[target_col],
                                                    stratify = data_model[target_col],
                                                    random_state=42, test_size=0.2)


non_race_feats = list(X_train.columns)
non_race_feats.remove('race_Caucasian')

# Race indicator as a single-column design matrix; built once because it is
# the same for every per-feature model below.
race_train = X_train['race_Caucasian'].values.reshape(-1,1)
race_test = X_test['race_Caucasian'].values.reshape(-1,1)
race_all = np.vstack((race_train, race_test))

# Abduction step: regress each non-race feature on race and keep the
# residual (epsilon) as the race-independent part of that feature.
# These per-feature models are fit on train AND test rows together; only
# race and the feature itself are used here, never the target
# (presumably mirroring the referenced implementation — confirm).
eps_train = []
eps_test = []
for feat in non_race_feats:
    # Fresh copy per feature so no fitted state is shared between models
    # or with the estimator used by other cells.
    feat_model = copy.deepcopy(classifier)
    feat_all = np.concatenate([X_train[feat].to_numpy(), X_test[feat].to_numpy()])
    feat_model.fit(race_all, feat_all)
    v_train = X_train[feat].to_numpy() - feat_model.predict(race_train)
    v_test = X_test[feat].to_numpy() - feat_model.predict(race_test)
    eps_train.append(v_train.reshape(-1,1))
    eps_test.append(v_test.reshape(-1,1))

# predict on target using abducted latents
model = copy.deepcopy(classifier)
model.fit(np.hstack( eps_train ),y_train)

# predict on test epsilons
# Unlike the per-group binarisation in the previous models, to_binary() is
# applied here once over the whole test set.
preds = model.predict(np.hstack( eps_test ))
preds= to_binary(preds)


# Per-group evaluation (race_Caucasian == 0 -> Black, == 1 -> White).
group_rates = {}
for label, race_flag, lead in (("Black", 0, ""), ("White", 1, "\n")):
    in_group = (X_test['race_Caucasian'] == race_flag).to_numpy()
    y_test_ = y_test[ in_group ]
    y_pred_ = preds[ in_group ]

    # For binary labels the flattened confusion matrix is tn, fp, fn, tp.
    tn, fp, fn, tp = confusion_matrix(y_test_, y_pred_).ravel()
    fpr = fp/(fp+tn)
    fnr = fn/(fn+tp)

    print(f"{lead}False positive rate ({label})      : ", fpr)
    print(f"False negative rate ({label})      : ", fnr)
    print(f"Accuracy ({label})      :  ", accuracy_score(y_test_, y_pred_) )
    # NOTE(review): computed on binarised predictions, so this equals
    # balanced accuracy rather than a ranking AUC.
    print(f"AUC ({label})      :  ", roc_auc_score(y_test_, y_pred_) )

    group_rates[label] = (fpr, fnr)

fpr_black, fnr_black = group_rates["Black"]
fpr_white, fnr_white = group_rates["White"]

print('\nFalse positive rate Gap:', abs(fpr_black-fpr_white))
print('False negative rate Gap:', abs(fnr_black-fnr_white))

False positive rate (Black)      :  0.1
False negative rate (Black)      :  0.6740947075208914
Accuracy (Black)      :   0.609308885754584
AUC (Black)      :   0.6129526462395544

False positive rate (White)      :  0.16393442622950818
False negative rate (White)      :  0.6507177033492823
Accuracy (White)      :   0.6381322957198443
AUC (White)      :   0.5926739352106047

False positive rate Gap: 0.06393442622950818
False negative rate Gap: 0.02337700417160915
