# [COMPAS] Baseline -- Equalized Odds


In [1]:
%matplotlib inline
# Load all necessary packages
import pickle
import sys
cwd = '../../../core'
sys.path.append(cwd)

import numpy as np
import pandas as pd

sys.path.append("../")
from aif360.datasets import AdultDataset, GermanDataset, CompasDataset, StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.metrics.utils import compute_boolean_conditioning_vector

from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
                import load_preproc_data_adult, load_preproc_data_compas


from aif360.algorithms.postprocessing.calibrated_eq_odds_postprocessing import CalibratedEqOddsPostprocessing
from aif360.algorithms.postprocessing.eq_odds_postprocessing import EqOddsPostprocessing
from tqdm import tqdm


from sklearn.preprocessing import scale
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, MaxAbsScaler
from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve
from sklearn.impute import KNNImputer, SimpleImputer
from load_compas import * 


from IPython.display import Markdown, display
import matplotlib.pyplot as plt


from load_compas import * 
from missing_module import *

# Lift every pandas display limit so frames are rendered in full
# (all rows, all columns, untruncated cell contents, auto-detected width).
for _display_option in ('display.max_rows',
                        'display.max_columns',
                        'display.max_colwidth',
                        'display.width'):
    pd.set_option(_display_option, None)

# Likewise, never summarise NumPy arrays with "..." when printing them.
np.set_printoptions(threshold=np.inf)







### Generate Missing Data

In [2]:
# Load the COMPAS arrays through the project loader.
X, y, x_control = load_compas_data()

# Column labels for X (assumed to follow the loader's feature order -- verify
# against load_compas_data if the feature list changes).
feature_columns = [
    'age_cat_25 - 45', 'age_cat_Greater than 45', 'age_cat_Less than 25',
    'race', 'sex', 'priors_count', 'c_charge_degree',
]
df = pd.DataFrame(X, columns=feature_columns)

# Peek at the first row labels of the race == 0 group.
idxx = df.loc[df['race'] == 0].index
print(idxx[:10])

# Recode the label so that -1 becomes 0.
y = pd.Series(y, name="two_year_recid")
y.loc[y == -1] = 0

# Features and label in one frame, then balanced on race via the project helper.
df = pd.concat([df, y], axis=1)
df_bal = balance_data(df, 'race', 0)

# Inject missing values column by column; each entry is
# (column to blank out, p_ms0, p_ms1), always conditioned on race and
# using the same seed.
s = 777
missing_specs = [
    ('sex', 0.4, 0.1),
    ('priors_count', 0.6, 0.2),
]
df_ms = df_bal
for ms_label, p_ms0, p_ms1 in missing_specs:
    df_ms = generate_missing(df_ms, c_label='race', ms_label=ms_label,
                             p_ms0=p_ms0, p_ms1=p_ms1, seed=s)


privileged_groups = [{'race': 1}]
unprivileged_groups = [{'race': 0}]

# Per-race column means (NaNs introduced above are skipped by mean()).
df_ms.groupby(df_ms['race']).mean()


Number of people recidivating within two years
-1    2795
 1    2483
dtype: int64


('Features we will be using for classification are:', ['age_cat_25 - 45', 'age_cat_Greater than 45', 'age_cat_Less than 25', 'race', 'sex', 'priors_count', 'c_charge_degree'], '\n')
Int64Index([0, 1, 2, 8, 10, 13, 14, 15, 21, 22], dtype='int64')


Unnamed: 0_level_0,age_cat_25 - 45,age_cat_Greater than 45,age_cat_Less than 25,sex,priors_count,c_charge_degree,two_year_recid
race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.0,0.592011,0.147408,0.26058,0.820127,0.13456,0.308131,0.533999
1.0,0.536377,0.298621,0.165002,0.771791,-0.245055,0.408464,0.39087


In [3]:
# --- Experiment configuration -------------------------------------------

# Group definitions handed to the AIF360 post-processor and metrics.
unprivileged_groups = [{'race': 0}]
privileged_groups = [{'race': 1}]

# Label value treated as the favorable outcome.
favorable_label = 1

# Cost constraint name; not consumed by any code in the cells shown here.
cost_constraint = "fnr"

# Seed passed to the post-processing step.
randseed = 42

In [4]:
def _to_standard_dataset(frame):
    """Wrap a pandas frame as an AIF360 ``StandardDataset``.

    ``two_year_recid`` is the label (favorable class 1) and ``race`` is the
    protected attribute (privileged class 1).
    """
    return StandardDataset(frame, label_name='two_year_recid', favorable_classes=[1],
                           protected_attribute_names=['race'], privileged_classes=[[1]])


def _features_without_protected(dataset):
    """Return ``dataset.features`` with the protected-attribute columns removed.

    Columns are selected by name rather than by position so the classifier
    never sees ``race`` even if the column order of the frame changes.
    """
    keep = [i for i, col in enumerate(dataset.feature_names)
            if col not in dataset.protected_attribute_names]
    return dataset.features[:, keep]


def _attach_predictions(dataset_pred, fav_probs, thresh):
    """Store scores and thresholded hard labels on ``dataset_pred`` in place.

    Args:
        dataset_pred: AIF360 dataset copy that will hold the predictions.
        fav_probs: 1-D array of predicted probabilities of the favorable class.
        thresh: probabilities >= ``thresh`` get the favorable label, all
            others the unfavorable label.

    Returns:
        The same ``dataset_pred`` object, for convenience.
    """
    dataset_pred.scores = fav_probs.reshape(-1, 1)
    is_favorable = fav_probs >= thresh
    hard_labels = np.zeros_like(dataset_pred.labels)
    hard_labels[is_favorable] = dataset_pred.favorable_label
    hard_labels[~is_favorable] = dataset_pred.unfavorable_label
    dataset_pred.labels = hard_labels
    return dataset_pred


fr_list = []   # |FNR difference| after post-processing, one entry per seed
acc_list = []  # test accuracy after post-processing, one entry per seed
for seed in range(1, 11):
    # 70% train / 30% held out; the held-out part is halved further below.
    dataset_orig_train, dataset_orig_test = train_test_split(df_ms, test_size=0.3, random_state=seed)

    dataset_orig_train_no_sens = dataset_orig_train.drop(columns=['race', 'two_year_recid'])
    dataset_orig_test_no_sens = dataset_orig_test.drop(columns=['race', 'two_year_recid'])

    ## Change the following two lines to get mean or k-nn results ##
    imputer = SimpleImputer()
    # imputer = KNNImputer()

    # The imputer is fitted on the training split only and then applied to the
    # held-out split, so no held-out statistics leak into training.
    dataset_orig_train_no_sens = pd.DataFrame(imputer.fit_transform(dataset_orig_train_no_sens),
                                              columns=dataset_orig_train_no_sens.columns,
                                              index=dataset_orig_train_no_sens.index)
    dataset_orig_test_no_sens = pd.DataFrame(imputer.transform(dataset_orig_test_no_sens),
                                             columns=dataset_orig_test_no_sens.columns,
                                             index=dataset_orig_test_no_sens.index)

    # Re-attach the (never imputed) protected attribute and label.
    dataset_orig_train = pd.concat([dataset_orig_train_no_sens, dataset_orig_train[['race', 'two_year_recid']]], axis=1)
    dataset_orig_test = pd.concat([dataset_orig_test_no_sens, dataset_orig_test[['race', 'two_year_recid']]], axis=1)

    # Split the held-out 30% evenly into validation and test.
    dataset_orig_valid, dataset_orig_test = train_test_split(dataset_orig_test, test_size=0.5, random_state=seed)

    ### Converting to AIF360 StandardDataset objects ###
    dataset_orig_train = _to_standard_dataset(dataset_orig_train)
    dataset_orig_valid = _to_standard_dataset(dataset_orig_valid)
    dataset_orig_test = _to_standard_dataset(dataset_orig_test)

    # Deep copies that will carry the classifier's scores and hard labels.
    dataset_orig_train_pred = dataset_orig_train.copy(deepcopy=True)
    dataset_orig_valid_pred = dataset_orig_valid.copy(deepcopy=True)
    dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)

    X_train = _features_without_protected(dataset_orig_train)
    y_train = dataset_orig_train.labels.ravel()

    # lmod = LogisticRegression()
    # random_state is fixed so the fitted tree does not depend on whatever
    # state the global NumPy RNG happens to be in.
    lmod = DecisionTreeClassifier(max_depth=3, random_state=randseed)
    lmod.fit(X_train, y_train)

    # Column of predict_proba that corresponds to the favorable label.
    fav_idx = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]

    class_thresh = 0.5
    for dataset_pred in (dataset_orig_train_pred, dataset_orig_valid_pred, dataset_orig_test_pred):
        fav_probs = lmod.predict_proba(_features_without_protected(dataset_pred))[:, fav_idx]
        _attach_predictions(dataset_pred, fav_probs, class_thresh)

    # Fit the equalized-odds post-processor on the validation split and apply
    # it to both the validation and the test predictions.
    cpp = EqOddsPostprocessing(privileged_groups=privileged_groups,
                               unprivileged_groups=unprivileged_groups,
                               seed=randseed)
    cpp = cpp.fit(dataset_orig_valid, dataset_orig_valid_pred)

    dataset_transf_valid_pred = cpp.predict(dataset_orig_valid_pred)
    dataset_transf_test_pred = cpp.predict(dataset_orig_test_pred)

    # Metrics before post-processing (signed FNR difference).
    cm_pred_test = ClassificationMetric(dataset_orig_test, dataset_orig_test_pred,
                                        unprivileged_groups=unprivileged_groups,
                                        privileged_groups=privileged_groups)

    display(Markdown("#### Original-Predicted testing dataset"))
    print("Difference in FNR between unprivileged and privileged groups")
    print(cm_pred_test.difference(cm_pred_test.false_negative_rate))

    print("Overall Test Accuracy ")
    print(cm_pred_test.accuracy())

    # Metrics after post-processing (absolute FNR difference is recorded).
    cm_transf_test = ClassificationMetric(dataset_orig_test, dataset_transf_test_pred,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=privileged_groups)
    display(Markdown("#### Original-Transformed testing dataset"))
    print("Difference in FNR between unprivileged and privileged groups")
    fr = np.abs(cm_transf_test.difference(cm_transf_test.false_negative_rate))
    fr_list.append(fr)
    print(fr)

    print("Overall Test Accuracy ")
    acc = cm_transf_test.accuracy()
    acc_list.append(acc)
    print(acc)
    


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.19815097216927183
Overall Test Accuracy 
0.5990491283676703


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.1219977125428898
Overall Test Accuracy 
0.5689381933438986


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.09499999999999997
Overall Test Accuracy 
0.6085578446909667


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.031000000000000028
Overall Test Accuracy 
0.5752773375594294


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.2241531938685179
Overall Test Accuracy 
0.6275752773375595


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.03513022410660205
Overall Test Accuracy 
0.5832012678288431


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.23200591333271736
Overall Test Accuracy 
0.6101426307448494


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.08491176198835815
Overall Test Accuracy 
0.595879556259905


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.20166119374154912
Overall Test Accuracy 
0.606973058637084


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.034624299787521684
Overall Test Accuracy 
0.606973058637084


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.2728116300649551
Overall Test Accuracy 
0.6228209191759112


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.04892256933704503
Overall Test Accuracy 
0.5768621236133122


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.16527965037891112
Overall Test Accuracy 
0.5927099841521395


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.009484401878283477
Overall Test Accuracy 
0.572107765451664


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.26348258706467664
Overall Test Accuracy 
0.5657686212361331


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.016417910447761197
Overall Test Accuracy 
0.5404120443740095


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.29367746560917685
Overall Test Accuracy 
0.6307448494453248


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.09987901599677376
Overall Test Accuracy 
0.5625990491283677


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.32853167457849075
Overall Test Accuracy 
0.5832012678288431


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.017515141594368955
Overall Test Accuracy 
0.5372424722662441


In [12]:
# Collapse the per-seed results into mean / standard deviation.
# Each statistic is wrapped in a one-element list.
acc_arr = np.array(acc_list)
fr_arr = np.array(fr_list)

acc_mean, acc_std = [acc_arr.mean()], [acc_arr.std()]
fr_mean, fr_std = [fr_arr.mean()], [fr_arr.std()]

In [13]:
# Persist the aggregated fairness/accuracy statistics for later comparison.
# `pickle` is imported explicitly at the top of the notebook; this cell must
# not depend on a star import happening to expose it.
results = {
    'fr_mean': fr_mean,
    'fr_std': fr_std,
    'acc_mean': acc_mean,
    'acc_std': acc_std,
}
with open('mean_eqodds_result.pkl', 'wb+') as f:
    pickle.dump(results, f)