# [HSLS] Baseline -- Equalized Odds 


In [2]:
%matplotlib inline
# Load all necessary packages
import pickle
import sys
cwd = '../../../core'
sys.path.append(cwd)

import numpy as np
import pandas as pd

sys.path.append("../")
from aif360.datasets import AdultDataset, GermanDataset, CompasDataset, StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.metrics.utils import compute_boolean_conditioning_vector


from aif360.algorithms.postprocessing.calibrated_eq_odds_postprocessing import CalibratedEqOddsPostprocessing
from aif360.algorithms.postprocessing.eq_odds_postprocessing import EqOddsPostprocessing
from tqdm import tqdm

from sklearn.preprocessing import scale
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, MaxAbsScaler
from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve
from sklearn.impute import KNNImputer,SimpleImputer

from IPython.display import Markdown, display
import matplotlib.pyplot as plt


from missing_module import * 

# Notebook-wide display configuration: never truncate pandas frames
# (rows, columns, cell width, total width) or numpy array reprs, so the
# recorded outputs below show every value.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', None)
np.set_printoptions(threshold=np.inf)







<br/>

### Load Data

In [11]:
# Protected attribute and the group definitions handed to the AIF360
# post-processor and metrics: value 1 is the privileged group, 0 the
# unprivileged one.
sens_attr = 'racebin'
privileged_groups = [{sens_attr: 1}]
unprivileged_groups = [{sens_attr: 0}]

# HSLS frame as stored on disk; the summary below shows that several feature
# columns have fewer non-null entries than the label column (missing values).
df_ms = pd.read_pickle('pkl_data/hsls_orig.pkl')

df_ms.describe()

Unnamed: 0,X1MTHID,X1MTHUTI,X1MTHEFF,X1PAR2EDU,X1FAMINCOME,X1P1RELATION,X1PAR1EMP,X1SCHOOLBEL,X1STU30OCC2,racebin,gradebin
count,21159.0,18802.0,18759.0,12889.0,16429.0,16429.0,16429.0,20680.0,21018.0,21444.0,21444.0
mean,0.506601,0.724713,0.652447,0.326208,0.303599,0.056644,0.756001,0.744307,0.433003,0.552789,0.5
std,0.287608,0.206873,0.219202,0.248146,0.254136,0.177614,0.313303,0.169257,0.377249,0.497217,0.500012
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.332378,0.582988,0.519824,0.166667,0.083333,0.0,0.333333,0.6633,0.181818,0.0,0.0
50%,0.504298,0.665975,0.665198,0.166667,0.25,0.0,1.0,0.725589,0.204545,1.0,0.5
75%,0.667622,0.914938,0.764317,0.5,0.416667,0.066667,1.0,0.875421,0.988636,1.0,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


<br/><br/>

In [12]:
# Baseline experiment: for ten random 70/15/15 train/validation/test splits,
# impute missing values, fit a depth-3 decision tree on the non-protected
# features, fit EqOddsPostprocessing on the validation split and evaluate the
# post-processed predictions on the test split.
#
# Results collected per split:
#   fr_list  -- |FNR(unprivileged) - FNR(privileged)| after post-processing
#   acc_list -- overall test accuracy after post-processing
fr_list = []
acc_list = []
randseed = 42       # seed handed to EqOddsPostprocessing (same for every split)
class_thresh = 0.5  # favorable-class probability cut-off for hard labels


def _to_standard_dataset(frame):
    """Wrap a fully-imputed DataFrame as an AIF360 StandardDataset.

    'gradebin' is the label (favorable class 1) and 'racebin' the protected
    attribute (privileged class 1).
    """
    return StandardDataset(frame, label_name='gradebin', favorable_classes=[1],
                           protected_attribute_names=['racebin'], privileged_classes=[[1]])


def _with_predictions(dataset, fav_prob, thresh):
    """Return a deep copy of `dataset` carrying classifier predictions.

    `scores` holds the favorable-class probabilities as a column vector and
    `labels` the hard predictions: the favorable label where the probability
    is >= `thresh`, the unfavorable label everywhere else.
    """
    pred = dataset.copy(deepcopy=True)
    pred.scores = fav_prob.reshape(-1, 1)
    hard = np.where(fav_prob >= thresh, pred.favorable_label, pred.unfavorable_label)
    pred.labels = hard.reshape(-1, 1).astype(dataset.labels.dtype)
    return pred


for seed in range(1, 11):
    dataset_orig_train, dataset_orig_vt = train_test_split(df_ms, test_size=0.3, random_state=seed)

    ## Change the following two lines to get mean or k-nn results ##
    # imputer = SimpleImputer()
    imputer = KNNImputer()

    # NOTE(review): the imputer is fitted on every column, including the label
    # ('gradebin') and the protected attribute ('racebin') -- confirm intended.
    dataset_orig_train = pd.DataFrame(imputer.fit_transform(dataset_orig_train),
                                      columns=dataset_orig_train.columns,
                                      index=dataset_orig_train.index)

    # NOTE(review): the held-out 30% is always mean-imputed, with the means taken
    # from the already-imputed training frame, regardless of the imputer chosen
    # above -- confirm this is the intended baseline protocol.
    vt_imputer = SimpleImputer()
    vt_imputer.fit(dataset_orig_train)
    dataset_orig_vt = pd.DataFrame(vt_imputer.transform(dataset_orig_vt),
                                   columns=dataset_orig_vt.columns,
                                   index=dataset_orig_vt.index)

    dataset_orig_valid, dataset_orig_test = train_test_split(dataset_orig_vt, test_size=0.5, random_state=seed)

    ### Converting to AIF360 StandardDataset objects ###
    dataset_orig_train = _to_standard_dataset(dataset_orig_train)
    dataset_orig_valid = _to_standard_dataset(dataset_orig_valid)
    dataset_orig_test = _to_standard_dataset(dataset_orig_test)

    # Column positions of every feature except the protected attribute(s), so
    # the classifier never sees 'racebin' directly.
    idx_wo_protected = [i for i, name in enumerate(dataset_orig_train.feature_names)
                        if name not in dataset_orig_train.protected_attribute_names]
    X_train = dataset_orig_train.features[:, idx_wo_protected]
    y_train = dataset_orig_train.labels.ravel()

    lmod = DecisionTreeClassifier(max_depth=3, random_state=42)
    lmod.fit(X_train, y_train)

    # Column of predict_proba that corresponds to the favorable label
    fav_idx = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]

    # Prediction probs for validation and testing data
    X_valid = dataset_orig_valid.features[:, idx_wo_protected]
    y_valid_pred_prob = lmod.predict_proba(X_valid)[:, fav_idx]

    X_test = dataset_orig_test.features[:, idx_wo_protected]
    y_test_pred_prob = lmod.predict_proba(X_test)[:, fav_idx]

    dataset_orig_valid_pred = _with_predictions(dataset_orig_valid, y_valid_pred_prob, class_thresh)
    dataset_orig_test_pred = _with_predictions(dataset_orig_test, y_test_pred_prob, class_thresh)

    # Fit the post-processor on the validation split, apply it to the test split
    cpp = EqOddsPostprocessing(privileged_groups=privileged_groups,
                               unprivileged_groups=unprivileged_groups,
                               seed=randseed)
    cpp = cpp.fit(dataset_orig_valid, dataset_orig_valid_pred)
    dataset_transf_test_pred = cpp.predict(dataset_orig_test_pred)

    # Metrics of the raw classifier on the test split (signed FNR difference)
    cm_pred_test = ClassificationMetric(dataset_orig_test, dataset_orig_test_pred,
                                        unprivileged_groups=unprivileged_groups,
                                        privileged_groups=privileged_groups)

    display(Markdown("#### Original-Predicted testing dataset"))
    print("Difference in FNR between unprivileged and privileged groups")
    print(cm_pred_test.difference(cm_pred_test.false_negative_rate))

    print("Overall Test Accuracy ")
    print(cm_pred_test.accuracy())

    # Metrics after post-processing (absolute FNR difference is what is recorded)
    cm_transf_test = ClassificationMetric(dataset_orig_test, dataset_transf_test_pred,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=privileged_groups)
    display(Markdown("#### Original-Transformed testing dataset"))
    print("Difference in FNR between unprivileged and privileged groups")
    fr = np.abs(cm_transf_test.difference(cm_transf_test.false_negative_rate))
    fr_list.append(fr)
    print(fr)

    print("Overall Test Accuracy ")
    acc = cm_transf_test.accuracy()
    acc_list.append(acc)
    print(acc)

    # Dump the fitted tree so the split rules of each run can be compared
    text_representation = tree.export_text(lmod)
    print(text_representation)


#### Original-Predicted testing dataset

Difference in FNR between unprivileged and privileged groups
0.019972400067591656
Overall Test Accuracy 
0.6496736089524402


#### Original-Transformed testing dataset

Difference in FNR between unprivileged and privileged groups
0.010636300482529393
Overall Test Accuracy 
0.6300901460988498
|--- feature_3 <= 0.28
|   |--- feature_0 <= 0.58
|   |   |--- feature_3 <= 0.15
|   |   |   |--- class: 0.0
|   |   |--- feature_3 >  0.15
|   |   |   |--- class: 0.0
|   |--- feature_0 >  0.58
|   |   |--- feature_4 <= 0.24
|   |   |   |--- class: 0.0
|   |   |--- feature_4 >  0.24
|   |   |   |--- class: 1.0
|--- feature_3 >  0.28
|   |--- feature_0 <= 0.34
|   |   |--- feature_4 <= 0.38
|   |   |   |--- class: 0.0
|   |   |--- feature_4 >  0.38
|   |   |   |--- class: 1.0
|   |--- feature_0 >  0.34
|   |   |--- feature_0 <= 0.83
|   |   |   |--- class: 1.0
|   |   |--- feature_0 >  0.83
|   |   |   |--- class: 1.0



#### Original-Predicted testing dataset

Difference in FNR between unprivileged and privileged groups
0.036296436088205375
Overall Test Accuracy 
0.6521603978862294


#### Original-Transformed testing dataset

Difference in FNR between unprivileged and privileged groups
0.00541367861461009
Overall Test Accuracy 
0.6331986322660864
|--- feature_3 <= 0.25
|   |--- feature_0 <= 0.50
|   |   |--- feature_4 <= 0.33
|   |   |   |--- class: 0.0
|   |   |--- feature_4 >  0.33
|   |   |   |--- class: 0.0
|   |--- feature_0 >  0.50
|   |   |--- feature_3 <= 0.15
|   |   |   |--- class: 0.0
|   |   |--- feature_3 >  0.15
|   |   |   |--- class: 1.0
|--- feature_3 >  0.25
|   |--- feature_0 <= 0.48
|   |   |--- feature_4 <= 0.34
|   |   |   |--- class: 0.0
|   |   |--- feature_4 >  0.34
|   |   |   |--- class: 1.0
|   |--- feature_0 >  0.48
|   |   |--- feature_4 <= 0.33
|   |   |   |--- class: 1.0
|   |   |--- feature_4 >  0.33
|   |   |   |--- class: 1.0



#### Original-Predicted testing dataset

Difference in FNR between unprivileged and privileged groups
0.01593293698556858
Overall Test Accuracy 
0.6608641591544918


#### Original-Transformed testing dataset

Difference in FNR between unprivileged and privileged groups
0.03509683772841665
Overall Test Accuracy 
0.6537146409698477
|--- feature_4 <= 0.33
|   |--- feature_5 <= 0.16
|   |   |--- feature_0 <= 0.50
|   |   |   |--- class: 0.0
|   |   |--- feature_0 >  0.50
|   |   |   |--- class: 1.0
|   |--- feature_5 >  0.16
|   |   |--- feature_6 <= 0.97
|   |   |   |--- class: 0.0
|   |   |--- feature_6 >  0.97
|   |   |   |--- class: 0.0
|--- feature_4 >  0.33
|   |--- feature_0 <= 0.50
|   |   |--- feature_2 <= 0.57
|   |   |   |--- class: 0.0
|   |   |--- feature_2 >  0.57
|   |   |   |--- class: 1.0
|   |--- feature_0 >  0.50
|   |   |--- feature_3 <= 0.22
|   |   |   |--- class: 1.0
|   |   |--- feature_3 >  0.22
|   |   |   |--- class: 1.0



#### Original-Predicted testing dataset

Difference in FNR between unprivileged and privileged groups
0.09326108746495443
Overall Test Accuracy 
0.6474976686353746


#### Original-Transformed testing dataset

Difference in FNR between unprivileged and privileged groups
0.009485901887011705
Overall Test Accuracy 
0.6179670500466273
|--- feature_3 <= 0.28
|   |--- feature_0 <= 0.50
|   |   |--- feature_4 <= 0.33
|   |   |   |--- class: 0.0
|   |   |--- feature_4 >  0.33
|   |   |   |--- class: 0.0
|   |--- feature_0 >  0.50
|   |   |--- feature_4 <= 0.24
|   |   |   |--- class: 0.0
|   |   |--- feature_4 >  0.24
|   |   |   |--- class: 1.0
|--- feature_3 >  0.28
|   |--- feature_0 <= 0.49
|   |   |--- feature_4 <= 0.31
|   |   |   |--- class: 0.0
|   |   |--- feature_4 >  0.31
|   |   |   |--- class: 1.0
|   |--- feature_0 >  0.49
|   |   |--- feature_0 <= 0.83
|   |   |   |--- class: 1.0
|   |   |--- feature_0 >  0.83
|   |   |   |--- class: 1.0



#### Original-Predicted testing dataset

Difference in FNR between unprivileged and privileged groups
0.028387837671885296
Overall Test Accuracy 
0.6462542741684799


#### Original-Transformed testing dataset

Difference in FNR between unprivileged and privileged groups
0.02043871207570086
Overall Test Accuracy 
0.6394156046005596
|--- feature_3 <= 0.28
|   |--- feature_0 <= 0.62
|   |   |--- feature_5 <= 0.16
|   |   |   |--- class: 0.0
|   |   |--- feature_5 >  0.16
|   |   |   |--- class: 0.0
|   |--- feature_0 >  0.62
|   |   |--- feature_5 <= 0.07
|   |   |   |--- class: 1.0
|   |   |--- feature_5 >  0.07
|   |   |   |--- class: 0.0
|--- feature_3 >  0.28
|   |--- feature_0 <= 0.48
|   |   |--- feature_4 <= 0.38
|   |   |   |--- class: 0.0
|   |   |--- feature_4 >  0.38
|   |   |   |--- class: 1.0
|   |--- feature_0 >  0.48
|   |   |--- feature_5 <= 0.10
|   |   |   |--- class: 1.0
|   |   |--- feature_5 >  0.10
|   |   |   |--- class: 0.0



#### Original-Predicted testing dataset

Difference in FNR between unprivileged and privileged groups
0.02972930632706483
Overall Test Accuracy 
0.6468759714019272


#### Original-Transformed testing dataset

Difference in FNR between unprivileged and privileged groups
0.03182135653626983
Overall Test Accuracy 
0.6471868200186509
|--- feature_3 <= 0.25
|   |--- feature_0 <= 0.65
|   |   |--- feature_3 <= 0.15
|   |   |   |--- class: 0.0
|   |   |--- feature_3 >  0.15
|   |   |   |--- class: 0.0
|   |--- feature_0 >  0.65
|   |   |--- feature_3 <= 0.15
|   |   |   |--- class: 0.0
|   |   |--- feature_3 >  0.15
|   |   |   |--- class: 1.0
|--- feature_3 >  0.25
|   |--- feature_0 <= 0.47
|   |   |--- feature_4 <= 0.34
|   |   |   |--- class: 0.0
|   |   |--- feature_4 >  0.34
|   |   |   |--- class: 1.0
|   |--- feature_0 >  0.47
|   |   |--- feature_0 <= 0.69
|   |   |   |--- class: 1.0
|   |   |--- feature_0 >  0.69
|   |   |   |--- class: 1.0



#### Original-Predicted testing dataset

Difference in FNR between unprivileged and privileged groups
0.06219245489338435
Overall Test Accuracy 
0.5878147342244328


#### Original-Transformed testing dataset

Difference in FNR between unprivileged and privileged groups
0.029353471842536893
Overall Test Accuracy 
0.580354367423065
|--- feature_4 <= 0.24
|   |--- feature_0 <= 0.50
|   |   |--- feature_3 <= 0.28
|   |   |   |--- class: 0.0
|   |   |--- feature_3 >  0.28
|   |   |   |--- class: 0.0
|   |--- feature_0 >  0.50
|   |   |--- feature_5 <= 0.16
|   |   |   |--- class: 1.0
|   |   |--- feature_5 >  0.16
|   |   |   |--- class: 0.0
|--- feature_4 >  0.24
|   |--- feature_0 <= 0.50
|   |   |--- feature_3 <= 0.22
|   |   |   |--- class: 0.0
|   |   |--- feature_3 >  0.22
|   |   |   |--- class: 1.0
|   |--- feature_0 >  0.50
|   |   |--- feature_3 <= 0.22
|   |   |   |--- class: 1.0
|   |   |--- feature_3 >  0.22
|   |   |   |--- class: 1.0



#### Original-Predicted testing dataset

Difference in FNR between unprivileged and privileged groups
0.031186868686868707
Overall Test Accuracy 
0.6611750077712154


#### Original-Transformed testing dataset

Difference in FNR between unprivileged and privileged groups
0.010227272727272696
Overall Test Accuracy 
0.6437674852346907
|--- feature_4 <= 0.33
|   |--- feature_0 <= 0.50
|   |   |--- feature_3 <= 0.28
|   |   |   |--- class: 0.0
|   |   |--- feature_3 >  0.28
|   |   |   |--- class: 0.0
|   |--- feature_0 >  0.50
|   |   |--- feature_3 <= 0.15
|   |   |   |--- class: 0.0
|   |   |--- feature_3 >  0.15
|   |   |   |--- class: 1.0
|--- feature_4 >  0.33
|   |--- feature_0 <= 0.50
|   |   |--- feature_3 <= 0.22
|   |   |   |--- class: 0.0
|   |   |--- feature_3 >  0.22
|   |   |   |--- class: 1.0
|   |--- feature_0 >  0.50
|   |   |--- feature_3 <= 0.18
|   |   |   |--- class: 1.0
|   |   |--- feature_3 >  0.18
|   |   |   |--- class: 1.0



#### Original-Predicted testing dataset

Difference in FNR between unprivileged and privileged groups
0.024484618025969407
Overall Test Accuracy 
0.6537146409698477


#### Original-Transformed testing dataset

Difference in FNR between unprivileged and privileged groups
0.022586511681143562
Overall Test Accuracy 
0.6450108797015853
|--- feature_3 <= 0.25
|   |--- feature_4 <= 0.24
|   |   |--- feature_5 <= 0.16
|   |   |   |--- class: 0.0
|   |   |--- feature_5 >  0.16
|   |   |   |--- class: 0.0
|   |--- feature_4 >  0.24
|   |   |--- feature_0 <= 0.50
|   |   |   |--- class: 0.0
|   |   |--- feature_0 >  0.50
|   |   |   |--- class: 1.0
|--- feature_3 >  0.25
|   |--- feature_0 <= 0.48
|   |   |--- feature_3 <= 0.48
|   |   |   |--- class: 0.0
|   |   |--- feature_3 >  0.48
|   |   |   |--- class: 1.0
|   |--- feature_0 >  0.48
|   |   |--- feature_0 <= 0.83
|   |   |   |--- class: 1.0
|   |   |--- feature_0 >  0.83
|   |   |   |--- class: 1.0



#### Original-Predicted testing dataset

Difference in FNR between unprivileged and privileged groups
0.019035105132640756
Overall Test Accuracy 
0.6596207646875971


#### Original-Transformed testing dataset

Difference in FNR between unprivileged and privileged groups
0.024932175030958403
Overall Test Accuracy 
0.6391047559838359
|--- feature_3 <= 0.28
|   |--- feature_0 <= 0.50
|   |   |--- feature_3 <= 0.15
|   |   |   |--- class: 0.0
|   |   |--- feature_3 >  0.15
|   |   |   |--- class: 0.0
|   |--- feature_0 >  0.50
|   |   |--- feature_3 <= 0.15
|   |   |   |--- class: 0.0
|   |   |--- feature_3 >  0.15
|   |   |   |--- class: 1.0
|--- feature_3 >  0.28
|   |--- feature_0 <= 0.48
|   |   |--- feature_4 <= 0.38
|   |   |   |--- class: 0.0
|   |   |--- feature_4 >  0.38
|   |   |   |--- class: 1.0
|   |--- feature_0 >  0.48
|   |   |--- feature_5 <= 0.09
|   |   |   |--- class: 1.0
|   |   |--- feature_5 >  0.09
|   |   |   |--- class: 1.0



In [13]:
# Mean and standard deviation (numpy default, ddof=0) of the post-processed
# test accuracy over all splits.
np.mean(acc_list), np.std(acc_list)

(0.6329810382343799, 0.019937516226935802)

In [14]:
# Mean and standard deviation (numpy default, ddof=0) of the absolute FNR gap
# between groups over all splits.
np.mean(fr_list), np.std(fr_list)

(0.019999221860645008, 0.00996417266207264)

In [7]:
# Summary statistics over the splits, each wrapped in a one-element list
# (the shape in which they are written to disk in the next cell).
acc_arr = np.asarray(acc_list)
fr_arr = np.asarray(fr_list)

acc_mean, acc_std = [acc_arr.mean()], [acc_arr.std()]
fr_mean, fr_std = [fr_arr.mean()], [fr_arr.std()]

In [8]:
# Persist the aggregated fairness/accuracy statistics for later comparison.
# NOTE(review): the file name says "knn" but the imputer is switched by hand in
# the experiment cell -- rename the output when running the mean-imputation
# variant so results are not overwritten.
results = {'fr_mean': fr_mean, 'fr_std': fr_std, 'acc_mean': acc_mean, 'acc_std': acc_std}
# Write-only binary mode is sufficient; the file is never read back here.
with open('knn_eqodds_result.pkl', 'wb') as f:
    pickle.dump(results, f)