# [Adult] Baseline -- Equalized Odds

In [1]:
%matplotlib inline
# Load all necessary packages
import sys
cwd = '../../../core'
sys.path.append(cwd)

import numpy as np
import pandas as pd

sys.path.append("../")
from aif360.datasets import AdultDataset, GermanDataset, CompasDataset, StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.metrics.utils import compute_boolean_conditioning_vector

from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
                import load_preproc_data_adult, load_preproc_data_compas


from aif360.algorithms.postprocessing.calibrated_eq_odds_postprocessing import CalibratedEqOddsPostprocessing
from aif360.algorithms.postprocessing.eq_odds_postprocessing import EqOddsPostprocessing
from tqdm import tqdm


from sklearn.preprocessing import scale
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, MaxAbsScaler
from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve
from sklearn.impute import KNNImputer, SimpleImputer

from IPython.display import Markdown, display
import matplotlib.pyplot as plt


from load_adult import * 
from missing_module import * 

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
np.set_printoptions(threshold=np.inf)







<br/>

### Generate Missing Data

In [2]:
## Loading Data ## 
df_train, df_test = load_adult()

## Balancing the Data ##
df = balance_data(df_train, 'income', 0)
df = balance_data(df, 'gender', 1)

sens_attr = 'gender'
s = 42   # random seed

## Generate Missing Data in Training Set ##
df_ms = generate_missing(df, sens_attr, ms_label='marital-status', p_ms0=0, p_ms1=0.4, seed=s)
df_ms = generate_missing(df_ms, sens_attr, ms_label='hours-per-week', p_ms0=0, p_ms1=0.3, seed=s)
df_ms = generate_missing(df_ms, sens_attr, ms_label='race', p_ms0=0.2, p_ms1=0.2, seed=s)

df_ms.describe()

Unnamed: 0,age,workclass,education,education-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
count,7834.0,7834.0,7834.0,7834.0,6214.0,7834.0,7834.0,6207.0,7834.0,7834.0,7834.0,6619.0,7834.0,7834.0
mean,0.306785,0.36199,0.702664,0.637265,0.429943,0.453359,0.348634,0.920533,0.498724,0.017038,0.025597,0.401033,0.910474,0.424304
std,0.175442,0.163557,0.237433,0.169363,0.271442,0.311633,0.360753,0.207756,0.50003,0.093686,0.104585,0.120818,0.152926,0.494268
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.164384,0.333333,0.6,0.533333,0.333333,0.230769,0.0,1.0,0.0,0.0,0.0,0.377551,0.95,0.0
50%,0.30137,0.333333,0.733333,0.6,0.333333,0.461538,0.2,1.0,0.0,0.0,0.0,0.397959,0.95,0.0
75%,0.424658,0.333333,0.8,0.8,0.666667,0.692308,0.6,1.0,1.0,0.0,0.0,0.44898,0.95,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


<br/><br/>

## Define Parameters

In [3]:
cost_constraint = "fpr"

privileged_groups = [{'gender': 1}]
unprivileged_groups = [{'gender': 0}]

favorable_label = 1
randseed = 42 

In [4]:
fr_list = []
acc_list = [] 
for seed in range (1, 11): 
    dataset_orig_train, dataset_orig_test = train_test_split(df_ms, test_size=0.3, random_state=seed)


    dataset_orig_train_no_sens = dataset_orig_train.drop(columns=['gender','income'])
    dataset_orig_test_no_sens = dataset_orig_test.drop(columns=['gender','income'])

    ## Change the following two lines to get mean or k-nn results ##
#     imputer = SimpleImputer()
    imputer = KNNImputer()

    dataset_orig_train_no_sens = pd.DataFrame(imputer.fit_transform(dataset_orig_train_no_sens), 
                                              columns=dataset_orig_train_no_sens.columns, 
                                              index=dataset_orig_train_no_sens.index)
    dataset_orig_test_no_sens = pd.DataFrame(imputer.transform(dataset_orig_test_no_sens), 
                                             columns=dataset_orig_test_no_sens.columns, 
                                             index=dataset_orig_test_no_sens.index)
    dataset_orig_train = pd.concat([dataset_orig_train_no_sens, dataset_orig_train[['gender','income']]], axis=1)
    dataset_orig_test = pd.concat([dataset_orig_test_no_sens, dataset_orig_test[['gender','income']]], axis=1)


    dataset_orig_valid, dataset_orig_test = train_test_split(dataset_orig_test, test_size=0.5, random_state=seed)
    

    ### Converting to AIF360 StandardDataset objects ###
    dataset_orig_train = StandardDataset(dataset_orig_train, label_name='income', favorable_classes=[1],
                                         protected_attribute_names=['gender'], privileged_classes=[[1]])
    dataset_orig_valid = StandardDataset(dataset_orig_valid, label_name='income', favorable_classes=[1],
                                         protected_attribute_names=['gender'], privileged_classes=[[1]])
    dataset_orig_test = StandardDataset(dataset_orig_test, label_name='income', favorable_classes=[1],
                                         protected_attribute_names=['gender'], privileged_classes=[[1]])

    # Placeholder for predicted and transformed datasets
    dataset_orig_train_pred = dataset_orig_train.copy(deepcopy=True)
    dataset_orig_valid_pred = dataset_orig_valid.copy(deepcopy=True)
    dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)

    dataset_new_valid_pred = dataset_orig_valid.copy(deepcopy=True)
    dataset_new_test_pred = dataset_orig_test.copy(deepcopy=True)

    idx_wo_protected = list(set(range(13))-set([8]))
    X_train = dataset_orig_train.features[:,idx_wo_protected]
    y_train = dataset_orig_train.labels.ravel()

    X_train.shape
    # lmod = LogisticRegression()
    lmod = DecisionTreeClassifier(max_depth=3)

    lmod.fit(X_train, y_train)

    fav_idx = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]
    y_train_pred_prob = lmod.predict_proba(X_train)[:,fav_idx]

    # Prediction probs for validation and testing data
    X_valid = dataset_orig_valid.features[:,idx_wo_protected]
    y_valid_pred_prob = lmod.predict_proba(X_valid)[:,fav_idx]

    X_test = dataset_orig_test.features[:,idx_wo_protected]
    y_test_pred_prob = lmod.predict_proba(X_test)[:,fav_idx]

    class_thresh = 0.5
    dataset_orig_train_pred.scores = y_train_pred_prob.reshape(-1,1)
    dataset_orig_valid_pred.scores = y_valid_pred_prob.reshape(-1,1)
    dataset_orig_test_pred.scores = y_test_pred_prob.reshape(-1,1)

    y_train_pred = np.zeros_like(dataset_orig_train_pred.labels)
    y_train_pred[y_train_pred_prob >= class_thresh] = dataset_orig_train_pred.favorable_label
    y_train_pred[~(y_train_pred_prob >= class_thresh)] = dataset_orig_train_pred.unfavorable_label
    dataset_orig_train_pred.labels = y_train_pred

    y_valid_pred = np.zeros_like(dataset_orig_valid_pred.labels)
    y_valid_pred[y_valid_pred_prob >= class_thresh] = dataset_orig_valid_pred.favorable_label
    y_valid_pred[~(y_valid_pred_prob >= class_thresh)] = dataset_orig_valid_pred.unfavorable_label
    dataset_orig_valid_pred.labels = y_valid_pred

    y_test_pred = np.zeros_like(dataset_orig_test_pred.labels)
    y_test_pred[y_test_pred_prob >= class_thresh] = dataset_orig_test_pred.favorable_label
    y_test_pred[~(y_test_pred_prob >= class_thresh)] = dataset_orig_test_pred.unfavorable_label
    dataset_orig_test_pred.labels = y_test_pred

    
    cpp = EqOddsPostprocessing(privileged_groups = privileged_groups,
                                     unprivileged_groups = unprivileged_groups,
                                     seed=randseed)
    cpp = cpp.fit(dataset_orig_valid, dataset_orig_valid_pred)
    
    dataset_transf_valid_pred = cpp.predict(dataset_orig_valid_pred)
    dataset_transf_test_pred = cpp.predict(dataset_orig_test_pred)
    
    cm_pred_test = ClassificationMetric(dataset_orig_test, dataset_orig_test_pred,
                             unprivileged_groups=unprivileged_groups,
                             privileged_groups=privileged_groups)
    
    display(Markdown("#### Original-Predicted testing dataset"))
    print("Difference in FPR between unprivileged and privileged groups")
    print(cm_pred_test.difference(cm_pred_test.false_positive_rate))
    
    print("Overall Test Accuracy ")
    print(cm_pred_test.accuracy())
    
    cm_transf_test = ClassificationMetric(dataset_orig_test, dataset_transf_test_pred,
                             unprivileged_groups=unprivileged_groups,
                             privileged_groups=privileged_groups)
    display(Markdown("#### Original-Transformed testing dataset"))
    print("Difference in FPR between unprivileged and privileged groups")
    fr = np.abs(cm_transf_test.difference(cm_transf_test.false_positive_rate))
    fr_list.append(fr)
    print(fr)

    print("Overall Test Accuracy ")
    acc = cm_transf_test.accuracy()
    acc_list.append(acc)
    print(acc)
    


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.44149720011788984
Overall Test Accuracy 
0.7670068027210885


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.0005894488653109264
Overall Test Accuracy 
0.6360544217687075


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.13603104212860312
Overall Test Accuracy 
0.7746598639455783


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.03560606060606061
Overall Test Accuracy 
0.7168367346938775


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.4458255965889553
Overall Test Accuracy 
0.7695578231292517


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.0024929509662334204
Overall Test Accuracy 
0.6505102040816326


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.4873468729851709
Overall Test Accuracy 
0.7738095238095238


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.05937029873200084
Overall Test Accuracy 
0.6471088435374149


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.4277472503163352
Overall Test Accuracy 
0.7695578231292517


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.02212134887136874
Overall Test Accuracy 
0.6377551020408163


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.4355433789954338
Overall Test Accuracy 
0.782312925170068


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.0325296803652968
Overall Test Accuracy 
0.6471088435374149


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.43002879473467703
Overall Test Accuracy 
0.7763605442176871


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.00863842040312629
Overall Test Accuracy 
0.6360544217687075


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.4134584851032035
Overall Test Accuracy 
0.7721088435374149


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.029812272345194735
Overall Test Accuracy 
0.6513605442176871


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.38723765181455616
Overall Test Accuracy 
0.7695578231292517


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.09286629491274612
Overall Test Accuracy 
0.6845238095238095


#### Original-Predicted testing dataset

Difference in FPR between unprivileged and privileged groups
-0.3219229801157815
Overall Test Accuracy 
0.7831632653061225


#### Original-Transformed testing dataset

Difference in FPR between unprivileged and privileged groups
0.07709179820934164
Overall Test Accuracy 
0.6607142857142857


In [18]:
acc_mean = [np.array(acc_list).mean()]
acc_std = [np.array(acc_list).std()]
fr_mean = [np.array(fr_list).mean()]
fr_std = [np.array(fr_list).std()]

## Saving the Results

In [19]:
# with open('results/mean_eqodds_result.pkl', 'wb+') as f: 
with open('results/knn_eqodds_result.pkl', 'wb+') as f: 
    pickle.dump({'fr_mean': fr_mean, 'fr_std': fr_std, 'acc_mean': acc_mean, 'acc_std': acc_std}, f)