In [1]:
from src.TestProcedure import *
from src.BaselineModel import *
from src.AdultData import build_adult_data, normalize

import pandas as pd
import cvxpy as cp
import numpy as np

from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics.scorer import make_scorer



In [2]:
!git clone https://github.com/mlohaus/SearchFair.git
%cd SearchFair

Cloning into 'SearchFair'...
remote: Enumerating objects: 86, done.[K
remote: Counting objects: 100% (86/86), done.[K
remote: Compressing objects: 100% (60/60), done.[K
remote: Total 86 (delta 32), reused 74 (delta 24), pack-reused 0[K
Unpacking objects: 100% (86/86), done.
/content/SearchFair


#### Load Unbalanced (Raw) Dataset

In [3]:
# Load data into pandas DataFrame
unbalanced_dataset = pd.read_csv('data/adult/adult.csv')

# Drop fnlwgt, education, education-num, capital-gain, capital-loss as Lohaus et al do
unbalanced_dataset = unbalanced_dataset.drop(columns=['fnlwgt', 'education', 'capital-gain', 'capital-loss'])

#### Load Balanced (Fixed) Dataset

In [12]:
# Load data into pandas DataFrame
dataset = pd.read_csv('data/adult/adult.csv')
data50minus = dataset[dataset["income"].str.contains("<=50K")].iloc[:11687]
data50plus = dataset[dataset["income"].str.contains(">50K")].iloc[:11687]
databalanced = pd.concat([data50minus, data50plus])
balanced_dataset = databalanced.sample(frac=1).reset_index(drop=True)
# Drop fnlwgt, education, education-num, capital-gain, capital-loss as Lohaus et al do
balanced_dataset = balanced_dataset.drop(columns=['fnlwgt', 'education', 'capital-gain', 'capital-loss'])

### Baseline Results on Unbalanced Dataset

In [5]:
baseline_linear_hinge_unbalanced = BaselineModel(kernel='linear',loss_name='hinge')
baseline_rbf_hinge_unbalanced = BaselineModel(kernel='rbf',loss_name='hinge')

#### Sensitive Attribute = 'Sex' 

In [6]:
baseline_1_tester = TestProcedure(baseline_linear_hinge_unbalanced)
baseline_1_test_results = baseline_1_tester.RunTest(dataset=unbalanced_dataset,sens_attribute='sex')

Sensitive Attribute: sex
Kernel Type: linear
Loss Func: hinge
Run Time: 2.9782 seconds
Prediction Accuracy: 80.7817 %
DDP Score: 0.2505
DEO Score: 0.1817


In [7]:
baseline_2_tester = TestProcedure(baseline_rbf_hinge_unbalanced)
baseline_2_test_results = baseline_2_tester.RunTest(dataset=unbalanced_dataset,sens_attribute='sex')

Sensitive Attribute: sex
Kernel Type: rbf
Loss Func: hinge
Run Time: 2.6491 seconds
Prediction Accuracy: 81.1868 %
DDP Score: 0.2377
DEO Score: 0.186


#### Sensitive Attribute = 'Race' 

In [8]:
baseline_3_tester = TestProcedure(baseline_linear_hinge_unbalanced)
baseline_3_test_results = baseline_3_tester.RunTest(dataset=unbalanced_dataset,sens_attribute='race')

Sensitive Attribute: race
Kernel Type: linear
Loss Func: hinge
Run Time: 2.2018 seconds
Prediction Accuracy: 80.7271 %
DDP Score: 0.0956
DEO Score: 0.0135


In [9]:
baseline_4_tester = TestProcedure(baseline_rbf_hinge_unbalanced)
baseline_4_test_results = baseline_4_tester.RunTest(dataset=unbalanced_dataset,sens_attribute='race')

Sensitive Attribute: race
Kernel Type: rbf
Loss Func: hinge
Run Time: 2.403 seconds
Prediction Accuracy: 81.0482 %
DDP Score: 0.0819
DEO Score: 0.0414


### Baseline Results on Balanced Dataset

In [10]:
baseline_linear_hinge_balanced = BaselineModel(kernel='linear',loss_name='hinge')
baseline_rbf_hinge_balanced = BaselineModel(kernel='rbf',loss_name='hinge')

#### Sensitive Attribute = 'Sex'

In [13]:
baseline_5_tester = TestProcedure(baseline_linear_hinge_balanced)
baseline_5_test_results = baseline_5_tester.RunTest(dataset=balanced_dataset,sens_attribute='sex')

Sensitive Attribute: sex
Kernel Type: linear
Loss Func: hinge
Run Time: 4.1837 seconds
Prediction Accuracy: 75.9042 %
DDP Score: 0.4726
DEO Score: 0.264


In [14]:
baseline_6_tester = TestProcedure(baseline_rbf_hinge_balanced)
baseline_6_test_results = baseline_6_tester.RunTest(dataset=balanced_dataset,sens_attribute='sex')

Sensitive Attribute: sex
Kernel Type: rbf
Loss Func: hinge
Run Time: 2.7295 seconds
Prediction Accuracy: 77.1399 %
DDP Score: 0.4528
DEO Score: 0.2752


#### Sensitive Attribute = 'Race' 

In [15]:
baseline_7_tester = TestProcedure(baseline_linear_hinge_balanced)
baseline_7_test_results = baseline_7_tester.RunTest(dataset=balanced_dataset,sens_attribute='race')

Sensitive Attribute: race
Kernel Type: linear
Loss Func: hinge
Run Time: 2.0199 seconds
Prediction Accuracy: 75.9764 %
DDP Score: 0.1552
DEO Score: 0.0423


In [16]:
baseline_8_tester = TestProcedure(baseline_rbf_hinge_balanced)
baseline_8_test_results = baseline_8_tester.RunTest(dataset=balanced_dataset,sens_attribute='race')

Sensitive Attribute: race
Kernel Type: rbf
Loss Func: hinge
Run Time: 2.4019 seconds
Prediction Accuracy: 76.1568 %
DDP Score: 0.1623
DEO Score: 0.0478


## Baseline Hyperparameter Grid Search **for Accuracy**

### Unbalanced Dataset 

In [17]:
sens_attribute = 'sex'

grid_search_1_model = BaselineModel()

beta_params = [0.0001, 0.001, 0.01] # For Linear Kernel
gamma_params = [0.01, 0.1, 1] # For RBF Kernel
kernel_params = ['linear','rbf']
cv_params = {'l2_beta': beta_params,'gamma': gamma_params,'kernel':kernel_params}

x_data, y_data, s_data = build_adult_data(unbalanced_dataset,sens_attribute,load_data_size=None)
x_train, x_test, y_train, y_test, s_train, s_test = train_test_split(x_data, y_data, s_data, train_size=1200, shuffle=True)

grid_accuracy_unbalanced = GridSearchCV(grid_search_1_model,cv_params, cv=4, n_jobs=1, scoring='accuracy')
grid_accuracy_unbalanced.fit(x_train, y_train, s_train = s_train)

GridSearchCV(cv=4, error_score=nan,
             estimator=BaselineModel(gamma=0.1, kernel='linear', l2_beta=0.001,
                                     lambda_max=1, loss_name='hinge',
                                     max_iter=3000, reason_points=0.5,
                                     solver='SCS', verbose=False),
             iid='deprecated', n_jobs=1,
             param_grid={'gamma': [0.01, 0.1, 1], 'kernel': ['linear', 'rbf'],
                         'l2_beta': [0.0001, 0.001, 0.01]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

In [18]:
grid_accuracy_unbalanced.cv_results_

{'mean_fit_time': array([1.20719606, 1.2784189 , 1.19964206, 6.80310798, 1.39515048,
        1.88843232, 1.14222383, 1.16476566, 1.11116141, 1.81194919,
        1.15477502, 1.41130453, 1.10925257, 1.13331962, 1.06920606,
        8.39622611, 1.36117834, 1.23755175]),
 'mean_score_time': array([0.00179774, 0.00433099, 0.00179869, 0.0086382 , 0.00708729,
        0.01316088, 0.00286865, 0.00171763, 0.00163621, 0.00628066,
        0.00634682, 0.00643331, 0.00168717, 0.0017342 , 0.00167555,
        0.01085103, 0.00638688, 0.00668693]),
 'mean_test_score': array([0.81083333, 0.81083333, 0.81083333, 0.81083333, 0.81583333,
        0.75416667, 0.81083333, 0.81083333, 0.81083333, 0.81333333,
        0.81      , 0.81083333, 0.81083333, 0.81083333, 0.81083333,
        0.81833333, 0.82166667, 0.82583333]),
 'param_gamma': masked_array(data=[0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1,
                    0.1, 0.1, 1, 1, 1, 1, 1, 1],
              mask=[False, False, False, False, False, 

### Balanced Dataset 

In [19]:
grid_search_2_model = BaselineModel()

beta_params = [0.0001, 0.001, 0.01] # For Linear Kernel
gamma_params = [0.01, 0.1, 1] # For RBF Kernel
kernel_params = ['linear','rbf']
cv_params = {'l2_beta': beta_params,'gamma': gamma_params,'kernel':kernel_params}

x_data, y_data, s_data = build_adult_data(balanced_dataset,sens_attribute,load_data_size=None)
x_train, x_test, y_train, y_test, s_train, s_test = train_test_split(x_data, y_data, s_data, train_size=1200, shuffle=True)

grid_accuracy_balanced = GridSearchCV(grid_search_2_model,cv_params, cv=4, n_jobs=1, scoring='accuracy')
grid_accuracy_balanced.fit(x_train, y_train, s_train = s_train)

GridSearchCV(cv=4, error_score=nan,
             estimator=BaselineModel(gamma=0.1, kernel='linear', l2_beta=0.001,
                                     lambda_max=1, loss_name='hinge',
                                     max_iter=3000, reason_points=0.5,
                                     solver='SCS', verbose=False),
             iid='deprecated', n_jobs=1,
             param_grid={'gamma': [0.01, 0.1, 1], 'kernel': ['linear', 'rbf'],
                         'l2_beta': [0.0001, 0.001, 0.01]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

In [20]:
grid_accuracy_balanced.cv_results_

{'mean_fit_time': array([1.09765297, 0.9905321 , 1.00305456, 4.18562233, 1.90989661,
        1.40704501, 1.04397076, 0.98558491, 0.94311798, 2.2928803 ,
        1.20332927, 1.69422036, 1.09758472, 0.99101532, 0.98380077,
        5.58235627, 1.13196176, 1.20283735]),
 'mean_score_time': array([0.00235182, 0.00237882, 0.00178182, 0.0109877 , 0.00637692,
        0.008524  , 0.00281286, 0.00546473, 0.00171304, 0.00682014,
        0.00865889, 0.01067275, 0.00266516, 0.00186557, 0.00295907,
        0.00635469, 0.00633723, 0.00722808]),
 'mean_test_score': array([0.7725    , 0.7725    , 0.7725    , 0.7725    , 0.7725    ,
        0.77583333, 0.7725    , 0.7725    , 0.7725    , 0.7625    ,
        0.76916667, 0.7725    , 0.7725    , 0.7725    , 0.7725    ,
        0.77166667, 0.77166667, 0.7775    ]),
 'param_gamma': masked_array(data=[0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1,
                    0.1, 0.1, 1, 1, 1, 1, 1, 1],
              mask=[False, False, False, False, False, 

## Baseline Hyperparameter Grid Search **for DDP**

In [22]:
grid_split_counter = 1

def get_positive_rate(y_predicted, y_true):
    tn, fp, fn, tp = confusion_matrix(y_true, y_predicted).ravel()
    pr = (tp+fp) / (tp+fp+tn+fn)
    return pr

def DDP_Grid_Scoring(y_true,y_predicted,sens_attr,size):
    global grid_split_counter
    
    chunk_size = 1200/size
    sens_attribute = sens_attr[int((grid_split_counter - 1)*chunk_size): int(grid_split_counter*chunk_size)]
    
    if grid_split_counter == size:
        grid_split_counter = 1
    else:
        grid_split_counter += 1
    
    positive_rate_prot = get_positive_rate(y_predicted[sens_attribute==-1], y_true[sens_attribute==-1])
    positive_rate_unprot = get_positive_rate(y_predicted[sens_attribute==1], y_true[sens_attribute==1])
    
    DDP = abs(positive_rate_unprot - positive_rate_prot)
    return DDP
size = 4
DDP_scorer = make_scorer(DDP_Grid_Scoring, greater_is_better=False, sens_attr = s_train, size = size)

### Unbalanced Dataset 

In [23]:
grid_search_3_model = BaselineModel()

beta_params = [0.0001, 0.001, 0.01] # For Linear Kernel
gamma_params = [0.01, 0.1, 1] # For RBF Kernel
kernel_params = ['linear','rbf']
cv_params = {'l2_beta': beta_params,'gamma': gamma_params,'kernel':kernel_params}

x_data, y_data, s_data = build_adult_data(unbalanced_dataset,sens_attribute,load_data_size=None)
x_train, x_test, y_train, y_test, s_train, s_test = train_test_split(x_data, y_data, s_data, train_size=1200, shuffle=True)

grid_ddp_unbalanced = GridSearchCV(grid_search_3_model,cv_params, cv=size, n_jobs=1, scoring=DDP_scorer)
grid_ddp_unbalanced.fit(x_train, y_train, s_train = s_train)

GridSearchCV(cv=4, error_score=nan,
             estimator=BaselineModel(gamma=0.1, kernel='linear', l2_beta=0.001,
                                     lambda_max=1, loss_name='hinge',
                                     max_iter=3000, reason_points=0.5,
                                     solver='SCS', verbose=False),
             iid='deprecated', n_jobs=1,
             param_grid={'gamma': [0.01, 0.1, 1], 'kernel': ['linear', 'rbf'],
                         'l2_beta': [0.0001, 0.001, 0.01]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=make_scorer(DDP_Grid_Scoring, greater_is_better=False, sens_attr=[-1  1  1 ...  1  1 -1], size=4),
             verbose=0)

In [24]:
grid_ddp_unbalanced.cv_results_

{'mean_fit_time': array([ 2.22848421,  3.00039572,  3.30898368,  3.7470513 ,  2.57386315,
         1.84289491,  2.26280993,  3.08184487,  3.27925593,  2.24854636,
         1.23800302,  1.60500914,  2.25885379,  3.05390179,  3.31596917,
        13.27474719,  1.48426509,  1.27812821]),
 'mean_score_time': array([0.00431025, 0.00416374, 0.00554878, 0.00912488, 0.01007831,
        0.01039159, 0.00539005, 0.00422239, 0.00453377, 0.01194519,
        0.00927061, 0.01337504, 0.00416052, 0.00567204, 0.00426328,
        0.01131594, 0.00912982, 0.01148134]),
 'mean_test_score': array([-0.0675471 , -0.05998992, -0.04440507, -0.04637923, -0.02826469,
         0.        , -0.0675471 , -0.05998992, -0.04440507, -0.03512819,
        -0.05058532, -0.04738412, -0.0675471 , -0.05998992, -0.04440507,
        -0.02860516, -0.02724336, -0.0206946 ]),
 'param_gamma': masked_array(data=[0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1,
                    0.1, 0.1, 1, 1, 1, 1, 1, 1],
              mask=

### Balanced Dataset

In [25]:
grid_search_4_model = BaselineModel()

beta_params = [0.0001, 0.001, 0.01] # For Linear Kernel
gamma_params = [0.01, 0.1, 1] # For RBF Kernel
kernel_params = ['linear','rbf']
cv_params = {'l2_beta': beta_params,'gamma': gamma_params,'kernel':kernel_params}

x_data, y_data, s_data = build_adult_data(balanced_dataset,sens_attribute,load_data_size=None)
x_train, x_test, y_train, y_test, s_train, s_test = train_test_split(x_data, y_data, s_data, train_size=1200, shuffle=True)

grid_ddp_balanced = GridSearchCV(grid_search_4_model,cv_params, cv=size, n_jobs=1, scoring=DDP_scorer)
grid_ddp_balanced.fit(x_train, y_train, s_train = s_train)

GridSearchCV(cv=4, error_score=nan,
             estimator=BaselineModel(gamma=0.1, kernel='linear', l2_beta=0.001,
                                     lambda_max=1, loss_name='hinge',
                                     max_iter=3000, reason_points=0.5,
                                     solver='SCS', verbose=False),
             iid='deprecated', n_jobs=1,
             param_grid={'gamma': [0.01, 0.1, 1], 'kernel': ['linear', 'rbf'],
                         'l2_beta': [0.0001, 0.001, 0.01]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=make_scorer(DDP_Grid_Scoring, greater_is_better=False, sens_attr=[-1  1  1 ...  1  1 -1], size=4),
             verbose=0)

In [26]:
grid_ddp_balanced.cv_results_

{'mean_fit_time': array([1.26989788, 1.183608  , 1.16977447, 3.50820816, 2.3081789 ,
        1.51904559, 1.21850479, 1.18183589, 1.20453262, 2.26028287,
        1.27595901, 1.59541029, 1.22838646, 1.17255813, 1.17086971,
        2.72882837, 1.12733746, 1.17926806]),
 'mean_score_time': array([0.0066185 , 0.0042212 , 0.00517392, 0.00894862, 0.00885636,
        0.00892937, 0.00433064, 0.0045476 , 0.00570983, 0.01269633,
        0.01414382, 0.01132411, 0.00454259, 0.00480527, 0.00471401,
        0.00896841, 0.01169652, 0.01162612]),
 'mean_test_score': array([-0.04277448, -0.03056034, -0.0366241 , -0.0421681 , -0.03315909,
        -0.0370029 , -0.04277448, -0.03056034, -0.0366241 , -0.0638282 ,
        -0.04414838, -0.0421681 , -0.04277448, -0.03056034, -0.0366241 ,
        -0.04092351, -0.03365023, -0.04343507]),
 'param_gamma': masked_array(data=[0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1,
                    0.1, 0.1, 1, 1, 1, 1, 1, 1],
              mask=[False, False, Fal