# Machine Learning Assignment
## Setup Libraries and Custom Functions

In [1]:
# Install libraries?
!pip install numpy
!pip install scipy
!pip install pandas
!pip install aif360
!pip install sklearn
!pip install dataframe-image

In [2]:
# Import Libraries
import numpy as np
import scipy as sp
import pandas as pd
from aif360.datasets import AdultDataset
from aif360.datasets import GermanDataset
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_german
from aif360.metrics import ClassificationMetric
from aif360.sklearn.metrics import make_scorer
from aif360.sklearn.metrics import equal_opportunity_difference
from aif360.algorithms.preprocessing.reweighing import Reweighing
from sklearn.preprocessing import StandardScaler  #MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import *
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
import dataframe_image as dfi

# Set numpy random seed
np.random.seed(0)

In [3]:
# Define any functions and classes to be used

# K-fold cross validation for TPR Difference
def ManualKFold(data,method,unprivileged_groups,privileged_groups,**params):
    tpr_diffs=[]
    # Always reset seed as sci-kit learn uses numpy random seed and we want to validate results throughout
    np.random.seed(0)
    
    # 5-fold validation for loop
    for i in range(5):
        # use aif360 subset method to create train and test data, using indices for distinct fifths of samples
        train_i = data.subset([j for j in range(0,int(np.floor(i*data.features.shape[0]/5)))] +
                              [j for j in range(int(np.floor((i+1)*data.features.shape[0]/5)), 
                                                data.features.shape[0]-1)])
        test_i = data.subset([j for j in range(int(np.floor(i*data.features.shape[0]/5)),
                                               int(np.floor((i+1)*data.features.shape[0]/5)))])
        # scale to min/max and get targets
        X_train_i = scale_orig.fit_transform(train_i.features)
        y_train_i = train_i.labels.ravel()
        # scale to min/max and get targets    
        X_test_i = scale_orig.transform(test_i.features)
        y_test_i = test_i.labels.ravel()
            
        if method == 'LogReg':
            # Logistic regression defined here
            learner = LogisticRegression(**params,random_state=1)
        elif method == 'RForest':
            # Randomn Forest defined here
            learner = RandomForestClassifier(**params,random_state=1)
        else:
            # Other methods may be coded here
            raise ValueError('Invalid classification method')
        
        # Fit to chosen classification method and predict on test set
        learner.fit(X_train_i,y_train_i,sample_weight=train_i.instance_weights)
        predictions = learner.predict(X_test_i)
        
        # Compute TPR Difference using AIF360 metrics
        test_i_pred = test_i.copy()
        test_i_pred.labels = predictions
        metric = ClassificationMetric(test_i, test_i_pred, unprivileged_groups=unprivileged_groups, 
                                      privileged_groups=privileged_groups)
            
        tpr_diff = metric.equal_opportunity_difference()
        tpr_diffs.append(tpr_diff)
    # return all values    
    return tpr_diffs

# Grid search for TPR difference
def GridSearchTPR(data,method,unprivileged_groups,privileged_groups,param_values_dict):
    # Find best params for TPR difference fairness
    param_grid_dicts = ParameterGrid(param_values_dict)
    
    print("Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference")
    
    best_score = -1
    best_params = {}
    tpr_diff_means = []
    # Loop through each parameter combination
    for params in param_grid_dicts:
        # Perform K-Fold validation for chosen parameters
        tpr_diffs = ManualKFold(data,method,unprivileged_groups,privileged_groups,**params)
        tpr_diff_means.append(np.mean(tpr_diffs))
        
        # update best score and parameters if current score is greater than max score
        if np.mean(tpr_diffs) > best_score:
            best_params = params
            best_score = np.mean(tpr_diffs)
        
    print("Best: %f using %s" % (best_score, best_params))
    # Output best score, best parameters and all scores and parameters in lists
    return (best_score, best_params, tpr_diff_means, param_grid_dicts)

# Grid search for accuracy
def GridSearchAccuracy(model,grid,X,y,instance_weights):
    # reset random seed again
    np.random.seed(0)
    
    weighted_grid = grid.copy()
    # add instance weights to parameters for GridSearchCV
    weighted_grid['sample_weights'] = instance_weights
    # define folds
    kf = KFold(n_splits=5,shuffle=True,random_state=1)
    # define grid search for best accuracy
    grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=kf, scoring='accuracy',error_score=0)
    # fit grid to data
    grid_result = grid_search.fit(X, y)
    
    # summarize results
    print("Accuracy")
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    
    # return best score, best parameters and all grid search results
    return (grid_result.best_score_, grid_result.best_params_, grid_result.cv_results_)

# Get the accuracy and fairness for the most accurate and fairest parameters
def GetAccAndFairnessForBestParams(acc_params,fair_params,X_train,y_train,X_test,y_test,test,
                                   unprivileged_groups,privileged_groups,method,instance_weights):
    # reset random seed again
    np.random.seed(0)
    
    if method == 'LogReg':
        # most accurate parameters fit for Logistic Regression
        acc_fit = LogisticRegression(C=acc_params['C'],penalty=acc_params['penalty'],
                                     solver=acc_params['solver']).fit(X_train,y_train,sample_weight=instance_weights)
        # fairest parameters fit for Logistic Regression
        fair_fit = LogisticRegression(C=fair_params['C'],penalty=fair_params['penalty'],
                              solver=fair_params['solver']).fit(X_train,y_train,sample_weight=instance_weights)
    elif method == 'RForest':
        # most accurate parameters fit for Random Forest
        acc_fit = RandomForestClassifier(bootstrap=True,max_depth=acc_params['max_depth'],
                                         max_features=acc_params['max_features'],min_samples_leaf=1,
                                         min_samples_split=2,n_estimators=acc_params['n_estimators'],
                                         random_state=1).fit(X_train,y_train,sample_weight=instance_weights)
        # most accurate parameters fit for Random Forest
        fair_fit = RandomForestClassifier(bootstrap=True,max_depth=fair_params['max_depth'],
                                         max_features=fair_params['max_features'],min_samples_leaf=1,
                                         min_samples_split=2,n_estimators=fair_params['n_estimators'],
                                         random_state=1).fit(X_train,y_train,sample_weight=instance_weights)
    else:
        raise ValueError('Invalid classification method')
    
    # Predict on test set using chosen fit
    acc_predict = acc_fit.predict(X_test)
    fair_predict = fair_fit.predict(X_test)
    
    # calculate TPR difference for most accurate parameters
    acc_test_pred = test.copy()
    acc_test_pred.labels = acc_predict
    acc_metric = ClassificationMetric(test,acc_test_pred,unprivileged_groups=unprivileged_groups,
                                      privileged_groups=privileged_groups)
    acc_tpr_diff = acc_metric.equal_opportunity_difference()
     
    # calculate TPR difference for most fairest parameters    
    fair_test_pred = test.copy()
    fair_test_pred.labels = fair_predict
    fair_metric = ClassificationMetric(test,fair_test_pred,unprivileged_groups=unprivileged_groups,
                                       privileged_groups=privileged_groups_ad)
    fair_tpr_diff = fair_metric.equal_opportunity_difference()
    
    # calculate accuracy for both parameters
    acc_acc = sum(acc_predict==y_test)/len(y_test)
    fair_acc = sum(fair_predict==y_test)/len(y_test)
        
    print(acc_params, "Accuracy", acc_acc)
    print(fair_params, "Accuracy", fair_acc)
    print(acc_params, "Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference", 
          acc_tpr_diff)
    print(fair_params, "Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference", 
          fair_tpr_diff)
    # return the accuracy then fairness respectively, for the most accurate parameters then the fairest ones
    return (acc_acc, fair_acc, acc_tpr_diff, fair_tpr_diff)

# Get accuracy for chosen parameters
def GetAccuracy(params,X_train,y_train,X_test,y_test,test,unprivileged_groups,privileged_groups,method,
                instance_weights):
    # reset seed again
    np.random.seed(0)
    
    # choose fit method depending on user input
    if method == 'LogReg':
        fit = LogisticRegression(C=params['C'],penalty=params['penalty'],
                                 solver=params['solver']).fit(X_train,y_train,sample_weight=instance_weights)
    elif method == 'RForest':
        fit = RandomForestClassifier(bootstrap=True,max_depth=params['max_depth'],
                                         max_features=params['max_features'],min_samples_leaf=1,
                                         min_samples_split=2,n_estimators=params['n_estimators'],
                                         random_state=1).fit(X_train,y_train,sample_weight=instance_weights)
    else:
        raise ValueError('Invalid classification method')
    
    # make prediction on test set
    predict = fit.predict(X_test)
    
    # calculate accuracy
    test_pred = test.copy()
    test_pred.labels = predict
    acc = sum(predict==y_test)/len(y_test)
          
    return acc

# Get fairness (TPR difference) for chosen parameters
def GetFairness(params,X_train,y_train,X_test,y_test,test,unprivileged_groups,privileged_groups,method,
                instance_weights):
    # reset seed again
    np.random.seed(0)
    
    # choose fit method depending on user input
    if method == 'LogReg':
        fit = LogisticRegression(C=params['C'],penalty=params['penalty'],
                                 solver=params['solver']).fit(X_train,y_train,sample_weight=instance_weights)
    elif method == 'RForest':
        fit = RandomForestClassifier(bootstrap=True,max_depth=params['max_depth'],
                                         max_features=params['max_features'],min_samples_leaf=1,
                                         min_samples_split=2,n_estimators=params['n_estimators'],
                                         random_state=1).fit(X_train,y_train,sample_weight=instance_weights)
    else:
        raise ValueError('Invalid classification method')
    
    # make prediction on test set
    predict = fit.predict(X_test)
    
    # calculate TPR difference
    test_pred = test.copy()
    test_pred.labels = predict
    metric = ClassificationMetric(test,test_pred,unprivileged_groups=unprivileged_groups,
                                      privileged_groups=privileged_groups)
    tpr_diff = metric.equal_opportunity_difference()
          
    return tpr_diff

# Find the best model i.e. the model and parameters that provide an accuracy within a user defined tolerance
# of the best accuracy achievable from all models and parameters tested, and that provide the greatest fairness
def FindBestModel(methods,param_dicts,X_train,y_train,train,instance_weights,unprivileged_groups,privileged_groups,
                  X_test,y_test,test,tolerance=0.1):
    if len(methods)!=len(param_dicts):
        raise ValueError('Need one parameter grid per model')
    
    # reset seed again
    np.random.seed(0)
    
    # convert shorthand strings for classification methods to models for the accuracy grid search
    models={}
    for method in methods:
        if method == 'LogReg':
            models[method] = LogisticRegression()
        elif method == 'RForest':
            models[method] = RandomForestClassifier()
        else:
            raise ValueError('Invalid classification method')
    
    # find the best achievable accuracy across all models and parameters
    acc_grid_results = []
    # best achievable accuracy for each method in a list
    best_accs = []
    # best achievable accuracy for all methods and parameters
    best_acc = 0
    for i in range(len(methods)):
        # accuracy grid search
        acc_score, acc_params, acc_result = GridSearchAccuracy(
            models[methods[i]],param_dicts[i],X_train,y_train,instance_weights)
        
        acc_grid_results.append(acc_result)
        best_accs.append(acc_score)
        if acc_score > best_acc:
            best_acc = acc_score
    
    # now find the best TPR difference achievable within the accuracy tolerance
    acc = 0
    fairness = -1
    model = ''
    params = {}
    for i in range(len(methods)):
        # don't bother checking if best accuracy for this method doesn't fall within the tolerance for current best
        if best_accs[i] >= (1-tolerance)*best_acc:
            # TPR grid search for best value
            fair_score, fair_params, tpr_diff_means, param_grid_dicts = GridSearchTPR(
                train,methods[i],unprivileged_groups,privileged_groups,param_dicts[i])
            for j in range(len(acc_grid_results[i]['mean_test_score'])):
                ij_acc = acc_grid_results[i]['mean_test_score'][j]
                # check current parameters provide enough accuracy
                if ij_acc >= (1-tolerance)*best_acc:
                    ij_fairness = tpr_diff_means[j]
                    # then check the fairness (TPR difference) is improved
                    if ij_fairness > fairness:
                        # set new best parameter values, but don't change the best achievable accuracy
                        acc = ij_acc
                        fairness = ij_fairness
                        model = methods[i]
                        params = param_grid_dicts[j]
    
    # now test the best parameters for accuracy and TPR difference on the test set
    test_acc = GetAccuracy(params,X_train,y_train,X_test,y_test,test,unprivileged_groups,privileged_groups,model,
                instance_weights)
    test_fair = GetFairness(params,X_train,y_train,X_test,y_test,test,unprivileged_groups,privileged_groups,model,
                instance_weights)
    print(f"Best model: {model} using params {params}, train accuracy: {acc}, train TPR difference {fairness}, test accuracy: {test_acc} and test TPR difference: {test_fair}")
    # output the best model, parameters and the accuracy and fairness on the test set
    return (model, params, test_acc, test_fair)

In [4]:
# Import Adult Dataset
label_map = {1.0: '>50K', 0.0: '<=50K'}
protected_attribute_maps = [{1.0: 'Male', 0.0: 'Female'}]
ad = AdultDataset(protected_attribute_names=['sex'],
                  categorical_features=['workclass', 'education', 'marital-status',
                                        'occupation', 'relationship', 'native-country', 'race'],
                  privileged_classes=[['Male']], 
                  metadata={'label_map': label_map, 'protected_attribute_maps': protected_attribute_maps})
(ad_df,ad_fts) = ad.convert_to_dataframe()



In [5]:
# Import German Dataset
label_map = {1.0: 'Good Credit', 0.0: 'Bad Credit'}
protected_attribute_maps = [{1.0: 'Male', 0.0: 'Female'}]
gd = GermanDataset(protected_attribute_names=['sex'],
                   privileged_classes=[['male']], 
                   metadata={'label_map': label_map,'protected_attribute_maps': protected_attribute_maps})
(gd_df,gd_fts) = gd.convert_to_dataframe()

## Task 1

In [6]:
# Load adult dataset with binary sensitive attribute as sex
privileged_groups_ad = [{'sex': 1}]
unprivileged_groups_ad = [{'sex': 0}]
ad_dataset = load_preproc_data_adult(['sex'])

# Split between training and test set.
train_ad, test_ad = ad_dataset.split([0.7], shuffle=False)
train_ad_instance_weights = train_ad.instance_weights
print("training data size", train_ad.features.shape)
print("dataset feature names", train_ad.feature_names)

# Normalize the dataset, both train and test. This should always be done in any machine learning pipeline!
scale_orig = StandardScaler()
X_train_ad = scale_orig.fit_transform(train_ad.features)
y_train_ad = train_ad.labels.ravel()

X_test_ad = scale_orig.transform(test_ad.features) 
y_test_ad = test_ad.labels.ravel()

training data size (34189, 18)
dataset feature names ['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']


In [7]:
# define logistic regression parameter grid dictionary
solvers = ['newton-cg', 'lbfgs', 'liblinear']
penalty = ['l2']
c_values = [100, 10, 1.0, 0.1, 0.01]
params_dict_lr = dict(solver=solvers,penalty=penalty,C=c_values)

In [8]:
# find the best parameters for accuracy for logistic regression on the adult train set
model = LogisticRegression()
acc_score_ad, acc_params_ad, acc_result_ad = GridSearchAccuracy(model,params_dict_lr,X_train_ad,y_train_ad,
                                                                train_ad_instance_weights)

Accuracy
Best: 0.804762 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}


In [9]:
# find the best parameters for TPR difference for logistic regression on the adult train set
fair_score_ad, fair_params_ad, tpr_diff_means_ad, param_grid_dicts_ad = GridSearchTPR(
    train_ad,'LogReg',unprivileged_groups_ad,privileged_groups_ad,params_dict_lr)

Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: -0.452224 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}


In [10]:
# test the best parameters for both methods on the on the adult test set
acc_acc_ad, fair_acc_ad, acc_tpr_diff_ad, fair_tpr_diff_ad = GetAccAndFairnessForBestParams(
    acc_params_ad,fair_params_ad,X_train_ad,y_train_ad,X_test_ad,
    y_test_ad,test_ad,unprivileged_groups_ad,privileged_groups_ad,'LogReg',train_ad_instance_weights)

{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'} Accuracy 0.8023612912031666
{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'} Accuracy 0.8023612912031666
{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'} Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference -0.44323071830519695
{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'} Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference -0.44323071830519695


In [11]:
# output the above results as a pandas dataframe and export png
params_df_ad = pd.DataFrame([acc_params_ad,fair_params_ad],index=['Most Accurate','Fairest'])
params_df_ad['Accuracy'] = [acc_acc_ad, fair_acc_ad]
params_df_ad['Fairness'] = [acc_tpr_diff_ad, fair_tpr_diff_ad]
dfi.export(params_df_ad, 'adult_dataframe_log_reg.png')
params_df_ad

[0518/012314.859265:INFO:headless_shell.cc(660)] Written to file /var/folders/n7/079skxw56td31dml628vdw9h0000gn/T/tmpns8931z4/temp.png.


Unnamed: 0,C,penalty,solver,Accuracy,Fairness
Most Accurate,0.01,l2,newton-cg,0.802361,-0.443231
Fairest,0.01,l2,newton-cg,0.802361,-0.443231


In [12]:
# define random forest parameter grid

# Number of trees in random forest
n_estimators = [10,50,100,500,1000]
# Number of features to consider at every split
max_features = ['auto','log2']
# Maximum number of levels in tree
max_depth = [10,50,100]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1]
# Method of selecting samples for training each tree
bootstrap = [True]
# define grid search for Accuracy
params_dict_rf = dict(n_estimators=n_estimators,max_features=max_features,max_depth=max_depth,
               min_samples_split=min_samples_split,min_samples_leaf=min_samples_leaf,bootstrap=bootstrap)

In [13]:
# accuracy grid search for random forest on the adult train dataset
model = RandomForestClassifier()
acc_score_ad_rf, acc_params_ad_rf, acc_result_ad_rf = GridSearchAccuracy(model,params_dict_rf,X_train_ad,y_train_ad,
                                                                         train_ad_instance_weights)

Accuracy
Best: 0.804850 using {'bootstrap': True, 'max_depth': 100, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10}


In [14]:
# TPR grid search for random forest on the adult train dataset
fair_score_ad_rf, fair_params_ad_rf, tpr_diff_means_ad_rf, param_grid_dicts_ad_rf = GridSearchTPR(
    train_ad,'RForest',unprivileged_groups_ad,privileged_groups_ad,params_dict_rf)

Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: -0.444728 using {'bootstrap': True, 'max_depth': 10, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10}


In [15]:
# now test the above parameters on the test set
acc_acc_ad_rf, fair_acc_ad_rf, acc_tpr_diff_ad_rf, fair_tpr_diff_ad_rf = GetAccAndFairnessForBestParams(
    acc_params_ad_rf,fair_params_ad_rf,X_train_ad,y_train_ad,X_test_ad,y_test_ad,
    test_ad,unprivileged_groups_ad,privileged_groups_ad,'RForest',train_ad_instance_weights)

{'bootstrap': True, 'max_depth': 100, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10} Accuracy 0.8009963829932437
{'bootstrap': True, 'max_depth': 10, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10} Accuracy 0.8004504197092746
{'bootstrap': True, 'max_depth': 100, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10} Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference -0.4468718967229394
{'bootstrap': True, 'max_depth': 10, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10} Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference -0.4276729559748428


In [16]:
# again, pandas dataframe and png output
params_df_ad_rf = pd.DataFrame([acc_params_ad_rf,fair_params_ad_rf],index=['Most Accurate','Fairest'])
params_df_ad_rf['Accuracy'] = [acc_acc_ad_rf, fair_acc_ad_rf]
params_df_ad_rf['Fairness'] = [acc_tpr_diff_ad_rf, fair_tpr_diff_ad_rf]
dfi.export(params_df_ad_rf, 'adult_dataframe_random_forest.png')
params_df_ad_rf

[0518/013128.860842:INFO:headless_shell.cc(660)] Written to file /var/folders/n7/079skxw56td31dml628vdw9h0000gn/T/tmpm9l7vc37/temp.png.


Unnamed: 0,bootstrap,max_depth,max_features,min_samples_leaf,min_samples_split,n_estimators,Accuracy,Fairness
Most Accurate,True,100,auto,1,2,10,0.800996,-0.446872
Fairest,True,10,auto,1,2,10,0.80045,-0.427673


In [17]:
# Load German dataset with binary sensitive attribute as sex
privileged_groups_ge = [{'sex': 1}]
unprivileged_groups_ge = [{'sex': 0}]
ge_dataset = load_preproc_data_german(['sex'])

# Split between training and test set.
train_ge, test_ge = ge_dataset.split([0.7], shuffle=False)
train_ge_instance_weights = train_ge.instance_weights
print("training data size", train_ge.features.shape)
print("dataset feature names", train_ge.feature_names)

# Normalize the dataset, both train and test. This should always be done in any machine learning pipeline!
scale_orig = StandardScaler()
X_train_ge = scale_orig.fit_transform(train_ge.features)
y_train_ge = train_ge.labels.ravel()

X_test_ge = scale_orig.transform(test_ge.features) 
y_test_ge = test_ge.labels.ravel()

training data size (700, 11)
dataset feature names ['age', 'sex', 'credit_history=Delay', 'credit_history=None/Paid', 'credit_history=Other', 'savings=500+', 'savings=<500', 'savings=Unknown/None', 'employment=1-4 years', 'employment=4+ years', 'employment=Unemployed']


In [18]:
# grid search for both metrics for logistic regression on the german dataset
model = LogisticRegression()
acc_score_ge, acc_params_ge, acc_result_ge = GridSearchAccuracy(model,params_dict_lr,X_train_ge,y_train_ge,
                                                                train_ge_instance_weights)

print("")
fair_score_ge, fair_params_ge, tpr_diff_means_ge, param_grid_dicts_ge = GridSearchTPR(
    train_ge,'LogReg',unprivileged_groups_ge,privileged_groups_ge,params_dict_lr)

Accuracy
Best: 0.705714 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}

Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: -0.012121 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}


In [19]:
# grid search for both metrics for random forest on the german dataset
model = RandomForestClassifier()

acc_score_ge_rf, acc_params_ge_rf, acc_result_ge_rf = GridSearchAccuracy(model,params_dict_rf,X_train_ge,y_train_ge,
                                                                         train_ge_instance_weights)
    
print("")
fair_score_ge_rf, fair_params_ge_rf, tpr_diff_means_ge_rf, param_grid_dicts_ge_rf = GridSearchTPR(
    train_ge,'RForest',unprivileged_groups_ge,privileged_groups_ge,params_dict_rf)

Accuracy
Best: 0.697143 using {'bootstrap': True, 'max_depth': None, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 1000}

Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: -0.104215 using {'bootstrap': True, 'max_depth': 10, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}


In [20]:
# test the most accurate and fairest logistic regression parameters on the german dataset
print("Logistic Regression")
acc_acc_ge, fair_acc_ge, acc_tpr_diff_ge, fair_tpr_diff_ge = GetAccAndFairnessForBestParams(
    acc_params_ge,fair_params_ge,X_train_ge,y_train_ge,X_test_ge,y_test_ge,test_ge,
    unprivileged_groups_ge,privileged_groups_ge,'LogReg',train_ge_instance_weights)

# test the most accurate and fairest random forest parameters on the german dataset
print("")
print("Random Forest")
acc_acc_ge_rf, fair_acc_ge_rf, acc_tpr_diff_ge_rf, fair_tpr_diff_ge_rf = GetAccAndFairnessForBestParams(
    acc_params_ge_rf,fair_params_ge_rf,X_train_ge,y_train_ge,X_test_ge,y_test_ge,
    test_ge,unprivileged_groups_ge,privileged_groups_ge,'RForest',train_ge_instance_weights)

Logistic Regression
{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'} Accuracy 0.6866666666666666
{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'} Accuracy 0.6866666666666666
{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'} Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference -0.01666666666666672
{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'} Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference -0.01666666666666672

Random Forest
{'bootstrap': True, 'max_depth': None, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 1000} Accuracy 0.7
{'bootstrap': True, 'max_depth': 10, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500} Accuracy 0.7
{'bootstrap': True, 'max_depth': None, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 1000} Fairness metric of equality of opportunity, i.e. true positive rate (T

In [21]:
# output logistic regression test results for german dataset as png
params_df_ge = pd.DataFrame([acc_params_ge,fair_params_ge],index=['Most Accurate','Fairest'])
params_df_ge['Accuracy'] = [acc_acc_ge, fair_acc_ge]
params_df_ge['Fairness'] = [acc_tpr_diff_ge, fair_tpr_diff_ge]
dfi.export(params_df_ge, 'german_dataframe_log_reg.png')
params_df_ge

[0518/013239.416284:INFO:headless_shell.cc(660)] Written to file /var/folders/n7/079skxw56td31dml628vdw9h0000gn/T/tmpr11q54mc/temp.png.


Unnamed: 0,C,penalty,solver,Accuracy,Fairness
Most Accurate,0.01,l2,newton-cg,0.686667,-0.016667
Fairest,0.01,l2,newton-cg,0.686667,-0.016667


In [22]:
# output random forest test results for german dataset as png
params_df_ge_rf = pd.DataFrame([acc_params_ge_rf,fair_params_ge_rf],index=['Most Accurate','Fairest'])
params_df_ge_rf['Accuracy'] = [acc_acc_ge_rf, fair_acc_ge_rf]
params_df_ge_rf['Fairness'] = [acc_tpr_diff_ge_rf, fair_tpr_diff_ge_rf]
dfi.export(params_df_ge_rf, 'german_dataframe_random_forest.png')
params_df_ge_rf

[0518/013241.248695:INFO:headless_shell.cc(660)] Written to file /var/folders/n7/079skxw56td31dml628vdw9h0000gn/T/tmpriklojnr/temp.png.


Unnamed: 0,bootstrap,max_depth,max_features,min_samples_leaf,min_samples_split,n_estimators,Accuracy,Fairness
Most Accurate,True,,log2,1,2,1000,0.7,-0.132653
Fairest,True,10.0,auto,1,2,500,0.7,-0.132653


## Task 2

In [23]:
# REWEIGH ON ADULT
# Reweight adult dataset
ad_dataset_rw = Reweighing(unprivileged_groups=unprivileged_groups_ad,
                           privileged_groups=privileged_groups_ad).fit_transform(ad_dataset)

# Split between training and test set.
train_ad_rw, test_ad_rw = ad_dataset_rw.split([0.7], shuffle=False)
train_ad_rw_instance_weights = train_ad_rw.instance_weights
print("training data size", train_ad_rw.features.shape)
print("dataset feature names", train_ad_rw.feature_names)

# Normalize the dataset, both train and test. This should always be done in any machine learning pipeline!
scale_orig = StandardScaler()
X_train_ad_rw = scale_orig.fit_transform(train_ad_rw.features)
y_train_ad_rw = train_ad_rw.labels.ravel()

X_test_ad_rw = scale_orig.transform(test_ad_rw.features) 
y_test_ad_rw = test_ad_rw.labels.ravel()

training data size (34189, 18)
dataset feature names ['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']


In [24]:
# reweighted logistic regression accuracy grid search on adult set
model = LogisticRegression()
acc_score_ad_rw, acc_params_ad_rw, acc_result_ad_rw = GridSearchAccuracy(model,params_dict_lr,X_train_ad_rw,y_train_ad_rw,
                                                                         train_ad_rw_instance_weights)

Accuracy
Best: 0.804762 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}


In [25]:
# reweighted logistic regression fairness grid search on adult set
fair_score_ad_rw, fair_params_ad_rw, tpr_diff_means_ad_rw, param_grid_dicts_ad_rw = GridSearchTPR(
    train_ad_rw,'LogReg',unprivileged_groups_ad,privileged_groups_ad,params_dict_lr)

Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: 0.011044 using {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'}


In [26]:
# testing the best logistic regression parameters on the reweighted adult set
acc_acc_ad_rw, fair_acc_ad_rw, acc_tpr_diff_ad_rw, fair_tpr_diff_ad_rw = GetAccAndFairnessForBestParams(
    acc_params_ad_rw,fair_params_ad_rw,X_train_ad_rw,y_train_ad_rw,X_test_ad_rw,
    y_test_ad_rw,test_ad_rw,unprivileged_groups_ad,privileged_groups_ad,'LogReg',train_ad_rw_instance_weights)

{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'} Accuracy 0.7865966013785572
{'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'} Accuracy 0.7865966013785572
{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'} Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference -0.03669471982898115
{'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'} Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference 0.004273622815339162


In [27]:
# png output for above results
params_df_ad_rw = pd.DataFrame([acc_params_ad_rw,fair_params_ad_rw],index=['Most Accurate','Fairest'])
params_df_ad_rw['Accuracy'] = [acc_acc_ad_rw, fair_acc_ad_rw]
params_df_ad_rw['Fairness'] = [acc_tpr_diff_ad_rw, fair_tpr_diff_ad_rw]
dfi.export(params_df_ad_rw, 'adult_dataframe_log_reg_reweigh.png')
params_df_ad_rw

[0518/013251.860568:INFO:headless_shell.cc(660)] Written to file /var/folders/n7/079skxw56td31dml628vdw9h0000gn/T/tmpf6lfg5fj/temp.png.


Unnamed: 0,C,penalty,solver,Accuracy,Fairness
Most Accurate,0.01,l2,newton-cg,0.786597,-0.036695
Fairest,100.0,l2,newton-cg,0.786597,0.004274


In [28]:
# reweighted random forest accuracy grid search on adult set
model = RandomForestClassifier()
acc_score_ad_rf_rw, acc_params_ad_rf_rw, acc_result_ad_rf_rw = GridSearchAccuracy(model,params_dict_rf,X_train_ad_rw,
                                                                                  y_train_ad_rw,
                                                                                  train_ad_rw_instance_weights)

Accuracy
Best: 0.804703 using {'bootstrap': True, 'max_depth': 50, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10}


In [29]:
# reweighted random forest tpr difference grid search on adult set
fair_score_ad_rf_rw, fair_params_ad_rf_rw, tpr_diff_means_ad_rf_rw, param_grid_dicts_ad_rf_rw = GridSearchTPR(
    train_ad_rw,'RForest',unprivileged_groups_ad,privileged_groups_ad,params_dict_rf)

Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: 0.101872 using {'bootstrap': True, 'max_depth': 50, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10}


In [30]:
# test above parameters on reweighted adult test set
acc_acc_ad_rf_rw, fair_acc_ad_rf_rw, acc_tpr_diff_ad_rf_rw, fair_tpr_diff_ad_rf_rw = GetAccAndFairnessForBestParams(
    acc_params_ad_rf_rw,fair_params_ad_rf_rw,X_train_ad_rw,y_train_ad_rw,X_test_ad_rw,
    y_test_ad_rw,test_ad_rw,unprivileged_groups_ad,privileged_groups_ad,'RForest',train_ad_rw_instance_weights)

{'bootstrap': True, 'max_depth': 50, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10} Accuracy 0.7747218999522282
{'bootstrap': True, 'max_depth': 50, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10} Accuracy 0.7747218999522282
{'bootstrap': True, 'max_depth': 50, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10} Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference 0.10427935549847533
{'bootstrap': True, 'max_depth': 50, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10} Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference 0.10427935549847533


In [31]:
# output above results as png
params_df_ad_rf_rw = pd.DataFrame([acc_params_ad_rf_rw,fair_params_ad_rf_rw],index=['Most Accurate','Fairest'])
params_df_ad_rf_rw['Accuracy'] = [acc_acc_ad_rf_rw, fair_acc_ad_rf_rw]
params_df_ad_rf_rw['Fairness'] = [acc_tpr_diff_ad_rf_rw, fair_tpr_diff_ad_rf_rw]
dfi.export(params_df_ad_rf_rw, 'adult_dataframe_random_forest_reweigh.png')
params_df_ad_rf_rw

[0518/014108.625882:INFO:headless_shell.cc(660)] Written to file /var/folders/n7/079skxw56td31dml628vdw9h0000gn/T/tmp6q042p8s/temp.png.


Unnamed: 0,bootstrap,max_depth,max_features,min_samples_leaf,min_samples_split,n_estimators,Accuracy,Fairness
Most Accurate,True,50,log2,1,2,10,0.774722,0.104279
Fairest,True,50,auto,1,2,10,0.774722,0.104279


In [32]:
# REWEIGH ON GERMAN
# Reweight German dataset
ge_dataset_rw = Reweighing(unprivileged_groups=unprivileged_groups_ge,
                           privileged_groups=privileged_groups_ge).fit_transform(ge_dataset)

# Split between training and test set.
train_ge_rw, test_ge_rw = ge_dataset_rw.split([0.7], shuffle=False)
train_ge_rw_instance_weights = train_ge_rw.instance_weights
print("training data size", train_ge_rw.features.shape)
print("dataset feature names", train_ge_rw.feature_names)

# Normalize the dataset, both train and test. This should always be done in any machine learning pipeline!
scale_orig = StandardScaler()
X_train_ge_rw = scale_orig.fit_transform(train_ge_rw.features)
y_train_ge_rw = train_ge_rw.labels.ravel()

X_test_ge_rw = scale_orig.transform(test_ge_rw.features) 
y_test_ge_rw = test_ge_rw.labels.ravel()

training data size (700, 11)
dataset feature names ['age', 'sex', 'credit_history=Delay', 'credit_history=None/Paid', 'credit_history=Other', 'savings=500+', 'savings=<500', 'savings=Unknown/None', 'employment=1-4 years', 'employment=4+ years', 'employment=Unemployed']


In [33]:
# reweighted logistic regression accuracy grid search on German set
model = LogisticRegression()
acc_score_ge_rw, acc_params_ge_rw, acc_result_ge_rw = GridSearchAccuracy(model,params_dict_lr,X_train_ge_rw,y_train_ge_rw,
                                                                         train_ge_rw_instance_weights)

Accuracy
Best: 0.705714 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}


In [34]:
# reweighted logistic regression fairness grid search on German set
fair_score_ge_rw, fair_params_ge_rw, tpr_diff_means_ge_rw, param_grid_dicts_ge_rw = GridSearchTPR(
    train_ge_rw,'LogReg',unprivileged_groups_ge,privileged_groups_ge,params_dict_lr)

Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: 0.017537 using {'C': 0.01, 'penalty': 'l2', 'solver': 'liblinear'}


In [35]:
# test above parameters on reweighted test set
acc_acc_ge_rw, fair_acc_ge_rw, acc_tpr_diff_ge_rw, fair_tpr_diff_ge_rw = GetAccAndFairnessForBestParams(
    acc_params_ge_rw,fair_params_ge_rw,X_train_ge_rw,y_train_ge_rw,X_test_ge_rw,y_test_ge_rw,test_ge_rw,
    unprivileged_groups_ge,privileged_groups_ge,'LogReg',train_ge_rw_instance_weights)

{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'} Accuracy 0.69
{'C': 0.01, 'penalty': 'l2', 'solver': 'liblinear'} Accuracy 0.68
{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'} Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference 0.0
{'C': 0.01, 'penalty': 'l2', 'solver': 'liblinear'} Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference -0.02278911564625885


In [36]:
# png output of above results
params_df_ge_rw = pd.DataFrame([acc_params_ge_rw,fair_params_ge_rw],index=['Most Accurate','Fairest'])
params_df_ge_rw['Accuracy'] = [acc_acc_ge_rw, fair_acc_ge_rw]
params_df_ge_rw['Fairness'] = [acc_tpr_diff_ge_rw, fair_tpr_diff_ge_rw]
dfi.export(params_df_ge_rw, 'german_dataframe_log_reg_reweigh.png')
params_df_ge_rw

[0518/014112.802604:INFO:headless_shell.cc(660)] Written to file /var/folders/n7/079skxw56td31dml628vdw9h0000gn/T/tmpnjt9s4cj/temp.png.


Unnamed: 0,C,penalty,solver,Accuracy,Fairness
Most Accurate,0.01,l2,newton-cg,0.69,0.0
Fairest,0.01,l2,liblinear,0.68,-0.022789


In [37]:
# reweighted random forest accuracy grid search on German set
model = RandomForestClassifier()
acc_score_ge_rf_rw, acc_params_ge_rf_rw, acc_result_ge_rf_rw = GridSearchAccuracy(model,params_dict_rf,X_train_ge_rw,
                                                                                  y_train_ge_rw,
                                                                                  train_ge_rw_instance_weights)

Accuracy
Best: 0.700000 using {'bootstrap': True, 'max_depth': 10, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10}


In [38]:
# reweighted random forest fairness grid search on German set
fair_score_ge_rf_rw, fair_params_ge_rf_rw, tpr_diff_means_ge_rf_rw, param_grid_dicts_ge_rf_rw = GridSearchTPR(
    train_ge_rw,'RForest',unprivileged_groups_ge,privileged_groups_ge,params_dict_rf)

Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: -0.080178 using {'bootstrap': True, 'max_depth': 10, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}


In [39]:
# test above parameters on test set
acc_acc_ge_rf_rw, fair_acc_ge_rf_rw, acc_tpr_diff_ge_rf_rw, fair_tpr_diff_ge_rf_rw = GetAccAndFairnessForBestParams(
    acc_params_ge_rf_rw,fair_params_ge_rf_rw,X_train_ge_rw,y_train_ge_rw,X_test_ge_rw,y_test_ge_rw,test_ge_rw,
    unprivileged_groups_ge,privileged_groups_ge,'RForest',train_ge_rw_instance_weights)

{'bootstrap': True, 'max_depth': 10, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10} Accuracy 0.7033333333333334
{'bootstrap': True, 'max_depth': 10, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500} Accuracy 0.7
{'bootstrap': True, 'max_depth': 10, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10} Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference -0.13945578231292544
{'bootstrap': True, 'max_depth': 10, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500} Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference -0.13265306122448972


In [40]:
# png output of above results
params_df_ge_rf_rw = pd.DataFrame([acc_params_ge_rf_rw,fair_params_ge_rf_rw],index=['Most Accurate','Fairest'])
params_df_ge_rf_rw['Accuracy'] = [acc_acc_ge_rf_rw, fair_acc_ge_rf_rw]
params_df_ge_rf_rw['Fairness'] = [acc_tpr_diff_ge_rf_rw, fair_tpr_diff_ge_rf_rw]
dfi.export(params_df_ge_rf_rw, 'german_dataframe_random_forest_reweigh.png')
params_df_ge_rf_rw

[0518/014219.325299:INFO:headless_shell.cc(660)] Written to file /var/folders/n7/079skxw56td31dml628vdw9h0000gn/T/tmpa0lce3fk/temp.png.


Unnamed: 0,bootstrap,max_depth,max_features,min_samples_leaf,min_samples_split,n_estimators,Accuracy,Fairness
Most Accurate,True,10,log2,1,2,10,0.703333,-0.139456
Fairest,True,10,auto,1,2,500,0.7,-0.132653


## Task 3

In [41]:
# define all methods and parameters to try
methods = ['LogReg', 'RForest']
grids = [params_dict_lr, params_dict_rf]

In [53]:
# find best criterion model and parameters for the adult dataset with default tolerance (10%)
best_model_ad1, best_params_ad1, best_acc_ad1, best_fairness_ad1 = FindBestModel(
    methods,grids,X_train_ad,y_train_ad,train_ad,train_ad_instance_weights,unprivileged_groups_ad,
    privileged_groups_ad,X_test_ad,y_test_ad,test_ad)

Accuracy
Best: 0.804762 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}
Accuracy
Best: 0.804791 using {'bootstrap': True, 'max_depth': None, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: -0.452224 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}
Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: -0.444728 using {'bootstrap': True, 'max_depth': 10, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10}
Best model: RForest using params {'n_estimators': 10, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'auto', 'max_depth': 10, 'bootstrap': True}, train accuracy: 0.8040307194183637, train TPR difference -0.44472832020787145, test accuracy: 0.8004504197092746 and test TPR difference: -0.4276729559748428


In [54]:
# find best criterion model and parameters for the adult dataset with 0.01% tolerance
best_model_ad2, best_params_ad2, best_acc_ad2, best_fairness_ad2 = FindBestModel(
    methods,grids,X_train_ad,y_train_ad,train_ad,train_ad_instance_weights,unprivileged_groups_ad,
    privileged_groups_ad,X_test_ad,y_test_ad,test_ad,0.0001)

Accuracy
Best: 0.804762 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}
Accuracy
Best: 0.804674 using {'bootstrap': True, 'max_depth': 50, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: -0.452224 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}
Best model: LogReg using params {'solver': 'newton-cg', 'penalty': 'l2', 'C': 0.01}, train accuracy: 0.8047619273739061, train TPR difference -0.4522235201576622, test accuracy: 0.8023612912031666 and test TPR difference: -0.44323071830519695


In [44]:
# find best criterion model and parameters for the reweighted adult dataset with default tolerance (10%)
best_model_ad_rw1, best_params_ad_rw1, best_acc_ad_rw1, best_fairness_ad_rw1 = FindBestModel(
    methods,grids,X_train_ad_rw,y_train_ad_rw,train_ad_rw,train_ad_rw_instance_weights,
    unprivileged_groups_ad,privileged_groups_ad,X_test_ad_rw,y_test_ad_rw,test_ad_rw)

Accuracy
Best: 0.804762 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}
Accuracy
Best: 0.804762 using {'bootstrap': True, 'max_depth': 100, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: 0.011044 using {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'}
Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: 0.101872 using {'bootstrap': True, 'max_depth': 50, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10}
Best model: RForest using params {'n_estimators': 10, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'auto', 'max_depth': 50, 'bootstrap': True}, train accuracy: 0.8044694441916892, train TPR difference 0.10187193301989055, test accuracy: 0.7747218999522282 and test TPR difference: 0.10427935549847533


In [55]:
# find best criterion model and parameters for the reweighted adult dataset with 0.1% tolerance
best_model_ad_rw2, best_params_ad_rw2, best_acc_ad_rw2, best_fairness_ad_rw2 = FindBestModel(
    methods,grids,X_train_ad_rw,y_train_ad_rw,train_ad_rw,train_ad_rw_instance_weights,
    unprivileged_groups_ad,privileged_groups_ad,X_test_ad_rw,y_test_ad_rw,test_ad_rw,0.0001)

Accuracy
Best: 0.804762 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}
Accuracy
Best: 0.804674 using {'bootstrap': True, 'max_depth': None, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10}
Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: 0.011044 using {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'}
Best model: LogReg using params {'solver': 'newton-cg', 'penalty': 'l2', 'C': 0.01}, train accuracy: 0.8047619273739061, train TPR difference 0.007152498955769327, test accuracy: 0.7865966013785572 and test TPR difference: -0.03669471982898115


In [56]:
# find best criterion model and parameters for the german dataset with default tolerance (10%)
best_model_ge, best_params_ge, best_acc_ge, best_fairness_ge = FindBestModel(
    methods,grids,X_train_ge,y_train_ge,train_ge,train_ge_instance_weights,unprivileged_groups_ge,
    privileged_groups_ge,X_test_ge,y_test_ge,test_ge)

Accuracy
Best: 0.705714 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}
Accuracy
Best: 0.701429 using {'bootstrap': True, 'max_depth': None, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: -0.012121 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}
Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: -0.104215 using {'bootstrap': True, 'max_depth': 10, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
Best model: LogReg using params {'solver': 'newton-cg', 'penalty': 'l2', 'C': 0.01}, train accuracy: 0.7057142857142856, train TPR difference -0.01212121212121211, test accuracy: 0.6866666666666666 and test TPR difference: -0.01666666666666672


In [57]:
# find best criterion model and parameters for the reweighted german dataset with default tolerance (10%)
best_model_ge_rw1, best_params_ge_rw1, best_acc_ge_rw1, best_fairness_ge_rw1 = FindBestModel(
    methods,grids,X_train_ge_rw,y_train_ge_rw,train_ge_rw,train_ge_rw_instance_weights,
    unprivileged_groups_ge,privileged_groups_ge,X_test_ge_rw,y_test_ge_rw,test_ge_rw)

Accuracy
Best: 0.705714 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}
Accuracy
Best: 0.702857 using {'bootstrap': True, 'max_depth': None, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: 0.017537 using {'C': 0.01, 'penalty': 'l2', 'solver': 'liblinear'}
Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: -0.080178 using {'bootstrap': True, 'max_depth': 10, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
Best model: LogReg using params {'solver': 'liblinear', 'penalty': 'l2', 'C': 0.01}, train accuracy: 0.687142857142857, train TPR difference 0.017536551930144785, test accuracy: 0.68 and test TPR difference: -0.02278911564625885


In [58]:
# find best criterion model and parameters for the reweighted german dataset with 1% tolerance
best_model_ge_rw2, best_params_ge_rw2, best_acc_ge_rw2, best_fairness_ge_rw2 = FindBestModel(
    methods,grids,X_train_ge_rw,y_train_ge_rw,train_ge_rw,train_ge_rw_instance_weights,
    unprivileged_groups_ge,privileged_groups_ge,X_test_ge_rw,y_test_ge_rw,test_ge_rw,0.01)

Accuracy
Best: 0.705714 using {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}
Accuracy
Best: 0.704286 using {'bootstrap': True, 'max_depth': None, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: 0.017537 using {'C': 0.01, 'penalty': 'l2', 'solver': 'liblinear'}
Fairness metric of equality of opportunity, i.e. true positive rate (TPR) difference
Best: -0.080178 using {'bootstrap': True, 'max_depth': 10, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
Best model: LogReg using params {'solver': 'newton-cg', 'penalty': 'l2', 'C': 100}, train accuracy: 0.7000000000000001, train TPR difference 0.010356051271383237, test accuracy: 0.68 and test TPR difference: -0.02278911564625885


In [59]:
# this code cell outputs all of the best model parameters and corresponding metrics as a pandas dataframe
best_model_list = [best_model_ad1, best_model_ad2, best_model_ad_rw1, best_model_ad_rw2, 
                   best_model_ge, best_model_ge_rw1, best_model_ge_rw2]
best_params_list_of_dicts = [best_params_ad1, best_params_ad2, best_params_ad_rw1, best_params_ad_rw2, 
                             best_params_ge, best_params_ge_rw1, best_params_ge_rw2]
best_params_dict_of_lists = {}
all_params = []
for i in params_dict_lr.keys():
    all_params.append(i)
for i in params_dict_rf.keys():
    all_params.append(i)
for i in all_params:
    best_params_dict_of_lists[i] = []
    for j in best_params_list_of_dicts:
        best_params_dict_of_lists[i].append(j.get(i,'N/A'))
best_acc_list = [best_acc_ad1, best_acc_ad2, best_acc_ad_rw1, best_acc_ad_rw2, 
                 best_acc_ge, best_acc_ge_rw1, best_acc_ge_rw2]
best_fairness_list = [best_fairness_ad1, best_fairness_ad2, best_fairness_ad_rw1, best_fairness_ad_rw2, 
                      best_fairness_ge, best_fairness_ge_rw1, best_fairness_ge_rw2]
best_dict = {}
best_dict.update({'model': best_model_list})
best_dict.update(best_params_dict_of_lists)
best_dict.update({'accuracy': best_acc_list})
best_dict.update({'TPR difference': best_fairness_list})
best_df = pd.DataFrame(
    best_dict, index = ['Adult 0.1 Tolerance', 'Adult 0.001 Tolerance', 'Reweighted Adult 0.1 Tolerance',
                        'Reweighted Adult 0.001 Tolerance', 'German 0.1 Tolerance', 
                        'Reweighted German 0.1 Tolerance', 'Reweighted German 0.01 Tolerance'])
best_df

Unnamed: 0,model,solver,penalty,C,n_estimators,max_features,max_depth,min_samples_split,min_samples_leaf,bootstrap,accuracy,TPR difference
Adult 0.1 Tolerance,RForest,,,,10.0,auto,10.0,2.0,1.0,True,0.80045,-0.427673
Adult 0.001 Tolerance,LogReg,newton-cg,l2,0.01,,,,,,,0.802361,-0.443231
Reweighted Adult 0.1 Tolerance,RForest,,,,10.0,auto,50.0,2.0,1.0,True,0.774722,0.104279
Reweighted Adult 0.001 Tolerance,LogReg,newton-cg,l2,0.01,,,,,,,0.786597,-0.036695
German 0.1 Tolerance,LogReg,newton-cg,l2,0.01,,,,,,,0.686667,-0.016667
Reweighted German 0.1 Tolerance,LogReg,liblinear,l2,0.01,,,,,,,0.68,-0.022789
Reweighted German 0.01 Tolerance,LogReg,newton-cg,l2,100.0,,,,,,,0.68,-0.022789


In [60]:
# this exports the above dataframe as a png
dfi.export(best_df, 'best_df.png')

[0518/123344.507223:INFO:headless_shell.cc(660)] Written to file /var/folders/n7/079skxw56td31dml628vdw9h0000gn/T/tmpbqaeqpu9/temp.png.
