In [6]:
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
import sys
sys.path.append("../")
from aif360.algorithms.inprocessing.gerryfair_classifier import *
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import *
from sklearn import svm
from sklearn import tree
from sklearn.kernel_ridge import KernelRidge
from sklearn import linear_model
from aif360.metrics.binary_label_dataset_metric import *
from IPython.display import Image
import pickle
import matplotlib.pyplot as plt

# --- Experiment settings ---------------------------------------------------
# Kept as notebook-level names so they can be tweaked in one place; they are
# forwarded unchanged to the GerryFairClassifier constructor below.
C = 100
gamma = .005
max_iterations = 10
print_flag = True  # passed as `printflag` to the classifier

# --- Data ------------------------------------------------------------------
# Preprocessed Adult data, requested with sub_samp=1000 and balance=True.
data_set = load_preproc_data_adult(balance=True, sub_samp=1000)

# --- Model -----------------------------------------------------------------
# Classifier configured with the 'FP' fairness definition and heatmaps
# disabled.
fair_model = GerryFairClassifier(
    C=C,
    gamma=gamma,
    max_iters=max_iterations,
    fairness_def='FP',
    printflag=print_flag,
    heatmapflag=False,
)

# Train on the loaded data set with early termination enabled.
fair_model.fit(data_set, early_termination=True)

# Predict on the same data the model was fitted on. Per the original note,
# a threshold in (0, 1) produces binary predictions; threshold=False is
# passed here instead.
dataset_yhat = fair_model.predict(data_set, threshold=False)


iteration: 1, error: 0.265, fairness violation: 0.051400000000000015, violated group size: 0.28
iteration: 2, error: 0.3825, fairness violation: 0.025700000000000008, violated group size: 0.28
iteration: 3, error: 0.42166666666666663, fairness violation: 0.017133333333333337, violated group size: 0.28
iteration: 4, error: 0.44125, fairness violation: 0.012850000000000004, violated group size: 0.28
iteration: 5, error: 0.453, fairness violation: 0.010280000000000001, violated group size: 0.22
iteration: 6, error: 0.4608333333333333, fairness violation: 0.00856666666666667, violated group size: 0.28
iteration: 7, error: 0.46642857142857147, fairness violation: 0.007342857142857144, violated group size: 0.28
iteration: 8, error: 0.470625, fairness violation: 0.006425000000000002, violated group size: 0.22
iteration: 9, error: 0.47388888888888897, fairness violation: 0.005711111111111118, violated group size: 0.28


In [7]:
# Audit the predictions produced above: build a dataset metric over the
# original data and evaluate its rich-subgroup measure (with the 'FP'
# setting) on the predicted labels.

gerry_metric = BinaryLabelDatasetMetric(data_set)

# rich_subgroup is given the labels converted via array_to_tuple.
yhat_label_tuple = array_to_tuple(dataset_yhat.labels)
gamma_disparity = gerry_metric.rich_subgroup(yhat_label_tuple, 'FP')

print(gamma_disparity)



0.005711111111111111


In [8]:
# Set to 10 iterations so the notebook runs quickly - use >= 1000 when running
# real experiments.
# NOTE: this value is captured as the default of `iters` when the `def` below
# is executed; re-run this whole cell after changing it.
pareto_iters = 10


def multiple_classifiers_pareto(dataset, gamma_list=[0.002, 0.005, 0.01], save_results=False, iters=pareto_iters):
    """Run GerryFairClassifier.pareto with several hypothesis classes.

    For each of four regressors (linear regression, linear SVR, a depth-3
    decision tree and an RBF kernel ridge model) a fresh GerryFairClassifier
    is built around it and its ``pareto`` method is called with ``dataset``
    and ``gamma_list``.

    Args:
        dataset: Data set handed straight to ``GerryFairClassifier.pareto``.
        gamma_list: Gamma values handed straight to ``pareto``. This function
            itself never modifies the list.
        save_results: When true, pickle the collected results to a file in the
            current working directory.
        iters: Value passed as ``max_iters`` to every classifier.

    Returns:
        dict: Maps each predictor label ('Linear', 'SVR', 'Tree', 'Kernel') to
        a dict with keys 'errors', 'fp_violations' and 'fn_violations' holding
        the three values returned by ``pareto`` for that predictor.
    """
    # Label -> underlying regressor used as the classifier's `predictor`.
    predictors = {
        'Linear': linear_model.LinearRegression(),
        'SVR': svm.LinearSVR(),
        'Tree': tree.DecisionTreeRegressor(max_depth=3),
        'Kernel': KernelRidge(alpha=1.0, gamma=1.0, kernel='rbf'),
    }

    results_dict = {}
    for label, predictor in predictors.items():
        print('Curr Predictor: {}'.format(label))
        # Printing is disabled up front so the per-iteration log does not
        # flood the cell output. gamma=1 is only the constructor value;
        # presumably pareto applies each entry of gamma_list itself - TODO
        # confirm against the library.
        fair_clf = GerryFairClassifier(C=100, printflag=False, gamma=1,
                                       predictor=predictor, max_iters=iters)
        errors, fp_violations, fn_violations = fair_clf.pareto(dataset, gamma_list)
        results_dict[label] = {'errors': errors,
                               'fp_violations': fp_violations,
                               'fn_violations': fn_violations}

    if save_results:
        # NOTE(review): the gamma list appears twice in this file name, which
        # looks like a copy-paste slip (the first occurrence was probably
        # meant to be `iters`). Left unchanged so existing file names stay
        # stable - confirm before renaming.
        out_path = 'results_dict_' + str(gamma_list) + '_gammas' + str(gamma_list) + '.pkl'
        # Context manager guarantees the handle is flushed and closed.
        with open(out_path, 'wb') as out_file:
            pickle.dump(results_dict, out_file)

    return results_dict


# Bind the result so it can be inspected later without dumping the whole
# dictionary into the cell output.
pareto_results = multiple_classifiers_pareto(data_set)



Curr Predictor: Linear
Curr Predictor: SVR
Curr Predictor: Tree
Curr Predictor: Kernel
