In [None]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

from aif360.datasets import StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult, load_preproc_data_compas, load_preproc_data_german
from aif360.algorithms.preprocessing.reweighing import Reweighing
from aif360.algorithms.inprocessing import PrejudiceRemover
from aif360.algorithms.inprocessing.adversarial_debiasing import AdversarialDebiasing

from sklearn.metrics import accuracy_score
from scipy.stats import mode
import pandas as pd
import numpy as np
import errno
import math
import copy
from copy import deepcopy
import csv
import tensorflow as tf
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
def preprocess_compasdataset(df):
    """Filter and encode a raw COMPAS frame and wrap it in a ``StandardDataset``.

    Args:
        df: pandas DataFrame holding at least the columns selected in the
            first statement below (``age``, ``c_charge_degree``, ``race``,
            ``age_cat``, ``score_text``, ``sex``, ``priors_count``,
            ``days_b_screening_arrest``, ``decile_score``, ``is_recid``,
            ``two_year_recid``, ``c_jail_in``, ``c_jail_out``).
            The caller's frame is not modified.

    Returns:
        A ``StandardDataset`` with label ``two_year_recid`` (favorable class
        0, i.e. "No recid."), protected attributes ``sex`` (Female -> 1.0,
        privileged) and ``race`` (Caucasian -> 1.0, privileged), and the
        categorical features ``age_cat``, ``priors_count`` and
        ``c_charge_degree``.
    """
    df = df[['age', 'c_charge_degree', 'race', 'age_cat', 'score_text',
             'sex', 'priors_count', 'days_b_screening_arrest', 'decile_score',
             'is_recid', 'two_year_recid', 'c_jail_in', 'c_jail_out']]

    # Rows to keep: screened within 30 days of arrest, known recidivism
    # flag, an ordinary charge degree and a valid score text.
    keep = (
        (df['days_b_screening_arrest'] <= 30)
        & (df['days_b_screening_arrest'] >= -30)
        & (df['is_recid'] != -1)
        & (df['c_charge_degree'] != "O")
        & (df['score_text'] != 'N/A')
    )
    # Explicit copies keep the column assignments below from writing into a
    # slice of another frame (pandas SettingWithCopyWarning).
    df = df.loc[keep, :].copy()
    df['length_of_stay'] = (pd.to_datetime(df['c_jail_out']) - pd.to_datetime(df['c_jail_in'])).apply(lambda x: x.days)

    # Restrict races to African-American and Caucasian
    df = df.loc[~df['race'].isin(['Native American', 'Hispanic', 'Asian', 'Other']), :].copy()

    df = df[['sex', 'race', 'age_cat', 'c_charge_degree', 'score_text', 'priors_count',
             'is_recid', 'two_year_recid', 'length_of_stay']].copy()

    # Bucket the numeric columns into categories.
    df['priors_count'] = df['priors_count'].apply(lambda x: 0 if x <= 0 else ('1 to 3' if 1 <= x <= 3 else 'More than 3'))
    # A stay of exactly 8 days belongs to '<3months'; the previous bound
    # (8 < x) pushed it into '>3months'.
    df['length_of_stay'] = df['length_of_stay'].apply(lambda x: '<week' if x <= 7 else ('<3months' if x <= 93 else '>3months'))
    df['score_text'] = df['score_text'].apply(lambda x: 'MediumHigh' if (x == 'High') | (x == 'Medium') else x)
    df['age_cat'] = df['age_cat'].apply(lambda x: '25 to 45' if x == '25 - 45' else x)

    # Binary encodings of the protected attributes.
    df['sex'] = df['sex'].replace({'Female': 1.0, 'Male': 0.0})
    df['race'] = df['race'].apply(lambda x: 1.0 if x == 'Caucasian' else 0.0)

    # Only these columns reach the dataset; score_text, is_recid and
    # length_of_stay are dropped here.
    df = df[['two_year_recid', 'sex', 'race', 'age_cat', 'priors_count', 'c_charge_degree']]

    protected_attributes = ['sex', 'race']
    label_name = 'two_year_recid'
    categorical_features = ['age_cat', 'priors_count', 'c_charge_degree']
    features = categorical_features + [label_name] + protected_attributes

    # privileged classes
    privileged_classes = {"sex": [1.0], "race": [1.0]}

    # protected attribute maps
    protected_attribute_map = {"sex": {0.0: 'Male', 1.0: 'Female'},
                               "race": {1.0: 'Caucasian', 0.0: 'Not Caucasian'}}

    data = StandardDataset(df, label_name, favorable_classes=[0],
                           protected_attribute_names=protected_attributes,
                           privileged_classes=[privileged_classes[x] for x in protected_attributes],
                           categorical_features=categorical_features,
                           features_to_keep=features,
                           metadata={'label_maps': [{1.0: 'Did recid.', 0.0: 'No recid.'}],
                                     'protected_attribute_maps': [protected_attribute_map[x] for x in protected_attributes]})

    return data

In [None]:
def preprocess_germandataset(df):
    """Group and encode a raw German-credit frame and wrap it in a ``StandardDataset``.

    Args:
        df: pandas DataFrame with the columns ``personal_status_sex``,
            ``credit_history``, ``savings``, ``present_emp_since``, ``age``,
            ``account_check_status`` and ``default``. The caller's frame is
            not modified.

    Returns:
        A ``StandardDataset`` with label ``credit`` (the renamed ``default``
        column, favorable class 1), protected attributes ``sex``
        (male -> 1.0, privileged) and ``age`` (>= 25 -> 1.0, privileged), and
        the categorical features ``credit_history``, ``savings`` and
        ``employment``.
    """
    def group_credit_hist(x):
        if x in ['no credits taken/ all credits paid back duly', 'all credits at this bank paid back duly', 'existing credits paid back duly till now']:
            return 'None/Paid'
        elif x == 'delay in paying off in the past':
            return 'Delay'
        elif x == 'critical account/ other credits existing (not at this bank)':
            return 'Other'
        else:
            return 'NA'

    def group_employ(x):
        if x == 'unemployed':
            return 'Unemployed'
        elif x in ['... < 1 year ', '1 <= ... < 4 years']:
            return '1-4 years'
        elif x in ['4 <= ... < 7 years', '.. >= 7 years']:
            return '4+ years'
        else:
            return 'NA'

    def group_savings(x):
        if x in ['... < 100 DM', '100 <= ... < 500 DM']:
            return '<500'
        elif x in ['500 <= ... < 1000 DM ', '.. >= 1000 DM ']:
            return '500+'
        elif x == 'unknown/ no savings account':
            return 'Unknown/None'
        else:
            return 'NA'

    def group_status(x):
        if x in ['< 0 DM', '0 <= ... < 200 DM']:
            return '<200'
        elif x in ['>= 200 DM / salary assignments for at least 1 year']:
            return '200+'
        elif x == 'no checking account':
            return 'None'
        else:
            return 'NA'

    status_map = {'male : divorced/separated': 1.0,
                  'male : single': 1.0,
                  'male : married/widowed': 1.0,
                  'female : divorced/separated/married': 0.0,
                  'female : single': 0.0}

    # Work on a copy: the assignments below would otherwise overwrite the
    # caller's columns, and a second call on the same frame would then
    # re-encode already-encoded values.
    df = df.copy()

    df['personal_status_sex'] = df['personal_status_sex'].replace(status_map)
    df['credit_history'] = df['credit_history'].apply(group_credit_hist)
    df['savings'] = df['savings'].apply(group_savings)
    df['present_emp_since'] = df['present_emp_since'].apply(group_employ)
    # Built-in float: the np.float alias no longer exists in current NumPy.
    df['age'] = df['age'].apply(lambda x: float(x >= 25))
    df['account_check_status'] = df['account_check_status'].apply(group_status)

    df = df.rename(columns={'default': 'credit', 'present_emp_since': 'employment', 'account_check_status': 'status', 'personal_status_sex': 'sex'})

    protected_attribute = ['sex', 'age']
    label_name = 'credit'
    # 'status' is grouped above but is not listed here, so it is not kept.
    categorical_features = ['credit_history', 'savings', 'employment']
    features = categorical_features + [label_name] + protected_attribute

    privileged_class = {'sex': [1.0], 'age': [1.0]}

    protected_attribute_map = {"sex": {1.0: 'male', 0.0: 'female'},
                               "age": {1.0: 'old', 0.0: 'young'}}

    # NOTE(review): the label map names values 1.0/2.0 while the label comes
    # from a column called 'default' -- confirm the CSV's coding matches
    # favorable_classes=[1].
    data = StandardDataset(df, label_name, favorable_classes=[1],
                           protected_attribute_names=protected_attribute,
                           privileged_classes=[privileged_class[x] for x in protected_attribute],
                           categorical_features=categorical_features,
                           features_to_keep=features,
                           metadata={'label_maps': [{1.0: 'Good Credit', 2.0: 'Bad Credit'}],
                                     'protected_attribute_maps': [protected_attribute_map[x] for x in protected_attribute]})

    return data

In [None]:
############ Reweighing ##############

def reweighing_data(train, unprivileged_group, privileged_group):
    """Apply AIF360 ``Reweighing`` and turn the weights into duplicated rows.

    The instance weights produced by ``Reweighing`` are rounded to one
    decimal place and multiplied by ten to obtain an integer copy count per
    instance. Every instance is kept once and ``count - 1`` extra copies are
    appended after the original rows (in instance order), so models that
    ignore instance weights still see the reweighted distribution. The
    instance weights are then reset to one.

    Args:
        train: dataset passed to ``Reweighing.fit`` / ``transform``.
        unprivileged_group: forwarded as ``unprivileged_groups``.
        privileged_group: forwarded as ``privileged_groups``.

    Returns:
        The transformed dataset with ``features``, ``labels``,
        ``protected_attributes`` and ``instance_weights`` replaced.
    """
    RW = Reweighing(unprivileged_groups=unprivileged_group, privileged_groups=privileged_group)
    RW.fit(train)
    train_transformed = RW.transform(train)

    # Copy count per instance: weight rounded to the nearest 0.1, times ten.
    copies = np.rint(np.asarray(train_transformed.instance_weights) / 0.1).astype(int)

    # Each original row stays in place, so only count - 1 extras are needed;
    # a count of zero still keeps the original row.
    extra = np.maximum(copies - 1, 0)
    extra_rows = np.repeat(np.arange(extra.size), extra)

    # Fancy indexing keeps the column count of each array, so any number of
    # features / protected attributes works.
    train_transformed.features = np.concatenate(
        (train_transformed.features, train_transformed.features[extra_rows]))
    train_transformed.labels = np.concatenate(
        (train_transformed.labels, train_transformed.labels[extra_rows]))
    train_transformed.protected_attributes = np.concatenate(
        (train_transformed.protected_attributes, train_transformed.protected_attributes[extra_rows]))

    # NOTE(review): other per-instance attributes of the dataset (if any) are
    # left at their original length, as before -- confirm downstream code
    # only reads the four arrays updated here.
    train_transformed.instance_weights = np.ones(train_transformed.features.shape[0])

    return train_transformed

In [None]:
def output_rates(input_data, output_data, attribute_name, privileged=None, unprivileged=None, favourable=None, unfavourable=None):
    """Count confusion-matrix cells separately for two groups of one attribute.

    Args:
        input_data: object with ``feature_names``, ``features`` and ``labels``;
            its labels are treated as the reference labels.
        output_data: object with ``features`` and ``labels``; its labels are
            treated as the predictions.
        attribute_name: name looked up in ``input_data.feature_names`` to
            find the feature column that defines the groups.
        privileged: column value of the privileged group (converted to float).
        unprivileged: column value of the unprivileged group (converted to float).
        favourable: label value counted as the favourable outcome.
        unfavourable: label value counted as the unfavourable outcome.

    Returns:
        ``[[tp, fp, tn, fn], [tp, fp, tn, fn]]`` -- counts for the privileged
        group first, then for the unprivileged group.
    """
    column = input_data.feature_names.index(attribute_name)
    privileged = float(privileged)
    unprivileged = float(unprivileged)

    def paired_labels(group_value):
        # One row per group member: [reference label, predicted label].
        reference = input_data.labels[np.flatnonzero(input_data.features[:, column] == group_value)]
        predicted = output_data.labels[np.flatnonzero(output_data.features[:, column] == group_value)]
        return np.concatenate((reference, predicted), axis=1)

    def tally(pairs):
        tp = fp = tn = fn = 0
        for row in pairs:
            reference, predicted = row[0], row[1]
            if reference != predicted:
                # A mismatch is a false negative only for favourable ->
                # unfavourable; every other mismatch is a false positive.
                if reference == favourable and predicted == unfavourable:
                    fn += 1
                else:
                    fp += 1
            elif reference == unfavourable:
                tn += 1
            else:
                tp += 1
        return [tp, fp, tn, fn]

    priv_pairs = paired_labels(privileged)
    unpriv_pairs = paired_labels(unprivileged)

    return [tally(priv_pairs), tally(unpriv_pairs)]

In [None]:
def equal_opp_diff(input_data, output_data, attribute_name, privileged, unprivileged, favourable, unfavourable):
    """Return the true-positive-rate gap (unprivileged minus privileged).

    All arguments are forwarded unchanged to ``output_rates``, which yields
    ``[tp, fp, tn, fn]`` for the privileged and the unprivileged group.
    """
    counts_priv, counts_unpriv = output_rates(
        input_data, output_data, attribute_name,
        privileged, unprivileged, favourable, unfavourable)

    # true positive rate = tp / (tp + fn)
    tp_priv, fn_priv = counts_priv[0], counts_priv[3]
    tp_unpriv, fn_unpriv = counts_unpriv[0], counts_unpriv[3]

    recall_priv = tp_priv / (tp_priv + fn_priv)
    recall_unpriv = tp_unpriv / (tp_unpriv + fn_unpriv)

    return recall_unpriv - recall_priv

In [None]:
def avg_odds_diff(input_data, output_data, attribute_name, privileged, unprivileged, favourable, unfavourable):
    """Return the average odds difference between the two groups.

    The value is the mean of the false-positive-rate gap and the
    true-positive-rate gap, each taken as unprivileged minus privileged.
    All arguments are forwarded unchanged to ``output_rates``, which yields
    ``[tp, fp, tn, fn]`` for the privileged and the unprivileged group.

    Raises:
        ZeroDivisionError: if a group has no tp + fn or no fp + tn instances.
    """
    rates_both = output_rates(input_data, output_data, attribute_name, privileged, unprivileged, favourable, unfavourable)

    # [tp, fp, tn, fn]
    outcome_privileged = rates_both[0]
    outcome_unprivileged = rates_both[1]

    # true positive rate = tp / (tp + fn)
    tpr_privileged = outcome_privileged[0] / (outcome_privileged[0] + outcome_privileged[3])
    tpr_unprivileged = outcome_unprivileged[0] / (outcome_unprivileged[0] + outcome_unprivileged[3])

    # false positive rate = fp / (fp + tn)
    fpr_privileged = outcome_privileged[1] / (outcome_privileged[1] + outcome_privileged[2])
    fpr_unprivileged = outcome_unprivileged[1] / (outcome_unprivileged[1] + outcome_unprivileged[2])

    fpr_diff = fpr_unprivileged - fpr_privileged
    # Compare against the privileged group; subtracting the unprivileged
    # rate from itself made this term always zero.
    tpr_diff = tpr_unprivileged - tpr_privileged

    fairness = (fpr_diff + tpr_diff) * 0.5

    return fairness

In [None]:
def make_prediction(train, test, unprivileged_groups, privileged_groups):
    """Train four classifiers on ``train`` and score them on ``test``.

    The models are, in order: adversarial debiasing, prejudice remover, the
    same network without debiasing, and a majority-vote ensemble of the
    three. The prejudice remover and its hand-computed equal-opportunity /
    average-odds values always use the ``'sex'`` attribute with 1 as the
    privileged and 0 as the unprivileged value, independent of the groups
    passed in.

    Args:
        train: dataset used to fit every model.
        test: dataset the models predict on and are scored against.
        unprivileged_groups: forwarded to the AIF360 models and metrics.
        privileged_groups: forwarded to the AIF360 models and metrics.

    Returns:
        ``(accuracy_scores, fairness_metrics)`` where ``accuracy_scores`` has
        one accuracy per model and ``fairness_metrics`` has, per model, the
        list ``[mean difference, disparate impact, equal opportunity
        difference, average odds difference, Theil index]``.
    """

    def run_network(debias):
        # Fit/predict with the adversarial-debiasing network; the session is
        # closed and the graph reset even when fitting or predicting fails.
        sess = tf.Session()
        try:
            model = AdversarialDebiasing(privileged_groups=privileged_groups,
                                         unprivileged_groups=unprivileged_groups,
                                         scope_name='debiased_classifier', debias=debias, sess=sess)
            model.fit(train)
            return model.predict(test)
        finally:
            sess.close()
            tf.reset_default_graph()

    def evaluate(predicted):
        # Accuracy plus the two AIF360 metric objects for one prediction set.
        dataset_metric = BinaryLabelDatasetMetric(predicted,
                                                  unprivileged_groups=unprivileged_groups,
                                                  privileged_groups=privileged_groups)
        classification_metric = ClassificationMetric(test, predicted,
                                                     unprivileged_groups=unprivileged_groups,
                                                     privileged_groups=privileged_groups)
        accuracy = accuracy_score(y_true=test.labels, y_pred=predicted.labels)
        return accuracy, dataset_metric, classification_metric

    def library_metrics(dataset_metric, classification_metric):
        # The five fairness values as reported by AIF360 itself.
        return [dataset_metric.mean_difference(),
                classification_metric.disparate_impact(),
                classification_metric.equal_opportunity_difference(),
                classification_metric.average_odds_difference(),
                classification_metric.theil_index()]

    ################## adversarial debiasing #################
    dataset_debiasing_test_reweighing = run_network(debias=True)
    accuracy_adversarial, metric_test, acc_test = evaluate(dataset_debiasing_test_reweighing)
    metrics_adversarial = library_metrics(metric_test, acc_test)

    ##################### prejudice remover #####################
    prejudice_model_reweighing = PrejudiceRemover(eta=100, sensitive_attr='sex')
    prejudice_model_reweighing.fit(train)
    dataset_prejudice_test_reweighing = prejudice_model_reweighing.predict(test)
    accuracy_prejudice, metric_test, acc_test = evaluate(dataset_prejudice_test_reweighing)

    # These two values are computed by the helpers in this file and must be
    # based on the prejudice remover's own predictions (they were previously
    # computed from the adversarial model's output).
    equal_opportunity_difference = equal_opp_diff(test, dataset_prejudice_test_reweighing,
                                                  'sex', privileged=1, unprivileged=0, favourable=1, unfavourable=0)
    average_odds_difference = avg_odds_diff(test, dataset_prejudice_test_reweighing,
                                            'sex', privileged=1, unprivileged=0, favourable=1, unfavourable=0)

    # An infinite disparate impact is capped at 5.0.
    disparate_impact = acc_test.disparate_impact()
    if disparate_impact == math.inf:
        disparate_impact = 5.0

    metrics_prejudice = [metric_test.mean_difference(), disparate_impact, equal_opportunity_difference,
                         average_odds_difference, acc_test.theil_index()]

    ##################### normal neural net #####################
    dataset_neural_test = run_network(debias=False)
    accuracy_nondebiasing, metric_test, acc_test = evaluate(dataset_neural_test)
    metrics_nondebiasing = library_metrics(metric_test, acc_test)

    ##################### ensemble #####################
    # Majority vote per test instance: one column of labels per model.
    votes = np.hstack([predicted.labels.reshape(-1, 1)
                       for predicted in (dataset_debiasing_test_reweighing,
                                         dataset_prejudice_test_reweighing,
                                         dataset_neural_test)])
    # reshape makes the result a column regardless of whether this SciPy
    # version keeps the reduced axis in the mode output.
    majority = np.asarray(mode(votes, axis=1)[0]).reshape(-1, 1)
    dataset_ensemble_test = test.copy()
    dataset_ensemble_test.labels = majority

    accuracy_ensemble, metric_test, acc_test = evaluate(dataset_ensemble_test)
    metrics_ensemble = library_metrics(metric_test, acc_test)

    accuracy_scores = [accuracy_adversarial, accuracy_prejudice, accuracy_nondebiasing, accuracy_ensemble]
    fairness_metrics = [metrics_adversarial, metrics_prejudice, metrics_nondebiasing, metrics_ensemble]

    return accuracy_scores, fairness_metrics

In [None]:
# COMPAS experiment: repeat a random 70/30 split `runs` times, train every
# model with and without reweighing, and write accuracy and fairness tables
# to results/compas/.
privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]

print("Classification with Compas data set\n")
df = pd.read_csv('dataset/compas-scores-two-years.csv')
dataset_orig = preprocess_compasdataset(df)

# Create the output folders before the long-running loop so that a missing
# directory cannot discard the results at save time.
reweighed_dir = "results/compas/reweighed"
nonreweighed_dir = "results/compas/non-reweighed"
os.makedirs(reweighed_dir, exist_ok=True)
os.makedirs(nonreweighed_dir, exist_ok=True)

# Per-run results, keyed by run index.
matrix_accuracy_reweigh = {}
matrix_accuracy_nonreweigh = {}
matrix_fairness_reweigh = {}
matrix_fairness_nonreweigh = {}

runs = 10

for i in range(0, runs):
    print('run =', i+1)

    train, test = dataset_orig.split([0.7], shuffle=True)
    train_transformed = reweighing_data(train, unprivileged_groups, privileged_groups)

    # with reweighing
    accuracy_reweigh, fairness_metrics_reweigh = make_prediction(train_transformed, test, unprivileged_groups, privileged_groups)

    # Without reweighing
    accuracy_nonreweigh, fairness_metrics_nonreweigh = make_prediction(train, test, unprivileged_groups, privileged_groups)

    # store values for each run
    matrix_accuracy_reweigh[i] = accuracy_reweigh
    matrix_fairness_reweigh[i] = fairness_metrics_reweigh
    matrix_accuracy_nonreweigh[i] = accuracy_nonreweigh
    matrix_fairness_nonreweigh[i] = fairness_metrics_nonreweigh

print('\nprediction completed')

# Regroup the fairness metrics by model. make_prediction returns them in the
# order adversarial, prejudice remover, non-debiasing network, ensemble.
# with reweighing
metrics_adversarial_reweigh = [matrix_fairness_reweigh[run][0] for run in range(runs)]
metrics_prejudice_reweigh = [matrix_fairness_reweigh[run][1] for run in range(runs)]
metrics_nondebiasing_reweigh = [matrix_fairness_reweigh[run][2] for run in range(runs)]
metrics_ensemble_reweigh = [matrix_fairness_reweigh[run][3] for run in range(runs)]

# without reweighing
metrics_adversarial_nonreweigh = [matrix_fairness_nonreweigh[run][0] for run in range(runs)]
metrics_prejudice_nonreweigh = [matrix_fairness_nonreweigh[run][1] for run in range(runs)]
metrics_nondebiasing_nonreweigh = [matrix_fairness_nonreweigh[run][2] for run in range(runs)]
metrics_ensemble_nonreweigh = [matrix_fairness_nonreweigh[run][3] for run in range(runs)]

print('compiled all metrics')


# create data frame for all metrics
columns = ['Adversarial Debiasing', 'Prejudice Remover', 'Nondebiasing', 'Ensemble']
accuracy_reweigh = np.array(list(matrix_accuracy_reweigh.values()))
accuracy_nonreweigh = np.array(list(matrix_accuracy_nonreweigh.values()))
compas_accuracy_reweigh = pd.DataFrame(accuracy_reweigh, columns=columns)
compas_accuracy_nonreweigh = pd.DataFrame(accuracy_nonreweigh, columns=columns)

# fairness metrics
columns = ['Mean Difference', 'Disparate Impact', 'Equal Opportunity Difference', 'Average Odds Difference', 'Theil Index']
compas_adversarial_reweigh = pd.DataFrame(metrics_adversarial_reweigh, columns=columns)
compas_prejudice_reweigh = pd.DataFrame(metrics_prejudice_reweigh, columns=columns)
compas_nondebiasing_reweigh = pd.DataFrame(metrics_nondebiasing_reweigh, columns=columns)
compas_ensemble_reweigh = pd.DataFrame(metrics_ensemble_reweigh, columns=columns)

compas_adversarial_nonreweigh = pd.DataFrame(metrics_adversarial_nonreweigh, columns=columns)
compas_prejudice_nonreweigh = pd.DataFrame(metrics_prejudice_nonreweigh, columns=columns)
compas_nondebiasing_nonreweigh = pd.DataFrame(metrics_nondebiasing_nonreweigh, columns=columns)
compas_ensemble_nonreweigh = pd.DataFrame(metrics_ensemble_nonreweigh, columns=columns)


# save to csv
compas_accuracy_reweigh.to_csv(reweighed_dir + "/accuracy.csv", encoding='utf-8')
compas_adversarial_reweigh.to_csv(reweighed_dir + "/adversarial.csv", encoding='utf-8')
compas_prejudice_reweigh.to_csv(reweighed_dir + "/prejudice.csv", encoding='utf-8')
compas_nondebiasing_reweigh.to_csv(reweighed_dir + "/neural_net.csv", encoding='utf-8')
compas_ensemble_reweigh.to_csv(reweighed_dir + "/ensemble.csv", encoding='utf-8')

compas_accuracy_nonreweigh.to_csv(nonreweighed_dir + "/accuracy.csv", encoding='utf-8')
compas_adversarial_nonreweigh.to_csv(nonreweighed_dir + "/adversarial.csv", encoding='utf-8')
compas_prejudice_nonreweigh.to_csv(nonreweighed_dir + "/prejudice.csv", encoding='utf-8')
compas_nondebiasing_nonreweigh.to_csv(nonreweighed_dir + "/neural_net.csv", encoding='utf-8')
compas_ensemble_nonreweigh.to_csv(nonreweighed_dir + "/ensemble.csv", encoding='utf-8')

print('all files saved to csv')

In [None]:
# German credit experiment: repeat a random 70/30 split `runs` times, train
# every model with and without reweighing, and write accuracy and fairness
# tables to results/german/.
privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]

print("Classification with German data set\n")
df = pd.read_csv('dataset/german_credit.csv')
dataset_orig = preprocess_germandataset(df)

# Create the output folders before the long-running loop so that a missing
# directory cannot discard the results at save time.
reweighed_dir = "results/german/reweighed"
nonreweighed_dir = "results/german/non-reweighed"
os.makedirs(reweighed_dir, exist_ok=True)
os.makedirs(nonreweighed_dir, exist_ok=True)

# Per-run results, keyed by run index.
matrix_accuracy_reweigh = {}
matrix_accuracy_nonreweigh = {}
matrix_fairness_reweigh = {}
matrix_fairness_nonreweigh = {}

runs = 10

for i in range(0, runs):
    print('run =', i+1)

    train, test = dataset_orig.split([0.7], shuffle=True)
    train_transformed = reweighing_data(train, unprivileged_groups, privileged_groups)

    # with reweighing
    accuracy_reweigh, fairness_metrics_reweigh = make_prediction(train_transformed, test, unprivileged_groups, privileged_groups)

    # Without reweighing
    accuracy_nonreweigh, fairness_metrics_nonreweigh = make_prediction(train, test, unprivileged_groups, privileged_groups)

    # store values for each run
    matrix_accuracy_reweigh[i] = accuracy_reweigh
    matrix_fairness_reweigh[i] = fairness_metrics_reweigh
    matrix_accuracy_nonreweigh[i] = accuracy_nonreweigh
    matrix_fairness_nonreweigh[i] = fairness_metrics_nonreweigh

print('\nprediction completed')

# Regroup the fairness metrics by model. make_prediction returns them in the
# order adversarial, prejudice remover, non-debiasing network, ensemble.
# with reweighing
metrics_adversarial_reweigh = [matrix_fairness_reweigh[run][0] for run in range(runs)]
metrics_prejudice_reweigh = [matrix_fairness_reweigh[run][1] for run in range(runs)]
metrics_nondebiasing_reweigh = [matrix_fairness_reweigh[run][2] for run in range(runs)]
metrics_ensemble_reweigh = [matrix_fairness_reweigh[run][3] for run in range(runs)]

# without reweighing
metrics_adversarial_nonreweigh = [matrix_fairness_nonreweigh[run][0] for run in range(runs)]
metrics_prejudice_nonreweigh = [matrix_fairness_nonreweigh[run][1] for run in range(runs)]
metrics_nondebiasing_nonreweigh = [matrix_fairness_nonreweigh[run][2] for run in range(runs)]
metrics_ensemble_nonreweigh = [matrix_fairness_nonreweigh[run][3] for run in range(runs)]

print('compiled all metrics')


# create data frame for all metrics
columns = ['Adversarial Debiasing', 'Prejudice Remover', 'Nondebiasing', 'Ensemble']
accuracy_reweigh = np.array(list(matrix_accuracy_reweigh.values()))
accuracy_nonreweigh = np.array(list(matrix_accuracy_nonreweigh.values()))
german_accuracy_reweigh = pd.DataFrame(accuracy_reweigh, columns=columns)
german_accuracy_nonreweigh = pd.DataFrame(accuracy_nonreweigh, columns=columns)

# fairness metrics
columns = ['Mean Difference', 'Disparate Impact', 'Equal Opportunity Difference', 'Average Odds Difference', 'Theil Index']
german_adversarial_reweigh = pd.DataFrame(metrics_adversarial_reweigh, columns=columns)
german_prejudice_reweigh = pd.DataFrame(metrics_prejudice_reweigh, columns=columns)
german_nondebiasing_reweigh = pd.DataFrame(metrics_nondebiasing_reweigh, columns=columns)
german_ensemble_reweigh = pd.DataFrame(metrics_ensemble_reweigh, columns=columns)

german_adversarial_nonreweigh = pd.DataFrame(metrics_adversarial_nonreweigh, columns=columns)
german_prejudice_nonreweigh = pd.DataFrame(metrics_prejudice_nonreweigh, columns=columns)
german_nondebiasing_nonreweigh = pd.DataFrame(metrics_nondebiasing_nonreweigh, columns=columns)
german_ensemble_nonreweigh = pd.DataFrame(metrics_ensemble_nonreweigh, columns=columns)


# save to csv
german_accuracy_reweigh.to_csv(reweighed_dir + "/accuracy.csv", encoding='utf-8')
german_adversarial_reweigh.to_csv(reweighed_dir + "/adversarial.csv", encoding='utf-8')
german_prejudice_reweigh.to_csv(reweighed_dir + "/prejudice.csv", encoding='utf-8')
german_nondebiasing_reweigh.to_csv(reweighed_dir + "/neural_net.csv", encoding='utf-8')
german_ensemble_reweigh.to_csv(reweighed_dir + "/ensemble.csv", encoding='utf-8')

german_accuracy_nonreweigh.to_csv(nonreweighed_dir + "/accuracy.csv", encoding='utf-8')
german_adversarial_nonreweigh.to_csv(nonreweighed_dir + "/adversarial.csv", encoding='utf-8')
german_prejudice_nonreweigh.to_csv(nonreweighed_dir + "/prejudice.csv", encoding='utf-8')
german_nondebiasing_nonreweigh.to_csv(nonreweighed_dir + "/neural_net.csv", encoding='utf-8')
german_ensemble_nonreweigh.to_csv(nonreweighed_dir + "/ensemble.csv", encoding='utf-8')

print('all files saved to csv')