In [3]:
from sklearn.model_selection import StratifiedShuffleSplit
from adafair import AdaFair
from adafair import load_adult
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

In [4]:
def _safe_rate(hits, total):
    """Return ``hits / total``, or NaN when ``total`` is zero (rate undefined)."""
    return hits / total if total else float("nan")


def calculate_performance(data, labels, predictions, saIndex, saValue):
    """Compute accuracy and group-wise TPR/TNR metrics for binary predictions.

    A row belongs to the protected group when ``row[saIndex] == saValue``;
    every other row is non-protected. A label equal to ``1`` is the positive
    class and any other label value is treated as negative.

    Parameters
    ----------
    data : iterable of indexable rows
        Feature rows, aligned position-by-position with ``labels`` and
        ``predictions``.
    labels : indexable
        Ground-truth labels.
    predictions : indexable
        Predicted labels.
    saIndex : index into a row
        Position of the sensitive attribute inside each row.
    saValue : object
        Sensitive-attribute value that identifies the protected group.

    Returns
    -------
    dict
        Keys, in order: ``balanced_accuracy`` (mean of overall TPR and TNR),
        ``accuracy`` (from ``accuracy_score``), ``fairness``
        (``|TPR_non_protected - TPR_protected| +
        |TNR_non_protected - TNR_protected|``), ``TPR_protected``,
        ``TPR_non_protected``, ``TNR_protected``, ``TNR_non_protected``.

    Notes
    -----
    A rate whose denominator is zero (for example a group with no positive
    examples in this split) is reported as NaN instead of raising
    ``ZeroDivisionError``; NaN then propagates into ``fairness`` and
    ``balanced_accuracy`` when they depend on that rate.
    """
    # Confusion-matrix tallies per group, keyed by "is protected".
    # Floats are used so every ratio below is a float division.
    tallies = {
        True: {"tp": 0., "tn": 0., "fp": 0., "fn": 0.},
        False: {"tp": 0., "tn": 0., "fp": 0., "fn": 0.},
    }

    for idx, row in enumerate(data):
        group = tallies[bool(row[saIndex] == saValue)]
        is_positive = labels[idx] == 1
        if labels[idx] == predictions[idx]:
            # correctly classified
            group["tp" if is_positive else "tn"] += 1.
        else:
            # misclassified: a missed positive is a FN, a wrongly flagged negative is a FP
            group["fn" if is_positive else "fp"] += 1.

    prot, non_prot = tallies[True], tallies[False]

    tpr_protected = _safe_rate(prot["tp"], prot["tp"] + prot["fn"])
    tnr_protected = _safe_rate(prot["tn"], prot["tn"] + prot["fp"])

    tpr_non_protected = _safe_rate(non_prot["tp"], non_prot["tp"] + non_prot["fn"])
    tnr_non_protected = _safe_rate(non_prot["tn"], non_prot["tn"] + non_prot["fp"])

    # Overall (group-agnostic) rates for balanced accuracy.
    tp_all = prot["tp"] + non_prot["tp"]
    tn_all = prot["tn"] + non_prot["tn"]
    tpr_all = _safe_rate(tp_all, tp_all + prot["fn"] + non_prot["fn"])
    tnr_all = _safe_rate(tn_all, tn_all + prot["fp"] + non_prot["fp"])

    output = dict()
    output["balanced_accuracy"] = (tpr_all + tnr_all) * 0.5

    output["accuracy"] = accuracy_score(labels, predictions)
    output["fairness"] = abs(tpr_non_protected - tpr_protected) + abs(tnr_non_protected - tnr_protected)
    output["TPR_protected"] = tpr_protected
    output["TPR_non_protected"] = tpr_non_protected
    output["TNR_protected"] = tnr_protected
    output["TNR_non_protected"] = tnr_non_protected
    return output

In [5]:
# Load the dataset with "race" as the sensitive attribute. The four returned
# values are used below as: feature rows (X), labels (y), the sensitive
# attribute's column index (sa_index) and the protected-group value (p_Group).
# NOTE(review): presumably the UCI Adult data - confirm against adafair.load_adult.
X, y, sa_index, p_Group = load_adult("race")

In [8]:
# Evaluate AdaFair on 5 stratified 80/20 shuffle splits. The fixed seed makes
# the train/test indices the same on every run of this cell.
strat_suffle = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
results = []  # one metrics dict per split, kept for later aggregation
for train_index, test_index in strat_suffle.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    classifier = AdaFair(n_estimators=200, saIndex=sa_index, saValue=p_Group, trade_off_c=1.0)

    classifier.fit(X_train, y_train)
    y_pred_labels = classifier.predict(X_test)
    split_performance = calculate_performance(X_test, y_test, y_pred_labels, sa_index, p_Group)
    results.append(split_performance)
    print(split_performance)

{'balanced_accuracy': 0.7967634749816042, 'accuracy': 0.8219147758716104, 'fairness': 0.051839908506183585, 'TPR_protected': 0.7336956521739131, 'TPR_non_protected': 0.7480544747081712, 'TNR_protected': 0.8780709736123748, 'TNR_non_protected': 0.8405898876404494}
{'balanced_accuracy': 0.807815292231683, 'accuracy': 0.7969009407858328, 'fairness': 0.10075486727097083, 'TPR_protected': 0.8731707317073171, 'TPR_non_protected': 0.8250614250614251, 'TNR_protected': 0.8301886792452831, 'TNR_non_protected': 0.7775431186202042}
{'balanced_accuracy': 0.7976707860296437, 'accuracy': 0.827780852241284, 'fairness': 0.08583055797602135, 'TPR_protected': 0.79, 'TPR_non_protected': 0.7328431372549019, 'TNR_protected': 0.881578947368421, 'TNR_non_protected': 0.8529052521374978}


KeyboardInterrupt: 