In [4]:
import numpy as np

import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.base import clone

from tensorflow.keras.optimizers import SGD, Adam

from aif360.datasets import AdultDataset, BankDataset, CompasDataset, GermanDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.algorithms import preprocessing, inprocessing, postprocessing

from IPython.display import Markdown, display

In [28]:
def run_classification_metrics(CM:ClassificationMetric):
    return np.array([
        round(CM.accuracy(), 4),
        round(CM.theil_index(), 4),
        round(CM.consistency()[0], 4),
        round(CM.false_positive_rate_difference(), 4),
        round(CM.false_negative_rate_difference(), 4),
        round(CM.error_rate_difference(), 4),
        round(CM.false_discovery_rate_difference(), 4),
        round(CM.false_omission_rate_difference(), 4)
    ])

In [29]:
def run_binary_dataset_metrics(BLDM:BinaryLabelDatasetMetric):
    return np.array([
        round(BLDM.statistical_parity_difference(), 4), # negative means privileged bias
        round(BLDM.base_rate(privileged=True), 4), # 1 means privileged bias
        round(BLDM.base_rate(privileged=False), 4), # 1 means unprivileged bias
    ])

In [33]:
def analyze_algo(dataset_train, dataset_test, privileged_groups, unprivileged_groups, classifier, 
                 preprocessing_algo=None):
    if preprocessing_algo is not None:
        _dataset_train = preprocessing_algo.fit_transform(dataset_train)
    else:
        _dataset_train = dataset_train
    
    classifier.fit(_dataset_train.features, dataset_train.labels.ravel())
    results = classifier.predict(dataset_test.features)
    
    dataset_test_pred = dataset_test.copy()
    dataset_test_pred.labels = np.array([results]).transpose()

    CM = ClassificationMetric(dataset_test,
                              dataset_test_pred,
                              unprivileged_groups=unprivileged_groups,
                              privileged_groups=privileged_groups)
    BLDM = BinaryLabelDatasetMetric(_dataset_train,
                                    unprivileged_groups=unprivileged_groups,
                                    privileged_groups=privileged_groups)
    return np.concatenate((run_classification_metrics(CM), run_binary_dataset_metrics(BLDM)))

In [34]:
def run_preproc_algos_on_dataset(dataset, unprivileged_groups, privileged_groups):
    dataset_train, dataset_test = dataset.split([0.7], shuffle = True)

    RF = RandomForestClassifier(n_estimators=1100)

    RW = preprocessing.Reweighing(unprivileged_groups=unprivileged_groups,
                                 privileged_groups=privileged_groups)

    DIR = preprocessing.DisparateImpactRemover()

    #OP = preprocessing.OptimPreproc(SGD, {"learning_rate":0.1}, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

    LFR = preprocessing.LFR(unprivileged_groups, privileged_groups, k=7)

    metrics = analyze_algo(dataset_train.copy(), dataset_test, privileged_groups, unprivileged_groups, RF)
    df = pd.DataFrame(metrics, columns=["No Intervention"])
    df.index = ["accuracy", "theil index", "consistency", "false positive rate difference",
                "false negative rate difference", "error rate difference",
                "false discovery rate difference", "false omission rate difference",
                "stat parity difference", "priv base rate", "unpriv base rate"]
    df["Reweighing"] = analyze_algo(dataset_train.copy(), dataset_test, privileged_groups, unprivileged_groups, RF, RW)
    df["Disparate Impact Remover"] = analyze_algo(dataset_train.copy(), dataset_test, privileged_groups, unprivileged_groups, RF, DIR)
    #df["Optimized Preprocessing"] = analyze_algo(dataset_train, dataset_test, privileged_groups, unprivileged_groups, RF, OP)
    df["Learning Fair Representations"] = analyze_algo(dataset_train.copy(), dataset_test, privileged_groups, unprivileged_groups, RF, LFR)
    return df

## Preprocessing Algos Analysis

In [36]:
dataset = GermanDataset(
    protected_attribute_names=['age'],
    privileged_classes=[lambda x: x >= 25], #age >= 25 is privileged
    features_to_drop=['personal_status', 'sex'] #ignore sex-related stuff
)
privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]
df = run_preproc_algos_on_dataset(dataset, unprivileged_groups, privileged_groups)
display(df.style.set_caption("German Dataset"))

dataset = AdultDataset()
privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]
df = run_preproc_algos_on_dataset(dataset, unprivileged_groups, privileged_groups)
display(df.style.set_caption("Adult Dataset"))

dataset = BankDataset(
    protected_attribute_names=['age'],
    privileged_classes=[lambda x: x >= 25], #age >= 25 is privileged
    features_to_drop=['day_of_week'] #ignore sex-related stuff
)
privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]
df = run_preproc_algos_on_dataset(dataset, unprivileged_groups, privileged_groups)
display(df.style.set_caption("Bank Dataset"))

dataset = CompasDataset()
privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]
df = run_preproc_algos_on_dataset(dataset, unprivileged_groups, privileged_groups)
display(df.style.set_caption("Compas Dataset"))

Unnamed: 0,No Intervention,Reweighing,Disparate Impact Remover,Learning Fair Representations
accuracy,0.7867,0.7933,0.78,0.68
theil index,0.1205,0.1101,0.1248,0.1214
consistency,0.7013,0.7013,0.7013,0.7013
false positive rate difference,0.0789,-0.0224,0.064,0.0597
false negative rate difference,0.1072,0.1678,0.1924,-0.0979
error rate difference,0.1379,0.1456,0.1892,0.0448
false discovery rate difference,0.1278,0.1064,0.1466,0.0943
false omission rate difference,0.1084,0.1572,0.1985,-0.8261
stat parity difference,-0.1357,0.0,-0.1357,0.0
priv base rate,0.7085,0.6871,0.7085,1.0




Unnamed: 0,No Intervention,Reweighing,Disparate Impact Remover,Learning Fair Representations
accuracy,0.8445,0.8434,0.8391,0.7515
theil index,0.1188,0.1199,0.1186,0.2856
consistency,0.847,0.847,0.847,0.847
false positive rate difference,-0.0902,-0.0894,-0.1117,0.0
false negative rate difference,0.1003,0.0986,0.133,0.0
error rate difference,-0.1165,-0.1167,-0.1255,-0.2031
false discovery rate difference,0.015,0.0201,-0.0266,0.0
false omission rate difference,-0.0999,-0.101,-0.0955,-0.2031
stat parity difference,-0.1971,-0.0,-0.1971,0.0249
priv base rate,0.3121,0.2476,0.3121,0.5903




Unnamed: 0,No Intervention,Reweighing,Disparate Impact Remover,Learning Fair Representations
accuracy,0.904,0.9043,0.8371,0.8791
theil index,0.0758,0.0754,0.0823,0.1289
consistency,0.9017,0.9017,0.9017,0.9017
false positive rate difference,0.034,0.0386,-0.029,0.0
false negative rate difference,-0.0112,-0.0074,0.1822,0.0
error rate difference,0.0581,0.0623,0.0275,0.0708
false discovery rate difference,0.0116,0.0278,-0.1116,0.0
false omission rate difference,0.0444,0.0453,0.0715,0.0708
stat parity difference,0.1232,0.0,0.1232,0.0
priv base rate,0.1256,0.129,0.1256,0.0




Unnamed: 0,No Intervention,Reweighing,Disparate Impact Remover,Learning Fair Representations
accuracy,0.6548,0.6521,0.6218,0.5014
theil index,0.2349,0.2412,0.2862,0.5742
consistency,0.6591,0.6591,0.6591,0.6591
false positive rate difference,-0.1229,-0.1203,-0.1644,0.1249
false negative rate difference,0.0994,0.1119,0.1726,-0.1664
error rate difference,0.0242,0.0311,0.043,-0.1185
false discovery rate difference,0.0657,0.0698,0.0718,0.1105
false omission rate difference,-0.0769,-0.0663,-0.0549,-0.1276
stat parity difference,-0.1345,-0.0,-0.1345,0.0
priv base rate,0.6514,0.5429,0.6514,1.0


In [20]:
dataset = GermanDataset(
    protected_attribute_names=['age'],
    privileged_classes=[lambda x: x >= 25], #age >= 25 is privileged
    features_to_drop=['personal_status', 'sex'] #ignore sex-related stuff
)
privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]

dataset_train, dataset_test = dataset.split([0.7], shuffle = True)

bldm = BinaryLabelDatasetMetric(dataset_train, unprivileged_groups=unprivileged_groups,
                                privileged_groups=privileged_groups)
print(run_binary_dataset_metrics(bldm))
bldm = BinaryLabelDatasetMetric(dataset_train, unprivileged_groups=unprivileged_groups,
                                privileged_groups=privileged_groups)
print(run_binary_dataset_metrics(bldm))
bldm = BinaryLabelDatasetMetric(dataset_train, unprivileged_groups=unprivileged_groups,
                                privileged_groups=privileged_groups)
print(run_binary_dataset_metrics(bldm))


[-0.1687  0.7201  0.5514]
[-0.1687  0.7201  0.5514]
[-0.1687  0.7201  0.5514]


# Disregard everything underneath

In [2]:
def print_fairness_metrics(CM:ClassificationMetric):
    print(f"accuracy = {round(CM.accuracy(), 4)}")
    print(f"theil index (goal:0) = {round(CM.theil_index(), 4)}")
    print(f"binary confusion matrix = {CM.binary_confusion_matrix()}")
    print(f"consistency (goal:1) = {round(CM.consistency()[0], 4)}")
    print(f"false positive rate difference (negative:privileged bias) = {round(CM.false_positive_rate_difference(), 4)}")
    print(f"false negative rate difference (negative:privileged bias) = {round(CM.false_negative_rate_difference(), 4)}")

In [3]:
def compare_fairness_metrics(CM1:ClassificationMetric, CM2:ClassificationMetric, side_by_side:bool=False):
    if not side_by_side:
        print_fairness_metrics(CM1)
        print("After Fairness Algos are applied")
        print("_"*10)
        print_fairness_metrics(CM2)
    else:
        print(f"accuracy = {round(CM1.accuracy(), 4)} => {round(CM2.accuracy(), 4)}")
        print(f"theil index (goal:0) = {round(CM1.theil_index(), 4)} => {round(CM2.theil_index(), 4)}")
        print(f"binary confusion matrix = {CM1.binary_confusion_matrix()} => {CM2.binary_confusion_matrix()}")
        print(f"consistency (goal:1) = {round(CM1.consistency()[0], 4)} => {round(CM2.consistency()[0], 4)}")
        print(f"false positive rate difference (negative:privileged bias) = ", end="")
        print(f"{round(CM1.false_positive_rate_difference(), 4)} => {round(CM2.false_positive_rate_difference(), 4)}")
        print(f"false negative rate difference (negative:privileged bias) = ", end="")
        print(f"{round(CM1.false_negative_rate_difference(), 4)} => {round(CM2.false_negative_rate_difference(), 4)}")

In [41]:
def compare_fairness_metrics_as_df(CM1:ClassificationMetric, CM2:ClassificationMetric,
                                   intervention:str) -> pd.DataFrame:
    metrics = np.array([[round(CM1.accuracy(), 4), round(CM2.accuracy(), 4)],
        [round(CM1.theil_index(), 4), round(CM2.theil_index(), 4)],
        [round(CM1.consistency()[0], 4), round(CM2.consistency()[0], 4)],
        [round(CM1.false_positive_rate_difference(), 4), round(CM2.false_positive_rate_difference(), 4)],
        [round(CM1.false_negative_rate_difference(), 4), round(CM2.false_negative_rate_difference(), 4)]]
    )
    df = pd.DataFrame(metrics, columns=["no intervention", intervention])
    df.index = ["accuracy", "theil index", "consistency", "false positive rate difference",
                "false negative rate difference"]
    return df

In [9]:
def analyze_debiasing_algos(dataset, privileged_groups, unprivileged_groups, classifier, 
                             preprocessing_algo:preprocessing = None, inprocessing_algo:inprocessing = None, 
                             postprocessing_algo:postprocessing = None):
    dataset_train, dataset_test = dataset.split([0.7], shuffle = True)
    classifier.fit(dataset_train.features, dataset_train.labels.ravel())
    results = classifier.predict(dataset_test.features)
    
    dataset_test_pred = dataset_test.copy()
    dataset_test_pred.labels = np.array([results]).transpose()

    CM1 = ClassificationMetric(dataset_test,
                              dataset_test_pred,
                              unprivileged_groups=unprivileged_groups,
                              privileged_groups=privileged_groups)

    if preprocessing_algo is not None:
        dataset_train = preprocessing_algo.fit_transform(dataset_train)
    fair_classifier = clone(classifier)
    fair_classifier.fit(dataset_train.features, dataset_train.labels.ravel())
    
    results = fair_classifier.predict(dataset_test.features)
    
    dataset_test_pred = dataset_test.copy()
    dataset_test_pred.labels = np.array([results]).transpose()
    
    CM2 = ClassificationMetric(dataset_test,
                              dataset_test_pred,
                              unprivileged_groups=unprivileged_groups,
                              privileged_groups=privileged_groups)
    return compare_fairness_metrics_as_df(CM1, CM2, "preprocessing")
        
    '''_________________________
    RW = preprocessing.Reweighing(unprivileged_groups=unprivileged_groups,
                                 privileged_groups=privileged_groups)

    fair_dataset_train = RW.fit_transform(dataset_train)

    fair_RF = RandomForestClassifier(n_estimators=1100)
    fair_RF.fit(fair_dataset_train.features, fair_dataset_train.labels.ravel())

    results = fair_RF.predict(dataset_test.features)

    dataset_test_pred = dataset_test.copy()
    dataset_test_pred.labels = np.array([results]).transpose()

    CM2 = ClassificationMetric(dataset_test,
                              dataset_test_pred,
                              unprivileged_groups=unprivileged_groups,
                              privileged_groups=privileged_groups)
    compare_fairness_metrics(CM1, CM2, side_by_side=True)'''

In [36]:
dataset = GermanDataset(
    protected_attribute_names=['age'],
    privileged_classes=[lambda x: x >= 25], #age >= 25 is privileged
    features_to_drop=['personal_status', 'sex'] #ignore sex-related stuff
)

dataset_train, dataset_test = dataset.split([0.7], shuffle = True)

privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]

In [37]:
RF = RandomForestClassifier(n_estimators=1100)
RF.fit(dataset_train.features, dataset_train.labels.ravel())

results = RF.predict(dataset_test.features)

dataset_test_pred = dataset_test.copy()
dataset_test_pred.labels = np.array([results]).transpose()

CM1 = ClassificationMetric(dataset_test,
                          dataset_test_pred,
                          unprivileged_groups=unprivileged_groups,
                          privileged_groups=privileged_groups)

RW = preprocessing.LFR(unprivileged_groups=unprivileged_groups,
                             privileged_groups=privileged_groups)

fair_dataset_train = RW.fit_transform(dataset_train)

fair_RF = RandomForestClassifier(n_estimators=1100)
fair_RF.fit(fair_dataset_train.features, fair_dataset_train.labels.ravel())

results = fair_RF.predict(dataset_test.features)

dataset_test_pred = dataset_test.copy()
dataset_test_pred.labels = np.array([results]).transpose()

CM2 = ClassificationMetric(dataset_test,
                          dataset_test_pred,
                          unprivileged_groups=unprivileged_groups,
                          privileged_groups=privileged_groups)

In [43]:
print("German Dataset metrics")
compare_fairness_metrics_as_df(CM1, CM2, "reweighing")
print(CM1.binary_confusion_matrix(), CM2.binary_confusion_matrix())

German Dataset metrics
{'TP': 192.0, 'FP': 51.0, 'TN': 40.0, 'FN': 17.0} {'TP': 209.0, 'FP': 91.0, 'TN': 0.0, 'FN': 0.0}
