In [24]:
import sys

import numpy as np

import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.base import clone

from tensorflow.keras.optimizers import SGD, Adam

from aif360.datasets import GermanDataset
from aif360.metrics import ClassificationMetric
from aif360.algorithms import preprocessing, inprocessing, postprocessing

from IPython.display import Markdown, display

In [27]:
dataset = GermanDataset(
    protected_attribute_names=['age'],
    privileged_classes=[lambda x: x >= 25], #age >= 25 is privileged
    features_to_drop=['personal_status', 'sex'] #ignore sex-related stuff
)

dataset_train, dataset_test = dataset.split([0.7], shuffle = True)

privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]

RF = RandomForestClassifier(n_estimators=1100)

RW = preprocessing.Reweighing(unprivileged_groups=unprivileged_groups,
                             privileged_groups=privileged_groups)

DIR = preprocessing.DisparateImpactRemover()

OP = preprocessing.OptimPreproc(SGD, {"lr":0.1}, unprivileged_groups=unprivileged_groups,
                                privileged_groups=privileged_groups)

LFR = preprocessing.LFR(unprivileged_groups, privileged_groups)

metrics = analyze_algo(dataset_train, dataset_test, privileged_groups, unprivileged_groups, RF)
df = pd.DataFrame(metrics, columns=["No Intervention"])
df.index = ["accuracy", "theil index", "consistency", "false positive rate difference",
            "false negative rate difference"]
df["Reweighing"] = analyze_algo(dataset_train, dataset_test, privileged_groups, unprivileged_groups, RF, RW)
df["Disparate Impact Remover"] = analyze_algo(dataset_train, dataset_test, privileged_groups, unprivileged_groups, RF, DIR)
#df["Optimized Preprocessing"] = analyze_algo(dataset_train, dataset_test, privileged_groups, unprivileged_groups, RF, OP)
df["Learning Fair Representations"] = analyze_algo(dataset_train, dataset_test, privileged_groups, unprivileged_groups, RF, LFR)
df

Privileged and unprivileged groups specified will not be used. The protected attributes are directly specified in the data preprocessing function. The current implementation automatically adjusts for discrimination across all groups. This can be changed by changing the optimization code.


Unnamed: 0,No Intervention,Reweighing,Disparate Impact Remover,Learning Fair Representations
accuracy,0.77,0.77,0.7533,0.7067
theil index,0.1111,0.1173,0.1455,0.0572
consistency,0.702,0.702,0.702,0.702
false positive rate difference,-0.0534,-0.0915,-0.0481,0.0
false negative rate difference,0.0084,0.0351,0.0287,0.0


In [20]:
def run_fairness_metrics(CM:ClassificationMetric):
    return np.array([
        round(CM.accuracy(), 4),
        round(CM.theil_index(), 4),
        round(CM.consistency()[0], 4),
        round(CM.false_positive_rate_difference(), 4),
        round(CM.false_negative_rate_difference(), 4)
    ])

In [14]:
def analyze_algo(dataset_train, dataset_test, privileged_groups, unprivileged_groups, classifier, 
                 preprocessing_algo=None):
    if preprocessing_algo is not None:
        dataset_train = preprocessing_algo.fit_transform(dataset_train)
    
    classifier.fit(dataset_train.features, dataset_train.labels.ravel())
    results = classifier.predict(dataset_test.features)
    
    dataset_test_pred = dataset_test.copy()
    dataset_test_pred.labels = np.array([results]).transpose()

    CM = ClassificationMetric(dataset_test,
                              dataset_test_pred,
                              unprivileged_groups=unprivileged_groups,
                              privileged_groups=privileged_groups)
    return run_fairness_metrics(CM)

# Disregard everything underneath

In [2]:
def print_fairness_metrics(CM:ClassificationMetric):
    print(f"accuracy = {round(CM.accuracy(), 4)}")
    print(f"theil index (goal:0) = {round(CM.theil_index(), 4)}")
    print(f"binary confusion matrix = {CM.binary_confusion_matrix()}")
    print(f"consistency (goal:1) = {round(CM.consistency()[0], 4)}")
    print(f"false positive rate difference (negative:privileged bias) = {round(CM.false_positive_rate_difference(), 4)}")
    print(f"false negative rate difference (negative:privileged bias) = {round(CM.false_negative_rate_difference(), 4)}")

In [3]:
def compare_fairness_metrics(CM1:ClassificationMetric, CM2:ClassificationMetric, side_by_side:bool=False):
    if not side_by_side:
        print_fairness_metrics(CM1)
        print("After Fairness Algos are applied")
        print("_"*10)
        print_fairness_metrics(CM2)
    else:
        print(f"accuracy = {round(CM1.accuracy(), 4)} => {round(CM2.accuracy(), 4)}")
        print(f"theil index (goal:0) = {round(CM1.theil_index(), 4)} => {round(CM2.theil_index(), 4)}")
        print(f"binary confusion matrix = {CM1.binary_confusion_matrix()} => {CM2.binary_confusion_matrix()}")
        print(f"consistency (goal:1) = {round(CM1.consistency()[0], 4)} => {round(CM2.consistency()[0], 4)}")
        print(f"false positive rate difference (negative:privileged bias) = ", end="")
        print(f"{round(CM1.false_positive_rate_difference(), 4)} => {round(CM2.false_positive_rate_difference(), 4)}")
        print(f"false negative rate difference (negative:privileged bias) = ", end="")
        print(f"{round(CM1.false_negative_rate_difference(), 4)} => {round(CM2.false_negative_rate_difference(), 4)}")

In [2]:
def compare_fairness_metrics_as_df(df:pd.DataFrame, CM1:ClassificationMetric, CM2:ClassificationMetric,
                                   intervention:str) -> pd.DataFrame:
    metrics = np.array([[round(CM1.accuracy(), 4), round(CM2.accuracy(), 4)],
        [round(CM1.theil_index(), 4), round(CM2.theil_index(), 4)],
        [round(CM1.consistency()[0], 4), round(CM2.consistency()[0], 4)],
        [round(CM1.false_positive_rate_difference(), 4), round(CM2.false_positive_rate_difference(), 4)],
        [round(CM1.false_negative_rate_difference(), 4), round(CM2.false_negative_rate_difference(), 4)]]
    )
    df = pd.DataFrame(metrics, columns=["no intervention", intervention])
    df.index = ["accuracy", "theil index", "consistency", "false positive rate difference",
                "false negative rate difference"]
    return df

In [9]:
def analyze_debiasing_algos(dataset, privileged_groups, unprivileged_groups, classifier, 
                             preprocessing_algo:preprocessing = None, inprocessing_algo:inprocessing = None, 
                             postprocessing_algo:postprocessing = None):
    dataset_train, dataset_test = dataset.split([0.7], shuffle = True)
    classifier.fit(dataset_train.features, dataset_train.labels.ravel())
    results = classifier.predict(dataset_test.features)
    
    dataset_test_pred = dataset_test.copy()
    dataset_test_pred.labels = np.array([results]).transpose()

    CM1 = ClassificationMetric(dataset_test,
                              dataset_test_pred,
                              unprivileged_groups=unprivileged_groups,
                              privileged_groups=privileged_groups)

    if preprocessing_algo is not None:
        dataset_train = preprocessing_algo.fit_transform(dataset_train)
    fair_classifier = clone(classifier)
    fair_classifier.fit(dataset_train.features, dataset_train.labels.ravel())
    
    results = fair_classifier.predict(dataset_test.features)
    
    dataset_test_pred = dataset_test.copy()
    dataset_test_pred.labels = np.array([results]).transpose()
    
    CM2 = ClassificationMetric(dataset_test,
                              dataset_test_pred,
                              unprivileged_groups=unprivileged_groups,
                              privileged_groups=privileged_groups)
    return compare_fairness_metrics_as_df(CM1, CM2, "preprocessing")
        
    '''_________________________
    RW = preprocessing.Reweighing(unprivileged_groups=unprivileged_groups,
                                 privileged_groups=privileged_groups)

    fair_dataset_train = RW.fit_transform(dataset_train)

    fair_RF = RandomForestClassifier(n_estimators=1100)
    fair_RF.fit(fair_dataset_train.features, fair_dataset_train.labels.ravel())

    results = fair_RF.predict(dataset_test.features)

    dataset_test_pred = dataset_test.copy()
    dataset_test_pred.labels = np.array([results]).transpose()

    CM2 = ClassificationMetric(dataset_test,
                              dataset_test_pred,
                              unprivileged_groups=unprivileged_groups,
                              privileged_groups=privileged_groups)
    compare_fairness_metrics(CM1, CM2, side_by_side=True)'''

In [13]:
dataset = GermanDataset(
    protected_attribute_names=['age'],
    privileged_classes=[lambda x: x >= 25], #age >= 25 is privileged
    features_to_drop=['personal_status', 'sex'] #ignore sex-related stuff
)

dataset_train, dataset_test = dataset.split([0.7], shuffle = True)

privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]

In [14]:
RF = RandomForestClassifier(n_estimators=1100)
RF.fit(dataset_train.features, dataset_train.labels.ravel())

results = RF.predict(dataset_test.features)

dataset_test_pred = dataset_test.copy()
dataset_test_pred.labels = np.array([results]).transpose()

CM1 = ClassificationMetric(dataset_test,
                          dataset_test_pred,
                          unprivileged_groups=unprivileged_groups,
                          privileged_groups=privileged_groups)

RW = preprocessing.Reweighing(unprivileged_groups=unprivileged_groups,
                             privileged_groups=privileged_groups)

fair_dataset_train = RW.fit_transform(dataset_train)

fair_RF = RandomForestClassifier(n_estimators=1100)
fair_RF.fit(fair_dataset_train.features, fair_dataset_train.labels.ravel())

results = fair_RF.predict(dataset_test.features)

dataset_test_pred = dataset_test.copy()
dataset_test_pred.labels = np.array([results]).transpose()

CM2 = ClassificationMetric(dataset_test,
                          dataset_test_pred,
                          unprivileged_groups=unprivileged_groups,
                          privileged_groups=privileged_groups)

In [16]:
print("German Dataset metrics")
compare_fairness_metrics_as_df(CM1, CM2, "reweighing")

German Dataset metrics


Unnamed: 0,no intervention,reweighing
accuracy,0.79,0.78
theil index,0.0863,0.0909
consistency,0.694,0.694
false positive rate difference,-0.2635,-0.2184
false negative rate difference,0.1222,0.1166
