In [1]:
import sys

import numpy as np

import pandas as pd

from sklearn.ensemble import RandomForestClassifier

from aif360.datasets import GermanDataset
from aif360.metrics import ClassificationMetric
from aif360.algorithms import preprocessing, inprocessing, postprocessing

from IPython.display import Markdown, display

In [2]:
def print_fairness_metrics(CM:ClassificationMetric):
    print(f"accuracy = {round(CM.accuracy(), 4)}")
    print(f"theil index (goal:0) = {round(CM.theil_index(), 4)}")
    print(f"binary confusion matrix = {CM.binary_confusion_matrix()}")
    print(f"consistency (goal:1) = {round(CM.consistency()[0], 4)}")
    print(f"false positive rate difference (negative:privileged bias) = {round(CM.false_positive_rate_difference(), 4)}")
    print(f"false negative rate difference (negative:privileged bias) = {round(CM.false_negative_rate_difference(), 4)}")

In [3]:
def compare_fairness_metrics(CM1:ClassificationMetric, CM2:ClassificationMetric, side_by_side:bool=False):
    if not side_by_side:
        print_fairness_metrics(CM1)
        print("After Fairness Algos are applied")
        print("_"*10)
        print_fairness_metrics(CM2)
    else:
        print(f"accuracy = {round(CM1.accuracy(), 4)} => {round(CM2.accuracy(), 4)}")
        print(f"theil index (goal:0) = {round(CM1.theil_index(), 4)} => {round(CM2.theil_index(), 4)}")
        print(f"binary confusion matrix = {CM1.binary_confusion_matrix()} => {CM2.binary_confusion_matrix()}")
        print(f"consistency (goal:1) = {round(CM1.consistency()[0], 4)} => {round(CM2.consistency()[0], 4)}")
        print(f"false positive rate difference (negative:privileged bias) = ", end="")
        print(f"{round(CM1.false_positive_rate_difference(), 4)} => {round(CM2.false_positive_rate_difference(), 4)}")
        print(f"false negative rate difference (negative:privileged bias) = ", end="")
        print(f"{round(CM1.false_negative_rate_difference(), 4)} => {round(CM2.false_negative_rate_difference(), 4)}")

In [4]:
def compare_fairness_metrics_as_df(CM1:ClassificationMetric, CM2:ClassificationMetric,
                                   intervention:str) -> pd.DataFrame:
    metrics = np.array([[round(CM1.accuracy(), 4), round(CM2.accuracy(), 4)],
        [round(CM1.theil_index(), 4), round(CM2.theil_index(), 4)],
        [round(CM1.consistency()[0], 4), round(CM2.consistency()[0], 4)],
        [round(CM1.false_positive_rate_difference(), 4), round(CM2.false_positive_rate_difference(), 4)],
        [round(CM1.false_negative_rate_difference(), 4), round(CM2.false_negative_rate_difference(), 4)]]
    )
    df = pd.DataFrame(metrics, columns=["no intervention", intervention])
    df.index = ["accuracy", "theil index", "consistency", "false positive rate difference",
                "false negative rate difference"]
    return df

In [5]:
def analyze__debiasing_algos(dataset, privileged_groups, unprivileged_groups, classifier, 
                             preprocessing_algo:preprocessing = None, inprocessing_algo:inprocessing = None, 
                             postprocessing_algo:postprocessing = None):
    dataset_train, dataset_test = dataset.split([0.7], shuffle = True)
    classifier.fit(dataset_train.features, dataset_train.labels.ravel())
    results = classifier.predict(dataset_test.features)
    
    dataset_test_pred = dataset_test.copy()
    dataset_test_pred.labels = np.array([results]).transpose()

    CM1 = ClassificationMetric(dataset_test,
                              dataset_test_pred,
                              unprivileged_groups=unprivileged_groups,
                              privileged_groups=privileged_groups)

    if preprocessing_algo not None:
        dataset_train = preprocessing_algo.fit_transform(dataset_train)
    if inprocessing_algo not None:
        dataset_train = inprocessing_algo.transform(dataset_train)
    if postprocessing_algo not None:
        
        
        
    _________________________
    RW = preprocessing.Reweighing(unprivileged_groups=unprivileged_groups,
                                 privileged_groups=privileged_groups)

    fair_dataset_train = RW.fit_transform(dataset_train)

    fair_RF = RandomForestClassifier(n_estimators=1100)
    fair_RF.fit(fair_dataset_train.features, fair_dataset_train.labels.ravel())

    results = fair_RF.predict(dataset_test.features)

    dataset_test_pred = dataset_test.copy()
    dataset_test_pred.labels = np.array([results]).transpose()

    CM2 = ClassificationMetric(dataset_test,
                              dataset_test_pred,
                              unprivileged_groups=unprivileged_groups,
                              privileged_groups=privileged_groups)
    compare_fairness_metrics(CM1, CM2, side_by_side=True)

SyntaxError: invalid syntax (<ipython-input-5-337918e33ec5>, line 16)

In [5]:
dataset = GermanDataset(
    protected_attribute_names=['age'],
    privileged_classes=[lambda x: x >= 25], #age >= 25 is privileged
    features_to_drop=['personal_status', 'sex'] #ignore sex-related stuff
)

dataset_train, dataset_test = dataset.split([0.7], shuffle = True)

privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]

In [6]:
RF = RandomForestClassifier(n_estimators=1100)
RF.fit(dataset_train.features, dataset_train.labels.ravel())

results = RF.predict(dataset_test.features)

dataset_test_pred = dataset_test.copy()
dataset_test_pred.labels = np.array([results]).transpose()

CM1 = ClassificationMetric(dataset_test,
                          dataset_test_pred,
                          unprivileged_groups=unprivileged_groups,
                          privileged_groups=privileged_groups)

RW = preprocessing.Reweighing(unprivileged_groups=unprivileged_groups,
                             privileged_groups=privileged_groups)

fair_dataset_train = RW.fit_transform(dataset_train)

fair_RF = RandomForestClassifier(n_estimators=1100)
fair_RF.fit(fair_dataset_train.features, fair_dataset_train.labels.ravel())

results = fair_RF.predict(dataset_test.features)

dataset_test_pred = dataset_test.copy()
dataset_test_pred.labels = np.array([results]).transpose()

CM2 = ClassificationMetric(dataset_test,
                          dataset_test_pred,
                          unprivileged_groups=unprivileged_groups,
                          privileged_groups=privileged_groups)

In [7]:
print("German Dataset metrics")
compare_fairness_metrics_as_df(CM1, CM2, "reweighing")

German Dataset metrics


Unnamed: 0,no intervention,reweighing
accuracy,0.7167,0.73
theil index,0.128,0.1134
consistency,0.6387,0.6387
false positive rate difference,-0.0238,-0.0238
false negative rate difference,0.2066,0.1465
