In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
from itertools import product

import numpy as np

from aif360.metrics import BinaryLabelDatasetMetric, utils
from aif360.datasets import BinaryLabelDataset
from aif360.datasets.multiclass_label_dataset import MulticlassLabelDataset

ModuleNotFoundError: No module named 'aif360'

In [None]:
def preprocess_german(df, preprocess):
    """Encode the raw German Credit columns as small integers.

    The work is done on a copy of ``df``: the caller's frame is never
    modified, so the function can be re-run on the same raw frame (for
    example after re-executing a notebook cell) and gives the same result
    each time.

    Args:
        df (pandas.DataFrame): Raw frame containing at least the columns
            ``status``, ``credit_hist``, ``credit_amt``, ``duration``,
            ``age``, ``savings``, ``employment``, ``personal_status``,
            ``debtors``, ``property``, ``install_plans``, ``job``,
            ``telephone`` and ``foreign_worker`` (plus ``purpose`` and
            ``housing`` when ``preprocess`` is truthy).
        preprocess (bool): If truthy, one-hot columns for ``purpose`` and
            ``housing`` (prefixed ``purpose_`` / ``housing_``) are appended.
            The source columns themselves are kept.

    Returns:
        pandas.DataFrame: A new frame with the coded columns replaced by
        integers, ``credit_amt`` / ``duration`` replaced by bin indices,
        ``age`` replaced by a 0/1 flag and a new ``gender`` column derived
        from ``personal_status``.
    """
    out = df.copy()

    # Categorical codes -> integer levels. A code missing from its table maps
    # to NaN, which makes the ``astype(int)`` below raise.
    code_tables = {
        'status': {'A11': 0, 'A12': 1, 'A13': 2, 'A14': 3},
        'credit_hist': {'A34': 0, 'A33': 1, 'A32': 2, 'A31': 3, 'A30': 4},
        'savings': {'A61': 0, 'A62': 1, 'A63': 2, 'A64': 3, 'A65': 4},
        'employment': {'A71': 0, 'A72': 1, 'A73': 2, 'A74': 3, 'A75': 4},
        'debtors': {'A101': 0, 'A102': 1, 'A103': 2},
        'property': {'A121': 3, 'A122': 2, 'A123': 1, 'A124': 0},
        'install_plans': {'A141': 1, 'A142': 1, 'A143': 0},
        'job': {'A171': 0, 'A172': 1, 'A173': 2, 'A174': 3},
        'telephone': {'A191': 0, 'A192': 1},
        'foreign_worker': {'A201': 1, 'A202': 0},
    }
    for column, table in code_tables.items():
        out[column] = out[column].map(table).astype(int)

    # Bin masks are built from a snapshot of the raw values, so the result
    # does not depend on the order in which the bins are written back.
    amount = out['credit_amt'].copy()
    out.loc[amount <= 2000, 'credit_amt'] = 0
    out.loc[(amount > 2000) & (amount <= 5000), 'credit_amt'] = 1
    out.loc[amount > 5000, 'credit_amt'] = 2

    months = out['duration'].copy()
    out.loc[months <= 12, 'duration'] = 0
    out.loc[(months > 12) & (months <= 24), 'duration'] = 1
    out.loc[(months > 24) & (months <= 36), 'duration'] = 2
    out.loc[months > 36, 'duration'] = 3

    # 1 if old (45 or above), 0 if young.
    out['age'] = (out['age'] >= 45).astype(int)

    # New column; ``personal_status`` itself is left in place for the caller.
    out['gender'] = out['personal_status'].map(
        {'A91': 1, 'A92': 0, 'A93': 1, 'A94': 1, 'A95': 0}).astype(int)

    if preprocess:
        out = pd.concat(
            [out,
             pd.get_dummies(out['purpose'], prefix='purpose'),
             pd.get_dummies(out['housing'], prefix='housing')],
            axis=1)

    return out

In [None]:
def load_german(preprocess=True, path='german.data', test_size=0.2, random_state=1):
    """Load the German Credit data file and return a train/test split.

    Args:
        preprocess (bool): Forwarded to ``preprocess_german``. When truthy the
            raw ``purpose`` and ``housing`` columns are dropped after
            preprocessing; otherwise they are kept as they are.
        path (str): Location of the space-separated data file. Defaults to
            ``'german.data'`` in the working directory.
        test_size (float): Passed to ``train_test_split``.
        random_state (int): Seed passed to ``train_test_split`` so the split
            is reproducible.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``, each with a fresh
        0-based index. The target is the ``credit`` column with the value 2
        replaced by 0.
    """
    cols = ['status', 'duration', 'credit_hist', 'purpose', 'credit_amt', 'savings', 'employment',
            'install_rate', 'personal_status', 'debtors', 'residence', 'property', 'age', 'install_plans',
            'housing', 'num_credits', 'job', 'num_liable', 'telephone', 'foreign_worker', 'credit']
    df = pd.read_table(path, names=cols, sep=" ", index_col=False)

    # Raw coding is 1 = good, 2 = bad credit risk; recode bad as 0.
    df['credit'] = df['credit'].replace(2, 0)
    y = df['credit']

    df = preprocess_german(df, preprocess)

    # The target and the raw personal_status column never go into the
    # features; purpose/housing are dropped only when preprocessing was
    # requested.
    dropped = ['personal_status', 'credit']
    if preprocess:
        dropped = ['purpose', 'personal_status', 'housing', 'credit']
    df = df.drop(columns=dropped)

    parts = train_test_split(df, y, test_size=test_size, random_state=random_state)
    X_train, X_test, y_train, y_test = (part.reset_index(drop=True) for part in parts)

    return X_train, X_test, y_train, y_test

In [None]:
# Load the preprocessed train/test split.
X_train, X_test, y_train, y_test = load_german(preprocess=True)

In [None]:
# Create the Decision Tree classifier object. A fixed random_state keeps the
# fitted tree, and therefore every metric below, reproducible across runs.
clf = DecisionTreeClassifier(random_state=1)

# Train the Decision Tree classifier
clf = clf.fit(X_train, y_train)

# Predict the response for the test dataset
y_pred = clf.predict(X_test)

In [None]:
# Fraction of test rows whose predicted label matches the true label.
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
class ClassificationMetric(BinaryLabelDatasetMetric):
    """Class for computing metrics based on two BinaryLabelDatasets.
    The first dataset is the original one and the second is the output of the
    classification transformer (or similar).
    """

    def __init__(self, dataset, classified_dataset,
                 unprivileged_groups=None, privileged_groups=None):
        """Pair a ground-truth dataset with a dataset of predictions.

        Args:
            dataset (BinaryLabelDataset or MulticlassLabelDataset): Dataset
                containing ground-truth labels.
            classified_dataset (BinaryLabelDataset or MulticlassLabelDataset):
                Dataset containing predictions. A multiclass dataset is copied
                and its labels are binarized: labels equal to any of its
                favorable labels become 1.0, all others 0.0.
            privileged_groups (list(dict)): Privileged groups. Format is a list
                of `dicts` where the keys are `protected_attribute_names` and
                the values are values in `protected_attributes`. Each `dict`
                element describes a single group.
            unprivileged_groups (list(dict)): Unprivileged groups in the same
                format as `privileged_groups`.
        Raises:
            TypeError: `dataset` or `classified_dataset` is neither a
                BinaryLabelDataset nor a MulticlassLabelDataset.
            ValueError: The two datasets differ in something other than
                'labels' or 'scores'.
        """
        supported_types = (BinaryLabelDataset, MulticlassLabelDataset)
        if not isinstance(dataset, supported_types):
            raise TypeError("'dataset' should be a BinaryLabelDataset or a MulticlassLabelDataset")

        # The parent initializer sets self.dataset, self.unprivileged_groups
        # and self.privileged_groups.
        super(ClassificationMetric, self).__init__(
            dataset,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)

        if not isinstance(classified_dataset, supported_types):
            raise TypeError("'classified_dataset' should be a "
                            "BinaryLabelDataset or a MulticlassLabelDataset.")
        self.classified_dataset = classified_dataset

        if isinstance(self.classified_dataset, MulticlassLabelDataset):
            # Work on a copy so the caller's predictions are not overwritten.
            self.classified_dataset = self.classified_dataset.copy()
            predicted = self.classified_dataset

            # True wherever a label matches any of the favorable labels.
            is_favorable = np.logical_or.reduce(
                np.equal.outer(predicted.favorable_label, predicted.labels))
            predicted.labels = np.where(is_favorable, 1., 0.)

            predicted.favorable_label = float(1.)
            predicted.unfavorable_label = float(0.)

        # Everything except the predictions (labels / scores) must be the same
        # in the two datasets.
        with self.dataset.temporarily_ignore('labels', 'scores'):
            datasets_differ = self.dataset != self.classified_dataset
            if datasets_differ:
                raise ValueError("The two datasets are expected to differ only "
                                 "in 'labels' or 'scores'.")

    def binary_confusion_matrix(self, privileged=None):
        """Count true/false positives/negatives from the hard predictions,
        optionally restricted to one group.

        Ground-truth labels, protected attributes, instance weights and the
        favorable/unfavorable label values come from `self.dataset`; the
        predicted labels come from `self.classified_dataset`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Returns:
            dict: Number of true positives, false positives, true negatives,
            false negatives (optionally conditioned).
        """
        group_condition = self._to_condition(privileged)
        truth = self.dataset

        counts = utils.compute_num_TF_PN(
            truth.protected_attributes,
            truth.labels,
            self.classified_dataset.labels,
            truth.instance_weights,
            truth.protected_attribute_names,
            truth.favorable_label,
            truth.unfavorable_label,
            condition=group_condition)
        return counts

    def generalized_binary_confusion_matrix(self, privileged=None):
        """Compute generalized true/false positives/negatives, optionally
        restricted to one group. Generalized counts use the predicted scores
        of `self.classified_dataset` instead of its hard labels.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Returns:
            dict: Number of generalized true positives, generalized false
            positives, generalized true negatives, generalized false negatives
            (optionally conditioned).
        """
        group_condition = self._to_condition(privileged)
        truth = self.dataset

        counts = utils.compute_num_gen_TF_PN(
            truth.protected_attributes,
            truth.labels,
            self.classified_dataset.scores,
            truth.instance_weights,
            truth.protected_attribute_names,
            truth.favorable_label,
            truth.unfavorable_label,
            condition=group_condition)
        return counts

    def num_true_positives(self, privileged=None):
        r"""Number of instances whose true and predicted labels are both
        'favorable',
        :math:`TP = \sum_{i=1}^n \mathbb{1}[y_i = \text{favorable}]\mathbb{1}[\hat{y}_i = \text{favorable}]`,
        optionally conditioned on protected attributes.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        confusion = self.binary_confusion_matrix(privileged=privileged)
        return confusion['TP']

    def num_false_positives(self, privileged=None):
        r"""Number of instances that are truly 'unfavorable' but predicted
        'favorable',
        :math:`FP = \sum_{i=1}^n \mathbb{1}[y_i = \text{unfavorable}]\mathbb{1}[\hat{y}_i = \text{favorable}]`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        confusion = self.binary_confusion_matrix(privileged=privileged)
        return confusion['FP']

    def num_false_negatives(self, privileged=None):
        r"""Number of instances that are truly 'favorable' but predicted
        'unfavorable',
        :math:`FN = \sum_{i=1}^n \mathbb{1}[y_i = \text{favorable}]\mathbb{1}[\hat{y}_i = \text{unfavorable}]`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        confusion = self.binary_confusion_matrix(privileged=privileged)
        return confusion['FN']

    def num_true_negatives(self, privileged=None):
        r"""Number of instances whose true and predicted labels are both
        'unfavorable',
        :math:`TN = \sum_{i=1}^n \mathbb{1}[y_i = \text{unfavorable}]\mathbb{1}[\hat{y}_i = \text{unfavorable}]`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        confusion = self.binary_confusion_matrix(privileged=privileged)
        return confusion['TN']

    def num_generalized_true_positives(self, privileged=None):
        """Generalized number of true positives, :math:`GTP`: the weighted sum
        of predicted scores where true labels are 'favorable', optionally
        conditioned on protected attributes.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        generalized = self.generalized_binary_confusion_matrix(
            privileged=privileged)
        return generalized['GTP']

    def num_generalized_false_positives(self, privileged=None):
        """Generalized number of false positives, :math:`GFP`: the weighted sum
        of predicted scores where true labels are 'unfavorable', optionally
        conditioned on protected attributes.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        generalized = self.generalized_binary_confusion_matrix(
            privileged=privileged)
        return generalized['GFP']

    def num_generalized_false_negatives(self, privileged=None):
        """Generalized number of false negatives, :math:`GFN`: the weighted sum
        of 1 - predicted scores where true labels are 'favorable', optionally
        conditioned on protected attributes.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        generalized = self.generalized_binary_confusion_matrix(
            privileged=privileged)
        return generalized['GFN']

    def num_generalized_true_negatives(self, privileged=None):
        """Generalized number of true negatives, :math:`GTN`: the weighted sum
        of 1 - predicted scores where true labels are 'unfavorable',
        optionally conditioned on protected attributes.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        generalized = self.generalized_binary_confusion_matrix(
            privileged=privileged)
        return generalized['GTN']

    def performance_measures(self, privileged=None):
        """Compute various performance measures on the dataset, optionally
        conditioned on protected attributes.

        The hard and the generalized confusion matrices are each computed
        once and then indexed, rather than being recomputed for every
        individual count.

        Args:
            privileged (bool, optional): Boolean prescribing whether to
                condition this metric on the `privileged_groups`, if `True`, or
                the `unprivileged_groups`, if `False`. Defaults to `None`
                meaning this metric is computed over the entire dataset.
        Returns:
            dict: True positive rate, true negative rate, false positive rate,
            false negative rate, their generalized (score-based) counterparts,
            positive predictive value, negative predictive value, false
            discovery rate, false omission rate, and accuracy (optionally
            conditioned), under the keys `TPR`, `TNR`, `FPR`, `FNR`, `GTPR`,
            `GTNR`, `GFPR`, `GFNR`, `PPV`, `NPV`, `FDR`, `FOR`, `ACC`.
        """
        hard = self.binary_confusion_matrix(privileged=privileged)
        soft = self.generalized_binary_confusion_matrix(privileged=privileged)

        TP, FP, FN, TN = hard['TP'], hard['FP'], hard['FN'], hard['TN']
        GTP, GFP, GFN, GTN = soft['GTP'], soft['GFP'], soft['GFN'], soft['GTN']
        P = self.num_positives(privileged=privileged)
        N = self.num_negatives(privileged=privileged)

        # NOTE(review): the rates that divide directly by P or N are not
        # guarded against a zero denominator, unlike PPV/NPV/FDR/FOR/ACC which
        # fall back to 0.0. This is kept as-is so existing results are
        # unchanged.
        return dict(
            TPR=TP / P, TNR=TN / N, FPR=FP / N, FNR=FN / P,
            GTPR=GTP / P, GTNR=GTN / N, GFPR=GFP / N, GFNR=GFN / P,
            PPV=TP / (TP+FP) if (TP+FP) > 0.0 else np.float64(0.0),
            NPV=TN / (TN+FN) if (TN+FN) > 0.0 else np.float64(0.0),
            FDR=FP / (FP+TP) if (FP+TP) > 0.0 else np.float64(0.0),
            FOR=FN / (FN+TN) if (FN+TN) > 0.0 else np.float64(0.0),
            ACC=(TP+TN) / (P+N) if (P+N) > 0.0 else np.float64(0.0)
        )

    def true_positive_rate(self, privileged=None):
        """Ratio of true positives to positive examples in the dataset,
        :math:`TPR = TP/P`, optionally conditioned on protected attributes.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        measures = self.performance_measures(privileged=privileged)
        return measures['TPR']

    def false_positive_rate(self, privileged=None):
        """False positive rate, :math:`FPR = FP/N`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        measures = self.performance_measures(privileged=privileged)
        return measures['FPR']

    def false_negative_rate(self, privileged=None):
        """False negative rate, :math:`FNR = FN/P`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        measures = self.performance_measures(privileged=privileged)
        return measures['FNR']

    def true_negative_rate(self, privileged=None):
        """True negative rate, :math:`TNR = TN/N`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        measures = self.performance_measures(privileged=privileged)
        return measures['TNR']

    def generalized_true_positive_rate(self, privileged=None):
        """Ratio of generalized true positives to positive examples in the
        dataset, :math:`GTPR = GTP/P`, optionally conditioned on protected
        attributes.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        measures = self.performance_measures(privileged=privileged)
        return measures['GTPR']

    def generalized_false_positive_rate(self, privileged=None):
        """Generalized false positive rate, :math:`GFPR = GFP/N`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        measures = self.performance_measures(privileged=privileged)
        return measures['GFPR']

    def generalized_false_negative_rate(self, privileged=None):
        """Generalized false negative rate, :math:`GFNR = GFN/P`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        measures = self.performance_measures(privileged=privileged)
        return measures['GFNR']

    def generalized_true_negative_rate(self, privileged=None):
        """Generalized true negative rate, :math:`GTNR = GTN/N`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        measures = self.performance_measures(privileged=privileged)
        return measures['GTNR']

    def positive_predictive_value(self, privileged=None):
        """Positive predictive value, :math:`PPV = TP/(TP + FP)`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        measures = self.performance_measures(privileged=privileged)
        return measures['PPV']

    def false_discovery_rate(self, privileged=None):
        """False discovery rate, :math:`FDR = FP/(TP + FP)`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        measures = self.performance_measures(privileged=privileged)
        return measures['FDR']

    def false_omission_rate(self, privileged=None):
        """False omission rate, :math:`FOR = FN/(TN + FN)`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        measures = self.performance_measures(privileged=privileged)
        return measures['FOR']

    def negative_predictive_value(self, privileged=None):
        """Negative predictive value, :math:`NPV = TN/(TN + FN)`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        measures = self.performance_measures(privileged=privileged)
        return measures['NPV']

    def accuracy(self, privileged=None):
        """Accuracy, :math:`ACC = (TP + TN)/(P + N)`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        measures = self.performance_measures(privileged=privileged)
        return measures['ACC']

    def error_rate(self, privileged=None):
        """Error rate, :math:`ERR = (FP + FN)/(P + N)`, computed as one minus
        the accuracy.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        fraction_correct = self.accuracy(privileged=privileged)
        return 1. - fraction_correct

    def true_positive_rate_difference(self):
        r"""Difference in true positive rates,
        :math:`TPR_{D = \text{unprivileged}} - TPR_{D = \text{privileged}}`.
        """
        group_metric = self.true_positive_rate
        return self.difference(group_metric)

    def false_positive_rate_difference(self):
        r"""Difference in false positive rates,
        :math:`FPR_{D = \text{unprivileged}} - FPR_{D = \text{privileged}}`.
        """
        group_metric = self.false_positive_rate
        return self.difference(group_metric)

    def false_negative_rate_difference(self):
        r"""Difference in false negative rates,
        :math:`FNR_{D = \text{unprivileged}} - FNR_{D = \text{privileged}}`.
        """
        group_metric = self.false_negative_rate
        return self.difference(group_metric)

    def false_omission_rate_difference(self):
        r"""Difference in false omission rates,
        :math:`FOR_{D = \text{unprivileged}} - FOR_{D = \text{privileged}}`.
        """
        group_metric = self.false_omission_rate
        return self.difference(group_metric)

    def false_discovery_rate_difference(self):
        r"""Difference in false discovery rates,
        :math:`FDR_{D = \text{unprivileged}} - FDR_{D = \text{privileged}}`.
        """
        group_metric = self.false_discovery_rate
        return self.difference(group_metric)

    def false_positive_rate_ratio(self):
        r"""Ratio of false positive rates,
        :math:`\frac{FPR_{D = \text{unprivileged}}}{FPR_{D = \text{privileged}}}`.
        """
        group_metric = self.false_positive_rate
        return self.ratio(group_metric)

    def false_negative_rate_ratio(self):
        r"""Ratio of false negative rates,
        :math:`\frac{FNR_{D = \text{unprivileged}}}{FNR_{D = \text{privileged}}}`.
        """
        group_metric = self.false_negative_rate
        return self.ratio(group_metric)

    def false_omission_rate_ratio(self):
        r"""Ratio of false omission rates,
        :math:`\frac{FOR_{D = \text{unprivileged}}}{FOR_{D = \text{privileged}}}`.
        """
        group_metric = self.false_omission_rate
        return self.ratio(group_metric)

    def false_discovery_rate_ratio(self):
        r"""Ratio of false discovery rates,
        :math:`\frac{FDR_{D = \text{unprivileged}}}{FDR_{D = \text{privileged}}}`.
        """
        group_metric = self.false_discovery_rate
        return self.ratio(group_metric)

    def average_odds_difference(self):
        r"""Mean of the FPR difference and the TPR difference between the
        unprivileged and privileged groups:
        .. math::
           \tfrac{1}{2}\left[(FPR_{D = \text{unprivileged}} - FPR_{D = \text{privileged}})
           + (TPR_{D = \text{unprivileged}} - TPR_{D = \text{privileged}})\right]
        A value of 0 indicates equality of odds.
        """
        fpr_gap = self.difference(self.false_positive_rate)
        tpr_gap = self.difference(self.true_positive_rate)
        return 0.5 * (fpr_gap + tpr_gap)

    def average_abs_odds_difference(self):
        r"""Mean of the absolute FPR difference and the absolute TPR
        difference between the unprivileged and privileged groups:
        .. math::
           \tfrac{1}{2}\left[|FPR_{D = \text{unprivileged}} - FPR_{D = \text{privileged}}|
           + |TPR_{D = \text{unprivileged}} - TPR_{D = \text{privileged}}|\right]
        A value of 0 indicates equality of odds.
        """
        abs_fpr_gap = np.abs(self.difference(self.false_positive_rate))
        abs_tpr_gap = np.abs(self.difference(self.true_positive_rate))
        return 0.5 * (abs_fpr_gap + abs_tpr_gap)

    def error_rate_difference(self):
        r"""Error rate of the unprivileged group minus that of the privileged
        group,
        :math:`ERR_{D = \text{unprivileged}} - ERR_{D = \text{privileged}}`.
        """
        group_metric = self.error_rate
        return self.difference(group_metric)

    def error_rate_ratio(self):
        r"""Error rate of the unprivileged group divided by that of the
        privileged group,
        :math:`\frac{ERR_{D = \text{unprivileged}}}{ERR_{D = \text{privileged}}}`.
        """
        group_metric = self.error_rate
        return self.ratio(group_metric)

    def num_pred_positives(self, privileged=None):
        r"""Number of instances predicted 'favorable',
        :math:`\sum_{i=1}^n \mathbb{1}[\hat{y}_i = \text{favorable}]`.

        All inputs (protected attributes, labels, instance weights and the
        favorable label value) are read from `self.classified_dataset`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        group_condition = self._to_condition(privileged)
        predicted = self.classified_dataset

        return utils.compute_num_pos_neg(
            predicted.protected_attributes,
            predicted.labels,
            predicted.instance_weights,
            predicted.protected_attribute_names,
            predicted.favorable_label,
            condition=group_condition)

    def num_pred_negatives(self, privileged=None):
        r"""Number of instances predicted 'unfavorable',
        :math:`\sum_{i=1}^n \mathbb{1}[\hat{y}_i = \text{unfavorable}]`.

        All inputs (protected attributes, labels, instance weights and the
        unfavorable label value) are read from `self.classified_dataset`.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`;
                `None` (default) uses the whole dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        group_condition = self._to_condition(privileged)
        predicted = self.classified_dataset

        return utils.compute_num_pos_neg(
            predicted.protected_attributes,
            predicted.labels,
            predicted.instance_weights,
            predicted.protected_attribute_names,
            predicted.unfavorable_label,
            condition=group_condition)

    def selection_rate(self, privileged=None):
        r"""Share of instances predicted favorable,
        :math:`Pr(\hat{Y} = \text{favorable})`.

        Computed as :meth:`num_pred_positives` divided by ``num_instances``,
        both evaluated with the same ``privileged`` argument.

        Args:
            privileged (bool, optional): `True` restricts the computation to
                the `privileged_groups`, `False` to the `unprivileged_groups`.
                The default, `None`, uses the entire dataset.
        Raises:
            AttributeError: `privileged_groups` or `unprivileged_groups`
                must be provided at initialization to condition on them.
        """
        favorable_count = self.num_pred_positives(privileged=privileged)
        total_count = self.num_instances(privileged=privileged)
        return favorable_count / total_count

    def disparate_impact(self):
        r"""Ratio of selection rates between the two groups:

        .. math::
           \frac{Pr(\hat{Y} = 1 | D = \text{unprivileged})}
           {Pr(\hat{Y} = 1 | D = \text{privileged})}

        Returns:
            The value produced by ``self.ratio`` when given
            :meth:`selection_rate` as the metric to evaluate.
        """
        rate_fn = self.selection_rate
        return self.ratio(rate_fn)

    def statistical_parity_difference(self):
        r"""Difference of selection rates between the two groups:

        .. math::
           Pr(\hat{Y} = 1 | D = \text{unprivileged})
           - Pr(\hat{Y} = 1 | D = \text{privileged})

        Returns:
            The value produced by ``self.difference`` when given
            :meth:`selection_rate` as the metric to evaluate.
        """
        rate_fn = self.selection_rate
        return self.difference(rate_fn)

    def generalized_entropy_index(self, alpha=2):
        r"""Generalized entropy index is proposed as a unified individual and
        group fairness measure in [3]_.  With :math:`b_i = \hat{y}_i - y_i + 1`:
        .. math::
           \mathcal{E}(\alpha) = \begin{cases}
               \frac{1}{n \alpha (\alpha-1)}\sum_{i=1}^n\left[\left(\frac{b_i}{\mu}\right)^\alpha - 1\right],& \alpha \ne 0, 1,\\
               \frac{1}{n}\sum_{i=1}^n\frac{b_{i}}{\mu}\ln\frac{b_{i}}{\mu},& \alpha=1,\\
               -\frac{1}{n}\sum_{i=1}^n\ln\frac{b_{i}}{\mu},& \alpha=0.
           \end{cases}
        where :math:`\mu` is the mean of the :math:`b_i`.

        Args:
            alpha (int): Parameter that regulates the weight given to distances
                between values at different parts of the distribution.
        Returns:
            The index value as a NumPy floating-point scalar. For
            ``alpha == 0`` the result is infinite whenever some
            :math:`b_i = 0` (a favorable true label predicted unfavorable),
            because the formula takes :math:`\ln 0`.
        References:
            .. [3] T. Speicher, H. Heidari, N. Grgic-Hlaca, K. P. Gummadi, A. Singla, A. Weller, and M. B. Zafar,
               "A Unified Approach to Quantifying Algorithmic Unfairness: Measuring Individual and Group Unfairness via Inequality Indices,"
               ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, 2018.
        """
        y_pred = self.classified_dataset.labels.ravel()
        y_true = self.dataset.labels.ravel()
        # Binarize: 1.0 where the label equals the favorable label, else 0.0.
        y_pred = (y_pred == self.classified_dataset.favorable_label).astype(
            np.float64)
        y_true = (y_true == self.dataset.favorable_label).astype(np.float64)
        # Per-instance "benefit": 0 for a false negative, 1 for a correct
        # prediction, 2 for a false positive.
        b = 1 + y_pred - y_true
        mu = np.mean(b)

        if alpha == 1:
            # moving the b inside the log allows for 0 values
            return np.mean(np.log((b / mu)**b) / mu)
        elif alpha == 0:
            # The documented formula has no extra 1/mu factor for alpha = 0.
            return -np.mean(np.log(b / mu))
        else:
            return np.mean((b / mu)**alpha - 1) / (alpha * (alpha - 1))

    def _between_group_generalized_entropy_index(self, groups, alpha=2):
        r"""Between-group generalized entropy index is proposed as a group
        fairness measure in [2]_ and is one of two terms that the generalized
        entropy index decomposes to.

        Every instance is assigned the mean benefit
        (:math:`1 + \hat{y} - y`) of the group it belongs to, and the
        generalized entropy formula is then applied to those values. With
        :math:`\mu` the mean of the assigned values, the ``alpha == 0`` case is
        :math:`-\frac{1}{n}\sum_i \ln\frac{b_i}{\mu}`.

        Args:
            groups (list): A list of groups over which to calculate this metric.
                Groups should be disjoint. By default, this will use the
                `privileged_groups` and `unprivileged_groups` as the only two
                groups.
            alpha (int): See :meth:`generalized_entropy_index`.
        Returns:
            The index value as a NumPy floating-point scalar.
        References:
            .. [2] T. Speicher, H. Heidari, N. Grgic-Hlaca, K. P. Gummadi, A. Singla, A. Weller, and M. B. Zafar,
               "A Unified Approach to Quantifying Algorithmic Unfairness: Measuring Individual and Group Unfairness via Inequality Indices,"
               ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, 2018.
        """
        # One slot per instance; instances matched by no group keep the
        # initial value 0.
        b = np.zeros(self.dataset.labels.size, dtype=np.float64)

        for group in groups:
            classified_group = utils.compute_boolean_conditioning_vector(
                self.classified_dataset.protected_attributes,
                self.classified_dataset.protected_attribute_names,
                condition=group)
            true_group = utils.compute_boolean_conditioning_vector(
                self.dataset.protected_attributes,
                self.dataset.protected_attribute_names,
                condition=group)
            # ignore if there are no members of this group present
            if not np.any(true_group):
                continue
            y_pred = self.classified_dataset.labels[classified_group].ravel()
            y_true = self.dataset.labels[true_group].ravel()
            # Binarize: 1.0 where the label equals the favorable label.
            y_pred = (y_pred == self.classified_dataset.favorable_label).astype(
                np.float64)
            y_true = (y_true == self.dataset.favorable_label).astype(np.float64)
            # Every member of the group receives the group's mean benefit.
            b[true_group] = np.mean(1 + y_pred - y_true)

        mu = np.mean(b)

        if alpha == 1:
            # keeping b inside the log tolerates zero entries
            return np.mean(np.log((b / mu)**b) / mu)
        elif alpha == 0:
            # The alpha = 0 formula has no extra 1/mu factor.
            return -np.mean(np.log(b / mu))
        else:
            return np.mean((b / mu)**alpha - 1) / (alpha * (alpha - 1))

    def between_all_groups_generalized_entropy_index(self, alpha=2):
        """Between-group generalized entropy index over every combination of
        protected-attribute values listed on `self.dataset`. See
        :meth:`_between_group_generalized_entropy_index`.

        For each protected attribute the privileged and unprivileged values
        are concatenated; the Cartesian product of those per-attribute value
        sets gives one single-condition group per combination.

        Args:
            alpha (int): See :meth:`generalized_entropy_index`.
        """
        ds = self.dataset
        value_pools = [
            np.concatenate(pair)
            for pair in zip(ds.privileged_protected_attributes,
                            ds.unprivileged_protected_attributes)
        ]

        groups = []
        for combo in product(*value_pools):
            # Each group is a list holding one {attribute name: value} dict.
            groups.append([dict(zip(self.dataset.protected_attribute_names,
                                    combo))])

        return self._between_group_generalized_entropy_index(groups=groups,
            alpha=alpha)

    def between_group_generalized_entropy_index(self, alpha=2):
        """Between-group generalized entropy index computed over exactly two
        groups: the unprivileged condition followed by the privileged
        condition, as returned by ``self._to_condition``. See
        :meth:`_between_group_generalized_entropy_index`.

        Args:
            alpha (int): See :meth:`generalized_entropy_index`.
        """
        unprivileged_condition = self._to_condition(False)
        privileged_condition = self._to_condition(True)
        return self._between_group_generalized_entropy_index(
            groups=[unprivileged_condition, privileged_condition],
            alpha=alpha)

    def theil_index(self):
        r"""Theil index: :meth:`generalized_entropy_index` evaluated at
        :math:`\alpha = 1`.
        """
        theil = self.generalized_entropy_index(alpha=1)
        return theil

    def coefficient_of_variation(self):
        r"""Coefficient of variation: the square root of twice the
        :meth:`generalized_entropy_index` evaluated at :math:`\alpha = 2`.
        """
        gei_alpha2 = self.generalized_entropy_index(alpha=2)
        return np.sqrt(2 * gei_alpha2)

    def between_group_theil_index(self):
        r"""Between-group Theil index:
        :meth:`between_group_generalized_entropy_index` evaluated at
        :math:`\alpha = 1`.
        """
        theil = self.between_group_generalized_entropy_index(alpha=1)
        return theil

    def between_group_coefficient_of_variation(self):
        r"""Between-group coefficient of variation: the square root of twice
        the :meth:`between_group_generalized_entropy_index` evaluated at
        :math:`\alpha = 2`.
        """
        gei_alpha2 = self.between_group_generalized_entropy_index(alpha=2)
        return np.sqrt(2 * gei_alpha2)

    def between_all_groups_theil_index(self):
        r"""Between-group Theil index across all group combinations:
        :meth:`between_all_groups_generalized_entropy_index` evaluated at
        :math:`\alpha = 1`.
        """
        theil = self.between_all_groups_generalized_entropy_index(alpha=1)
        return theil

    def between_all_groups_coefficient_of_variation(self):
        r"""Between-group coefficient of variation across all group
        combinations: the square root of twice the
        :meth:`between_all_groups_generalized_entropy_index` evaluated at
        :math:`\alpha = 2`.
        """
        gei_alpha2 = self.between_all_groups_generalized_entropy_index(alpha=2)
        return np.sqrt(2 * gei_alpha2)

    def differential_fairness_bias_amplification(self, concentration=1.0):
        """Bias amplification: smoothed EDF of the classifier's predictions
        minus the smoothed EDF reported by
        ``smoothed_empirical_differential_fairness``. Positive values mean the
        bias increased due to the classifier.

        The classifier-side EDF is the largest absolute log-ratio, over all
        ordered pairs of distinct entries of the smoothed base rates, of either
        the rates themselves or their complements.

        Args:
            concentration (float, optional): Concentration parameter for
                Dirichlet smoothing. Must be non-negative.
        """
        ssr = self._smoothed_base_rates(self.classified_dataset.labels,
                                        concentration)

        n_rates = len(ssr)
        distinct_pairs = [(i, j)
                          for i in range(n_rates)
                          for j in range(n_rates)
                          if i != j]

        def pair_gap(i, j):
            # Larger of the favorable-outcome and unfavorable-outcome gaps.
            favorable_gap = abs(np.log(ssr[i]) - np.log(ssr[j]))
            unfavorable_gap = abs(np.log(1 - ssr[i]) - np.log(1 - ssr[j]))
            return max(favorable_gap, unfavorable_gap)

        edf_clf = max(pair_gap(i, j) for i, j in distinct_pairs)
        edf_data = self.smoothed_empirical_differential_fairness(concentration)

        return edf_clf - edf_data

    # ============================== ALIASES ===================================
    def equal_opportunity_difference(self):
        """Alias of :meth:`true_positive_rate_difference`; returns its result
        unchanged."""
        tpr_gap = self.true_positive_rate_difference()
        return tpr_gap

    def power(self, privileged=None):
        """Alias of :meth:`num_true_positives`; ``privileged`` is forwarded
        unchanged."""
        true_positive_count = self.num_true_positives(privileged=privileged)
        return true_positive_count

    def precision(self, privileged=None):
        """Alias of :meth:`positive_predictive_value`; ``privileged`` is
        forwarded unchanged."""
        ppv = self.positive_predictive_value(privileged=privileged)
        return ppv

    def recall(self, privileged=None):
        """Alias of :meth:`true_positive_rate`; ``privileged`` is forwarded
        unchanged."""
        tpr = self.true_positive_rate(privileged=privileged)
        return tpr

    def sensitivity(self, privileged=None):
        """Alias of :meth:`true_positive_rate`; ``privileged`` is forwarded
        unchanged."""
        tpr = self.true_positive_rate(privileged=privileged)
        return tpr

    def specificity(self, privileged=None):
        """Alias of :meth:`true_negative_rate`; ``privileged`` is forwarded
        unchanged."""
        tnr = self.true_negative_rate(privileged=privileged)
        return tnr

In [None]:
class EqOddsPostprocessing(Transformer):
    """Equalized odds postprocessing is a post-processing technique that solves
    a linear program to find probabilities with which to change output labels to
    optimize equalized odds [8]_ [9]_.
    References:
        .. [8] M. Hardt, E. Price, and N. Srebro, "Equality of Opportunity in
           Supervised Learning," Conference on Neural Information Processing
           Systems, 2016.
        .. [9] G. Pleiss, M. Raghavan, F. Wu, J. Kleinberg, and
           K. Q. Weinberger, "On Fairness and Calibration," Conference on Neural
           Information Processing Systems, 2017.
    """

    def __init__(self, unprivileged_groups, privileged_groups, seed=None):
        """
        Args:
            unprivileged_groups (list(dict)): Representation for unprivileged
                group.
            privileged_groups (list(dict)): Representation for privileged
                group.
            seed (int, optional): Seed to make `predict` repeatable.
        """
        super(EqOddsPostprocessing, self).__init__(
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups,
            seed=seed)

        self.seed = seed
        # Result object returned by `linprog`; stays None until `fit` runs.
        self.model_params = None
        self.unprivileged_groups = unprivileged_groups
        self.privileged_groups = privileged_groups

    def fit(self, dataset_true, dataset_pred):
        """Compute parameters for equalizing odds using true and predicted
        labels.

        The linear-program result is stored on ``self.model_params``.

        Args:
            dataset_true (BinaryLabelDataset): Dataset containing true labels.
            dataset_pred (BinaryLabelDataset): Dataset containing predicted
                labels.
        Returns:
            EqOddsPostprocessing: Returns self.
        """
        metric = ClassificationMetric(dataset_true, dataset_pred,
            unprivileged_groups=self.unprivileged_groups,
            privileged_groups=self.privileged_groups)

        # compute basic statistics
        # (the "s"/"0" names refer to the privileged group and the "o"/"1"
        # names to the unprivileged group)
        sbr = metric.base_rate(privileged=True)
        obr = metric.base_rate(privileged=False)

        fpr0 = metric.false_positive_rate(privileged=True)
        fpr1 = metric.false_positive_rate(privileged=False)
        fnr0 = metric.false_negative_rate(privileged=True)
        fnr1 = metric.false_negative_rate(privileged=False)
        tpr0 = metric.true_positive_rate(privileged=True)
        tpr1 = metric.true_positive_rate(privileged=False)
        tnr0 = metric.true_negative_rate(privileged=True)
        tnr1 = metric.true_negative_rate(privileged=False)

        # linear program has 4 decision variables:
        # [Pr[label_tilde = 1 | label_hat = 1, protected_attributes = 0];
        #  Pr[label_tilde = 1 | label_hat = 0, protected_attributes = 0];
        #  Pr[label_tilde = 1 | label_hat = 1, protected_attributes = 1];
        #  Pr[label_tilde = 1 | label_hat = 0, protected_attributes = 1]]
        # Coefficients of the linear objective function to be minimized.
        c = np.array([fpr0 - tpr0, tnr0 - fnr0, fpr1 - tpr1, tnr1 - fnr1])

        # A_ub - 2-D array which, when matrix-multiplied by x, gives the values
        # of the upper-bound inequality constraints at x
        # b_ub - 1-D array of values representing the upper-bound of each
        # inequality constraint (row) in A_ub.
        # Just to keep these between zero and one
        A_ub = np.array([[ 1,  0,  0,  0],
                         [-1,  0,  0,  0],
                         [ 0,  1,  0,  0],
                         [ 0, -1,  0,  0],
                         [ 0,  0,  1,  0],
                         [ 0,  0, -1,  0],
                         [ 0,  0,  0,  1],
                         [ 0,  0,  0, -1]], dtype=np.float64)
        b_ub = np.array([1, 0, 1, 0, 1, 0, 1, 0], dtype=np.float64)

        # Create boolean conditioning vectors for protected groups
        cond_vec_priv = utils.compute_boolean_conditioning_vector(
            dataset_pred.protected_attributes,
            dataset_pred.protected_attribute_names,
            self.privileged_groups)
        cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
            dataset_pred.protected_attributes,
            dataset_pred.protected_attribute_names,
            self.unprivileged_groups)

        # Per-group masks over the predicted labels: "const" marks predictions
        # equal to the favorable label, "flip" those equal to the unfavorable
        # label.
        sconst = np.ravel(
            dataset_pred.labels[cond_vec_priv] == dataset_pred.favorable_label)
        sflip = np.ravel(
            dataset_pred.labels[cond_vec_priv] == dataset_pred.unfavorable_label)
        oconst = np.ravel(
            dataset_pred.labels[cond_vec_unpriv] == dataset_pred.favorable_label)
        oflip = np.ravel(
            dataset_pred.labels[cond_vec_unpriv] == dataset_pred.unfavorable_label)

        y_true = dataset_true.labels.ravel()
        y_true_priv = y_true[cond_vec_priv]
        y_true_unpriv = y_true[cond_vec_unpriv]

        def joint_mask(pred_mask, true_mask):
            # Logical AND of two masks as a float64 0/1 array. The cast is done
            # after the ufunc call: `logical_and` only produces bool (or
            # object) output, and recent NumPy releases reject
            # `dtype=np.float64` on it instead of ignoring the argument.
            return np.logical_and(pred_mask, true_mask).astype(np.float64)

        # Confusion-matrix cell indicators for the privileged group ...
        sm_tn = joint_mask(sflip,
            y_true_priv == dataset_true.unfavorable_label)
        sm_fn = joint_mask(sflip,
            y_true_priv == dataset_true.favorable_label)
        sm_fp = joint_mask(sconst,
            y_true_priv == dataset_true.unfavorable_label)
        sm_tp = joint_mask(sconst,
            y_true_priv == dataset_true.favorable_label)

        # ... and for the unprivileged group.
        om_tn = joint_mask(oflip,
            y_true_unpriv == dataset_true.unfavorable_label)
        om_fn = joint_mask(oflip,
            y_true_unpriv == dataset_true.favorable_label)
        om_fp = joint_mask(oconst,
            y_true_unpriv == dataset_true.unfavorable_label)
        om_tp = joint_mask(oconst,
            y_true_unpriv == dataset_true.favorable_label)

        # A_eq - 2-D array which, when matrix-multiplied by x,
        # gives the values of the equality constraints at x
        # b_eq - 1-D array of values representing the RHS of each equality
        # constraint (row) in A_eq.
        # Used to impose equality of odds constraint
        A_eq = [[(np.mean(sconst*sm_tp) - np.mean(sflip*sm_tp)) / sbr,
                 (np.mean(sflip*sm_fn) - np.mean(sconst*sm_fn)) / sbr,
                 (np.mean(oflip*om_tp) - np.mean(oconst*om_tp)) / obr,
                 (np.mean(oconst*om_fn) - np.mean(oflip*om_fn)) / obr],
                [(np.mean(sconst*sm_fp) - np.mean(sflip*sm_fp)) / (1-sbr),
                 (np.mean(sflip*sm_tn) - np.mean(sconst*sm_tn)) / (1-sbr),
                 (np.mean(oflip*om_fp) - np.mean(oconst*om_fp)) / (1-obr),
                 (np.mean(oconst*om_tn) - np.mean(oflip*om_tn)) / (1-obr)]]

        b_eq = [(np.mean(oflip*om_tp) + np.mean(oconst*om_fn)) / obr
              - (np.mean(sflip*sm_tp) + np.mean(sconst*sm_fn)) / sbr,
                (np.mean(oflip*om_fp) + np.mean(oconst*om_tn)) / (1-obr)
              - (np.mean(sflip*sm_fp) + np.mean(sconst*sm_tn)) / (1-sbr)]

        # Linear program
        self.model_params = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq)

        return self

    def predict(self, dataset):
        """Perturb the predicted labels to obtain new labels that satisfy
        equalized odds constraints.

        Requires a prior call to :meth:`fit`, which populates
        ``self.model_params``.

        Args:
            dataset (BinaryLabelDataset): Dataset containing labels that needs
                to be transformed.
        Returns:
            BinaryLabelDataset: Transformed dataset (a copy of `dataset` with
            new labels).
        """
        if self.seed is not None:
            # Seeds NumPy's global random state so the shuffles below repeat.
            np.random.seed(self.seed)

        # Get the model parameters output from fit
        # (privileged pos->pos, privileged neg->pos, unprivileged pos->pos,
        #  unprivileged neg->pos)
        sp2p, sn2p, op2p, on2p = self.model_params.x

        # Create boolean conditioning vectors for protected groups
        cond_vec_priv = utils.compute_boolean_conditioning_vector(
            dataset.protected_attributes, dataset.protected_attribute_names,
            self.privileged_groups)
        cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
            dataset.protected_attributes, dataset.protected_attribute_names,
            self.unprivileged_groups)

        # Randomly flip labels according to the probabilities in model_params
        self_fair_pred = dataset.labels[cond_vec_priv].copy()
        self_pp_indices, _ = np.nonzero(
            dataset.labels[cond_vec_priv] == dataset.favorable_label)
        self_pn_indices, _ = np.nonzero(
            dataset.labels[cond_vec_priv] == dataset.unfavorable_label)
        np.random.shuffle(self_pp_indices)
        np.random.shuffle(self_pn_indices)

        # After shuffling, the leading fraction of each index list is flipped.
        n2p_indices = self_pn_indices[:int(len(self_pn_indices) * sn2p)]
        self_fair_pred[n2p_indices] = dataset.favorable_label
        p2n_indices = self_pp_indices[:int(len(self_pp_indices) * (1 - sp2p))]
        self_fair_pred[p2n_indices] = dataset.unfavorable_label

        othr_fair_pred = dataset.labels[cond_vec_unpriv].copy()
        othr_pp_indices, _ = np.nonzero(
            dataset.labels[cond_vec_unpriv] == dataset.favorable_label)
        othr_pn_indices, _ = np.nonzero(
            dataset.labels[cond_vec_unpriv] == dataset.unfavorable_label)
        np.random.shuffle(othr_pp_indices)
        np.random.shuffle(othr_pn_indices)

        n2p_indices = othr_pn_indices[:int(len(othr_pn_indices) * on2p)]
        othr_fair_pred[n2p_indices] = dataset.favorable_label
        p2n_indices = othr_pp_indices[:int(len(othr_pp_indices) * (1 - op2p))]
        othr_fair_pred[p2n_indices] = dataset.unfavorable_label

        # Mutated, fairer dataset with new labels
        dataset_new = dataset.copy()

        # NOTE: rows matched by neither group condition keep the 0.0 written
        # here rather than their original label.
        new_labels = np.zeros_like(dataset.labels, dtype=np.float64)
        new_labels[cond_vec_priv] = self_fair_pred
        new_labels[cond_vec_unpriv] = othr_fair_pred

        dataset_new.labels = new_labels

        return dataset_new

    def fit_predict(self, dataset_true, dataset_pred):
        """fit and predict methods sequentially."""
        return self.fit(dataset_true, dataset_pred).predict(dataset_pred)