In [None]:
from pprint import pformat
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix

In [None]:
class BinaryMetrics:
    """
    Confusion-matrix counts and group-fairness metrics for binary predictions,
    computed separately for every sensitive-attribute group.

    Every metric property returns a new dict that maps each group label to its
    value and carries an extra 'support' entry: the number of actual positives
    (TP + FN) summed over all groups. A ratio whose denominator is zero comes
    out as nan (numpy 0 / 0).
    """

    def __init__(self, attribute_ys, favorable_outcome=1):
        """
        Parameters
        ----------
        attribute_ys - (dict) {
            '<sensitive attr A>': {'y_true': y_true_A, 'y_pred': y_pred_A},
            '<sensitive attr B>': {'y_true': y_true_B, 'y_pred': y_pred_B}
            }
            Each y_* is a numpy array; labels are expected to be 0 / 1.
        favorable_outcome - (int) which label counts as the favorable decision;
            1 selects the positive-class variant of the parity metrics, any
            other value selects the negative-class variant.

        Layout of each per-group confusion matrix:
        -----------
        | TN | FP |
        -----------
        | FN | TP |
        -----------
        """
        self.attribute_ys = attribute_ys
        self.favorable_outcome = favorable_outcome
        self.labels = []
        self.label_counts = dict()
        class_confusion_matrices = []
        for label, ys in attribute_ys.items():
            self.labels.append(label)
            self.label_counts[label] = ys['y_true'].shape[0]
            class_confusion_matrices.append(
                self._confusion_matrix(ys['y_true'], ys['y_pred']))

        self.TNs = np.array([cm[0, 0] for cm in class_confusion_matrices])
        self.FPs = np.array([cm[0, 1] for cm in class_confusion_matrices])
        self.FNs = np.array([cm[1, 0] for cm in class_confusion_matrices])
        self.TPs = np.array([cm[1, 1] for cm in class_confusion_matrices])
        self.actual_positives = self.TPs + self.FNs
        self.support = sum(self.actual_positives)
        self.actual_negatives = self.TNs + self.FPs

    @staticmethod
    def _confusion_matrix(y_true, y_pred):
        """Return the 2x2 [[TN, FP], [FN, TP]] matrix for one group."""
        # confusion_matrix(y_true, y_pred) will return wrong dims if all `0`s or
        # all `1`s, so those two cases are built by hand.
        if np.all(y_true == 0) and np.all(y_pred == 0):
            return np.array([[y_true.size, 0], [0, 0]])
        if np.all(y_true == 1) and np.all(y_pred == 1):
            return np.array([[0, 0], [0, y_true.size]])
        return confusion_matrix(y_true, y_pred)

    def _by_label(self, values):
        """Map each group label to its entry of `values` and append 'support'."""
        result = dict(zip(self.labels, values))
        result['support'] = self.support
        return result

    def _paired(self, first_key, first, second_key, second):
        """Combine two per-label metric dicts into {label: {key: value, key: value}}."""
        result = {label: {first_key: first[label], second_key: second[label]}
                  for label in self.labels}
        result['support'] = self.support
        return result

    def _group_sizes(self):
        """Number of samples per group (actual positives + actual negatives)."""
        return self.actual_positives + self.actual_negatives

    @property
    def base_rates(self):
        """Share of actual positives in each group."""
        return self._by_label(self.actual_positives / self._group_sizes())

    @property
    def positive_rates(self):
        """Share of samples predicted positive in each group."""
        return self._by_label((self.TPs + self.FPs) / self._group_sizes())

    @property
    def negative_rates(self):
        """Share of samples predicted negative in each group."""
        return self._by_label((self.TNs + self.FNs) / self._group_sizes())

    @property
    def accuracies(self):
        """Share of correctly classified samples in each group."""
        return self._by_label((self.TPs + self.TNs) / self._group_sizes())

    @property
    def misclassification_rates(self):
        """Share of wrongly classified samples in each group."""
        return self._by_label((self.FNs + self.FPs) / self._group_sizes())

    @property
    def true_positive_rates(self):
        """
        Also known as sensitivity, recall, or hit rate.
        """
        return self._by_label(self.TPs / self.actual_positives)

    @property
    def true_negative_rates(self):
        """
        Also known as specificity or selectivity.
        """
        return self._by_label(self.TNs / self.actual_negatives)

    @property
    def false_positive_rates(self):
        """
        Also known as fall-out.
        """
        return self._by_label(self.FPs / self.actual_negatives)

    @property
    def false_negative_rates(self):
        """
        Also known as miss rate.
        """
        return self._by_label(self.FNs / self.actual_positives)

    @property
    def false_discovery_rates(self):
        """FP / (TP + FP) per group."""
        return self._by_label(self.FPs / (self.TPs + self.FPs))

    @property
    def positive_predictive_values(self):
        """
        Also known as precision.
        """
        return self._by_label(self.TPs / (self.TPs + self.FPs))

    @property
    def false_omission_rates(self):
        """FN / (TN + FN) per group."""
        return self._by_label(self.FNs / (self.TNs + self.FNs))

    @property
    def negative_predictive_values(self):
        """TN / (TN + FN) per group."""
        return self._by_label(self.TNs / (self.TNs + self.FNs))

    @property
    def demographic_parity(self):
        """Rate at which each group receives the favorable prediction."""
        if self.favorable_outcome == 1:
            return self.positive_rates
        return self.negative_rates

    def conditional_statistical_parity(self, legitimate_factors):
        """
        Demographic parity restricted to the samples whose legitimate factor is 1.

        Parameters
        ----------
        legitimate_factors - (dict) {
            '<sensitive attr A>': sample_legitimate_factors_for_A,  # (num_samples,) array
            '<sensitive attr B>': sample_legitimate_factors_for_B   # (num_samples,) array
            }

        Returns
        -------
        (dict) per-group rate plus 'support', both computed on the filtered samples.
        """
        # Build fresh inner dicts: writing into self.attribute_ys[...] would
        # truncate the caller's arrays and break every later call.
        filtered = dict()
        for sens_attr, ys in self.attribute_ys.items():
            keep = np.where(np.asarray(legitimate_factors[sens_attr]) == 1)[0]
            filtered[sens_attr] = {'y_true': ys['y_true'][keep],
                                   'y_pred': ys['y_pred'][keep]}
        bm = BinaryMetrics(filtered, favorable_outcome=self.favorable_outcome)
        return bm.demographic_parity

    @property
    def equal_selection_parity(self):
        """Absolute number of favorable predictions per group."""
        if self.favorable_outcome == 1:
            return self._by_label(self.TPs + self.FPs)
        return self._by_label(self.TNs + self.FNs)

    @property
    def conditional_use_accuracy_equality(self):
        """NPV and PPV per group."""
        return self._paired('NPV', self.negative_predictive_values,
                            'PPV', self.positive_predictive_values)

    @property
    def predictive_parity(self):
        """PPV per group."""
        return self.positive_predictive_values

    @property
    def calibration(self):
        """FOR and FDR per group."""
        return self._paired('FOR', self.false_omission_rates,
                            'FDR', self.false_discovery_rates)

    @property
    def equalized_odds(self):
        """TNR and TPR per group."""
        return self._paired('TNR', self.true_negative_rates,
                            'TPR', self.true_positive_rates)

    @property
    def equalized_opportunities(self):
        """TPR per group when the favorable outcome is 1, otherwise TNR."""
        return self.true_positive_rates if self.favorable_outcome == 1 else self.true_negative_rates

    @property
    def predictive_equality(self):
        """TPR per group when the favorable outcome is 0, otherwise TNR."""
        return self.true_positive_rates if self.favorable_outcome == 0 else self.true_negative_rates

    def __repr__(self):
        """List every dict-valued attribute (upper-cased name, then value)."""
        elements = [' - '.join(str(label) for label in self.labels)]
        for attr in dir(self):
            # Skip private names and the raw input arrays; only summaries belong here.
            if attr.startswith('_') or attr == 'attribute_ys':
                continue
            element = getattr(self, attr)
            if isinstance(element, dict):
                elements.append(attr.upper())
                elements.append(element)
        return pformat(elements, indent=4)

In [None]:
class MultiClassBinaryMetrics:
    """
    Holds one BinaryMetrics per class column of 2-D label matrices.

    Any metric name that BinaryMetrics exposes as a dict can be read from this
    object as an attribute; the result is a DataFrame with one row per class
    label.
    """

    def __init__(self, attribute_ys, favorable_outcome=1, class_labels=None):
        """
        Parameters
        ----------
        attribute_ys - (dict) {
            '<sensitive attr A>': {'y_true': y_true_As, 'y_pred': y_pred_As},
            '<sensitive attr B>': {'y_true': y_true_Bs, 'y_pred': y_pred_Bs}
        }
            Each y_* is a 2-D array; every column is handled as its own
            binary problem.
        favorable_outcome - forwarded unchanged to every BinaryMetrics.
        class_labels - optional names for the columns; defaults to 0..num_classes-1.

        Raises
        ------
        ValueError if attribute_ys is empty or class_labels does not provide
        exactly one label per column.
        """
        if not attribute_ys:
            raise ValueError('attribute_ys must contain at least one sensitive attribute')
        # The column count is taken from the first group.
        first_group = next(iter(attribute_ys.values()))
        num_classes = first_group['y_true'].shape[1]
        self.class_labels = list(range(num_classes)) if class_labels is None else class_labels
        if len(self.class_labels) != num_classes:
            raise ValueError('expected {} class labels, got {}'.format(
                num_classes, len(self.class_labels)))

        self.multiclass_binary_metrics = dict()
        for col_index in range(num_classes):
            attrib_ys = dict()
            for attribute, ys in attribute_ys.items():
                attrib_ys[attribute] = {
                    'y_true': ys['y_true'][:, col_index].flatten(),
                    'y_pred': ys['y_pred'][:, col_index].flatten(),
                }
            binary_metrics = BinaryMetrics(attrib_ys,
                                           favorable_outcome=favorable_outcome)
            self.multiclass_binary_metrics[self.class_labels[col_index]] = binary_metrics

    def __getattr__(self, attr):
        """
        Collect metric `attr` from every per-class BinaryMetrics into a DataFrame
        indexed by class label.

        Raises AttributeError for private names, for this object's own
        attributes when they are not set yet, and for attributes of the wrapped
        objects that are not dicts.
        """
        # Only reached when normal lookup failed. Refusing our own attribute
        # names here prevents unbounded recursion on a half-built instance.
        if attr.startswith('_') or attr in ('class_labels', 'multiclass_binary_metrics'):
            raise AttributeError(attr)
        data = []
        for label in self.class_labels:
            values = getattr(self.multiclass_binary_metrics[label], attr)
            if not isinstance(values, dict):
                raise AttributeError(
                    '{!r} is not a per-group metric dict'.format(attr))
            # Copy before tagging so the wrapped object's dict is never modified.
            row = dict(values)
            row['label'] = label
            data.append(row)
        return pd.DataFrame(data).set_index('label')

In [None]:
# Small scratch frame with two integer columns.
df = pd.DataFrame({'a': [2, 4, 3, 5, 4], 'b': [3, 2, 4, 3, 4]})
df

Unnamed: 0,a,b
0,2,3
1,4,2
2,3,4
3,5,3
4,4,4


In [None]:
# Values of column 'a' that occur more than once.
a_counts = df['a'].value_counts()
a_counts[a_counts > 1].index.values

array([4])

In [None]:
# Two groups of different sizes (5 and 3 samples).
bm = BinaryMetrics({
    '<sensitive attr A>': dict(y_true=np.array([1, 1, 0, 0, 1]), y_pred=np.array([1, 1, 0, 1, 1])),
    '<sensitive attr B>': dict(y_true=np.array([1, 1, 1]), y_pred=np.array([1, 0, 0])),
})

In [None]:
# Tuple of (parity restricted to samples whose legitimate factor equals 1,
# parity over all samples), evaluated in that order.
bm.conditional_statistical_parity({
            '<sensitive attr A>': np.array([1, 0, 0, 0, 1]),  # (num_samples,) array
            '<sensitive attr B>': np.array([1, 0, 1])   # (num_samples,) array
            }), bm.demographic_parity

({'<sensitive attr A>': 1.0, '<sensitive attr B>': 0.5, 'support': 4},
 {'<sensitive attr A>': 0.8,
  '<sensitive attr B>': 0.3333333333333333,
  'support': 6})

In [None]:
# FOR and FDR for each sensitive group, plus the shared 'support' entry.
bm.calibration



{'<sensitive attr A>': {'FDR': 0.4, 'FOR': nan},
 '<sensitive attr B>': {'FDR': 0.0, 'FOR': 1.0},
 'support': 6}

In [None]:
def balance(*probability_score_matrices, balance_for=1):
    """
    Mean of column `balance_for` for each given score matrix.

    Parameters
    ----------
    *probability_score_matrices - 2-D arrays indexed as [:, balance_for]
        (presumably one row per sample and one column per class - TODO confirm).
    balance_for - (int) index of the column to average.

    Returns
    -------
    1-D array with one mean per input matrix, in argument order.
    """
    column_means = []
    for scores in probability_score_matrices:
        column_means.append(np.mean(scores[:, balance_for]))
    return np.stack(column_means, axis=0)

In [None]:
# Binary demo data: one 1-D y_true / y_pred pair per sensitive group.
attribute_ys = {
    'male': dict(y_true=np.array([0, 1, 1, 0, 0, 1]), y_pred=np.array([0, 1, 1, 0, 0, 1])),
    'female': dict(y_true=np.array([1, 1, 1, 0]), y_pred=np.array([1, 1, 1, 0])),
    'other': dict(y_true=np.array([1, 1, 1, 0]), y_pred=np.array([0, 1, 1, 0])),
}

In [None]:
# Per-group metrics with the default favorable_outcome of 1.
bmf = BinaryMetrics(attribute_ys=attribute_ys)

In [None]:
# Echo the object so the notebook renders BinaryMetrics.__repr__.
bmf

[0.57142857 0.71428571 0.71428571]


[   'male - female - other',
    'ACCURACIES',
    {'female': 1.0, 'male': 1.0, 'other': 0.75},
    'BASE_RATES',
    {'female': 0.75, 'male': 0.5, 'other': 0.75},
    'CALIBRATION',
    {   'female': {'FDR': 0.0, 'FOR': 0.0},
        'male': {'FDR': 0.0, 'FOR': 0.0},
        'other': {'FDR': 0.0, 'FOR': 0.5}},
    'CONDITIONAL_STATISTICAL_PARITY',
    {   'female': 0.5357142857142857,
        'male': 0.2857142857142857,
        'other': 0.35714285714285715},
    'CONDITIONAL_USE_ACCURACY_EQUALITY',
    {   'female': {'NPV': 1.0, 'PPV': 1.0},
        'male': {'NPV': 1.0, 'PPV': 1.0},
        'other': {'NPV': 0.5, 'PPV': 1.0}},
    'DEMOGRAPHIC_PARITY',
    {'female': 0.75, 'male': 0.5, 'other': 0.5},
    'EQUAL_SELECTION_PARITY',
    {'female': 3, 'male': 3, 'other': 2},
    'EQUALIZED_ODDS',
    {   'female': {'TNR': 1.0, 'TPR': 1.0},
        'male': {'TNR': 1.0, 'TPR': 1.0},
        'other': {'TNR': 1.0, 'TPR': 0.6666666666666666}},
    'EQUALIZED_OPPORTUNITIES',
    {'female': 1.0, 

In [None]:
# Multi-class demo data: each row is a sample, each of the 3 columns one class.
attribute_ys = {
    'male': dict(
        y_true=np.array([[0, 1, 1],
                         [0, 0, 1],
                         [1, 1, 1],
                         [0, 1, 0]]),
        y_pred=np.array([[0, 0, 1],
                         [1, 0, 1],
                         [1, 1, 1],
                         [0, 1, 1]]),
    ),
    'female': dict(
        y_true=np.array([[0, 1, 0],
                         [0, 0, 0],
                         [1, 1, 0]]),
        y_pred=np.array([[1, 0, 0],
                         [1, 0, 0],
                         [1, 0, 0]]),
    ),
    'other': dict(
        y_true=np.array([[1, 1, 1],
                         [0, 0, 0]]),
        y_pred=np.array([[1, 0, 0],
                         [1, 1, 0]]),
    ),
}

In [None]:
# Builds one BinaryMetrics per column of the label matrices defined above.
multiclass_binary_metrics = MultiClassBinaryMetrics(attribute_ys)

In [None]:
# Resolved through MultiClassBinaryMetrics.__getattr__: a DataFrame with one row per class label.
multiclass_binary_metrics.predictive_equality

Unnamed: 0_level_0,male,female,other
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.666667,0.0,0.0
1,1.0,1.0,0.0
2,0.0,1.0,1.0


In [None]:
# Echo the object; MultiClassBinaryMetrics defines no __repr__ of its own.
multiclass_binary_metrics

In [None]:
# dict() merges the zipped pairs with the keyword item, even though the zipped keys are not strings.
dict(zip([1, 2, 3], [4, 5, 6]), support=1)

{1: 4, 2: 5, 3: 6, 'support': 1}