Add a bias detector based on optimal transport #434

Merged: 26 commits, Jul 23, 2023

Commits
8e42e67
Updated all required files
Illia-Kryvoviaz Jun 7, 2023
7a85647
Some minor changes in files and updated tests
Illia-Kryvoviaz Jun 7, 2023
0d276aa
Deleted extra prints
Illia-Kryvoviaz Jun 7, 2023
a6faacd
Simplifying the ot notebook and correcting some mistypes
Illia-Kryvoviaz Jun 10, 2023
2db5f83
Added more examples to ot notebook
Illia-Kryvoviaz Jun 12, 2023
6d77b72
Update __init__.py
Illia-Kryvoviaz Jun 12, 2023
a9e4b55
Update detectors.py
Illia-Kryvoviaz Jun 12, 2023
4921f5b
Update requirements.txt
Illia-Kryvoviaz Jun 12, 2023
881ea08
Improving the notebook and adding a new feature
Illia-Kryvoviaz Jun 18, 2023
90c538f
Dev ot detector (#4)
Illia-Kryvoviaz Jun 24, 2023
7029b20
Added outputs to the notebook
Illia-Kryvoviaz Jun 24, 2023
425452b
Minor docstrings changes and update detectors.py
Illia-Kryvoviaz Jun 25, 2023
0be802e
changed demo_ot_detector to use load_preproc_data_adult
Illia-Kryvoviaz Jul 10, 2023
f96a451
ot_detector: renamed sensitive_attribute to prot_attr, minor changes
Illia-Kryvoviaz Jul 10, 2023
9c46f9a
updated comments, demo_ot_detector.ipynb
Illia-Kryvoviaz Jul 11, 2023
872f112
ot_detector: removed str arguments
Illia-Kryvoviaz Jul 11, 2023
d280b4e
ot_detector: added cost_matrix as a named parameter, minor changes
Illia-Kryvoviaz Jul 11, 2023
aa24084
ot_detector: minor changes
Illia-Kryvoviaz Jul 11, 2023
f587762
added outputs to demo_ot_detector
Illia-Kryvoviaz Jul 11, 2023
2094b47
ot_detector: changed default scoring to Wasserstein1
Illia-Kryvoviaz Jul 11, 2023
47a2678
moved OT from detector to metric
Illia-Kryvoviaz Jul 14, 2023
6acd270
renamed ot_detector to ot_metric
Illia-Kryvoviaz Jul 14, 2023
6155f99
reworked demo_ot_metric to use aif360.sklearn definition
Illia-Kryvoviaz Jul 14, 2023
3d3f039
renamed ot_bias_scan to ot_distance, minor changes
Illia-Kryvoviaz Jul 21, 2023
bb2ec68
detectors.py: reset changes
Illia-Kryvoviaz Jul 21, 2023
10c7ec8
test_ot_metric: minor changes
Illia-Kryvoviaz Jul 21, 2023
225 changes: 225 additions & 0 deletions aif360/metrics/ot_metric.py
@@ -0,0 +1,225 @@
from typing import Union
import pandas as pd
import numpy as np
import ot
from sklearn.preprocessing import LabelEncoder

def _normalize(distribution1, distribution2):
"""
Transform distributions to pleasure form, that is their sums are equal to 1,
and in case if there is negative values, increase all values with absolute value of smallest number.

Args:
distribution1 (numpy array): nontreated distribution
distribution2 (numpy array): nontreated distribution
"""
if np.minimum(np.min(distribution1), np.min(distribution2)) < 0:
extra = -np.minimum(np.min(distribution1), np.min(distribution2))
distribution1 += extra
distribution2 += extra

total_of_distribution1 = np.sum(distribution1)
if total_of_distribution1 != 0:
distribution1 /= total_of_distribution1
total_of_distribution2 = np.sum(distribution2)
if total_of_distribution2 != 0:
distribution2 /= total_of_distribution2

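# Worked example (editorial note, not part of the diff): for
#   d1 = np.array([-1., 0., 3.]) and d2 = np.array([2., 2., 0.]),
# _normalize shifts both by +1 (the most negative entry) to [0, 1, 4] and
# [3, 3, 1], then rescales each to sum to one: [0, 0.2, 0.8] and [3/7, 3/7, 1/7].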
def _transform(ground_truth, classifier, cost_matrix=None):
"""
Transform given distributions from pandas type to numpy arrays, and _normalize them.
Rearanges distributions, with totall data allocated of one.
Generates matrix distance with respect to (ground_truth[i] - classifier[j])^2.

Args:
ground_truth (series): ground truth (correct) target values
classifier (series, dataframe, optional): pandas series estimated targets
as returned by a model for binary, continuous and ordinal modes.

Returns:
initial_distribution, which is an processed ground_truth (numpy array)
required_distribution, which is an processed classifier (numpy array)
matrix_distance, which stores the distances between the cells of distributions (2d numpy array)
"""
initial_distribution = ground_truth.to_numpy().astype(float)
required_distribution = classifier.to_numpy().astype(float)

_normalize(initial_distribution, required_distribution)

if cost_matrix is not None:
matrix_distance = cost_matrix
else:
matrix_distance = np.array([abs(i - required_distribution) for i in initial_distribution], dtype=float)
return initial_distribution, required_distribution, matrix_distance

def _evaluate(
        ground_truth: pd.Series,
        classifier: pd.Series,
        prot_attr: pd.Series = None,
        num_iters=100000,
        cost_matrix: np.ndarray = None,
        **kwargs):
    """Calculate the Wasserstein distance between groups defined by `prot_attr` in `ground_truth` and `classifier`.

    Args:
        ground_truth (pd.Series, str): ground truth (correct) target values
        classifier (pd.Series): estimated target values
        prot_attr (pd.Series, str): pandas series of sensitive attribute values
        num_iters (int, optional): number of iterations (random restarts). Should be positive.
        cost_matrix (np.ndarray, optional): cost matrix for the Wasserstein distance.

    Returns:
        float or dict: Earth mover's distance, or a dictionary of Earth mover's
            distances keyed by protected-attribute value
    """

    # Calculate just the EMD between ground_truth and classifier
    if prot_attr is None:
        initial_distribution, required_distribution, matrix_distance = _transform(ground_truth, classifier, cost_matrix)
        return ot.emd2(a=initial_distribution, b=required_distribution, M=matrix_distance, numItermax=num_iters)

    if ground_truth.nunique() != 2:
        raise ValueError(f"Expected to have exactly 2 target values, got {ground_truth.nunique()}.")

    # Calculate EMD between the ground-truth distribution and the distribution of each group
    emds = {}
    for sa_val in sorted(prot_attr.unique()):
        initial_distribution = ground_truth[prot_attr == sa_val]
        required_distribution = classifier[prot_attr == sa_val]
        initial_distribution, required_distribution, matrix_distance = _transform(initial_distribution, required_distribution, cost_matrix)
        emds[sa_val] = ot.emd2(a=initial_distribution, b=required_distribution, M=matrix_distance, numItermax=num_iters)

    return emds


def ot_bias_scan(
        ground_truth: pd.Series,
        classifier: Union[pd.Series, pd.DataFrame],
        prot_attr: pd.Series = None,
        favorable_value: Union[str, float] = None,
        scoring: str = "Wasserstein1",
        num_iters: int = 100000,
        penalty: float = 1e-17,
        mode: str = "binary",
        cost_matrix: np.ndarray = None,
        **kwargs,
):
"""Normalize and calculate Wasserstein distance between groups defined by `prot_attr` in `ground_truth` and `classifier`.

Args:
ground_truth (pd.Series, str): ground truth (correct) target values.
classifier (pd.Series, pd.DataFrame, str): estimated target values.
If `mode` is nominal, must be a dataframe with columns containing predictions for each nominal class.
If `None`, model is assumed to be a dummy model that predicts the mean of the targets
or 1/(number of categories) for nominal mode.
prot_attr (pd.Series, str): sensitive attribute values.
If `None`, assume all samples belong to the same protected group.
favorable_value(str, float, optional): Either "high", "low" or a float value if the mode in [binary, ordinal, or continuous].
If float, value has to be the minimum or the maximum in the ground_truth column.
Defaults to high if None for these modes.
Support for float left in to keep the intuition clear in binary classification tasks.
If `mode` is nominal, favorable values should be one of the unique categories in the ground_truth.
Defaults to a one-vs-all scan if None for nominal mode.
scoring (str or class): only 'Wasserstein1'
num_iters (int, optional): number of iterations (random restarts) for EMD. Should be positive.
penalty (float, optional): penalty term. Should be positive. The penalty term as with any regularization parameter
may need to be tuned for a particular use case. The higher the penalty, the higher the influence of entropy regualizer.
mode: one of ['binary', 'continuous', 'nominal', 'ordinal']. Defaults to binary.
In nominal mode, up to 10 categories are supported by default.
To increase this, pass in keyword argument max_nominal = integer value.
cost_matrix (np.ndarray): cost matrix for the Wasserstein distance. Defaults to absolute difference between samples.

Returns:
ot.emd2 (float, dict): Earth mover's distance or dictionary of optimal transports for each of option of classifier

Raises:
ValueError: if `mode` is 'binary' but `ground_truth` contains less than 1 or more than 2 unique values.
"""

    # Assert correct mode passed
    if mode not in ['binary', 'continuous', 'nominal', 'ordinal']:
        raise ValueError(f"Expected one of {['binary', 'continuous', 'nominal', 'ordinal']}, got {mode}.")

    # Assert correct types passed to ground_truth, classifier and prot_attr
    if not isinstance(ground_truth, (pd.Series, str)):
        raise TypeError(f"ground_truth: expected pd.Series or str, got {type(ground_truth)}")
    if classifier is not None:
        if mode in ["binary", "continuous"] and not isinstance(classifier, pd.Series):
            raise TypeError(f"classifier: expected pd.Series for {mode} mode, got {type(classifier)}")
        if mode in ["nominal", "ordinal"] and not isinstance(classifier, pd.DataFrame):
            raise TypeError(f"classifier: expected pd.DataFrame for {mode} mode, got {type(classifier)}")
    if prot_attr is not None and not isinstance(prot_attr, (pd.Series, str)):
        raise TypeError(f"prot_attr: expected pd.Series or str, got {type(prot_attr)}")

    # Assert correct type passed to cost_matrix
    if cost_matrix is not None and not isinstance(cost_matrix, np.ndarray):
        raise TypeError(f"cost_matrix: expected numpy.ndarray, got {type(cost_matrix)}")

    # Assert scoring is "Wasserstein1"
    if scoring != "Wasserstein1":
        raise ValueError(f"Scoring mode can only be \"Wasserstein1\", got {scoring}")

    grt = ground_truth.copy()

    if classifier is not None:
        cls = classifier.copy()
        if prot_attr is not None:
            cls.index = grt.index
    else:
        cls = None

    if prot_attr is not None:
        sat = prot_attr.copy()
        sat.index = grt.index
    else:
        sat = None

    uniques = list(grt.unique())
    if mode == "binary":
        if len(uniques) > 2:
            raise ValueError(f"Only 2 unique values allowed in ground_truth for binary mode, got {uniques}")

    # Encode variables
    if not np.issubdtype(grt.dtype, np.number):
        grt_encoder = LabelEncoder().fit(grt)
        grt = pd.Series(grt_encoder.transform(grt))

    # Set the correct favorable value (this tells us whether higher or lower is better)
    min_val, max_val = grt.min(), grt.max()

    if favorable_value == 'high':
        favorable_value = max_val
    elif favorable_value == 'low':
        favorable_value = min_val
    elif favorable_value is None:
        if mode in ["binary", "ordinal", "continuous"]:
            favorable_value = max_val  # Default to higher is better
        elif mode == "nominal":
            favorable_value = "flag-all"  # Default to scanning through all categories

    if favorable_value not in [min_val, max_val, "flag-all", *uniques]:
        raise ValueError(f"favorable_value should be high, low, or one of categories {uniques}, got {favorable_value}.")

    if mode == "binary":  # Flip the ground truth if favorable_value is 0 in binary mode.
        grt = pd.Series(grt == favorable_value, dtype=int)
        if cls is None:
            cls = pd.Series(grt.mean(), index=grt.index)
        emds = _evaluate(grt, cls, sat, num_iters, cost_matrix, **kwargs)

    elif mode == "continuous":
        if cls is None:
            cls = pd.Series(grt.mean(), index=grt.index)
        emds = _evaluate(grt, cls, sat, num_iters, cost_matrix, **kwargs)

    ## TODO: rework ordinal mode to take into account the distance between pred and true
    elif mode in ["nominal", "ordinal"]:
        if cls is None:  # Set the classifier to 1/(number of categories) for nominal mode
            cls = pd.DataFrame([pd.Series(1 / grt.nunique(), index=grt.index)] * grt.nunique())
        if grt.nunique() != cls.shape[-1]:
            raise ValueError(
                f"classifier must have a column for each class. Expected shape [:, {grt.nunique()}], got {cls.shape}")
        emds = {}
        for class_label in uniques:
            grt_cl = grt.map({class_label: 1}).fillna(0)
            cls_cl = cls[class_label]
            emds[class_label] = _evaluate(grt_cl, cls_cl, sat, num_iters, cost_matrix, **kwargs)

    return emds
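For orientation, a minimal sketch (not part of the diff, toy data and variable names are illustrative) of how the new metric can be called, assuming the POT (`pot`) dependency is installed:

```python
import numpy as np
import pandas as pd
from aif360.metrics.ot_metric import ot_bias_scan

rng = np.random.default_rng(0)
n = 200
y_true = pd.Series(rng.integers(0, 2, size=n))  # binary ground truth
scores = pd.Series(rng.random(size=n))          # model scores in [0, 1]
group = pd.Series(rng.integers(0, 2, size=n))   # protected attribute

# Per-group Earth mover's distance between ground truth and scores:
# returns a dict keyed by protected-attribute value, e.g. {0: <emd>, 1: <emd>}.
emds = ot_bias_scan(y_true, scores, prot_attr=group, mode="binary")
print(emds)

# Without prot_attr, a single EMD over the whole sample is returned.
print(ot_bias_scan(y_true, scores))
```

A large gap between the per-group distances indicates that the model approximates one group's outcomes better than the other's, which is the kind of disparity this metric is meant to surface.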
5 changes: 2 additions & 3 deletions aif360/sklearn/detectors/detectors.py
@@ -1,10 +1,9 @@
from typing import Union

from aif360.detectors import bias_scan
from aif360.detectors.mdss.ScoringFunctions import ScoringFunction

from typing import Union
import pandas as pd

import numpy as np

def bias_scan(
        X: pd.DataFrame,
66 changes: 65 additions & 1 deletion aif360/sklearn/metrics/metrics.py
@@ -1,4 +1,5 @@
from itertools import permutations
from typing import Union

import numpy as np
import pandas as pd
@@ -10,10 +11,12 @@
from sklearn.utils import check_X_y
from sklearn.utils.deprecation import deprecated

from aif360.metrics import ot_metric
from aif360.sklearn.utils import check_inputs, check_groups
from aif360.detectors.mdss.ScoringFunctions import BerkJones, Bernoulli
from aif360.detectors.mdss.MDSS import MDSS


__all__ = [
    # meta-metrics
    'difference', 'ratio', 'intersection', 'one_vs_rest',
@@ -24,7 +27,7 @@
    'specificity_score', 'base_rate', 'selection_rate', 'smoothed_base_rate',
    'smoothed_selection_rate', 'generalized_fpr', 'generalized_fnr',
    # group fairness
    'statistical_parity_difference', 'disparate_impact_ratio',
    'ot_bias_scan', 'statistical_parity_difference', 'disparate_impact_ratio',
    'equal_opportunity_difference', 'average_odds_difference', 'average_predictive_value_difference',
    'average_odds_error', 'class_imbalance', 'kl_divergence',
    'conditional_demographic_disparity', 'smoothed_edf',
@@ -499,6 +502,67 @@ def generalized_fnr(y_true, probas_pred, *, pos_label=1, sample_weight=None,


# ============================ GROUP FAIRNESS ==================================
def ot_bias_scan(
        y_true: pd.Series,
        y_pred: Union[pd.Series, pd.DataFrame],
        prot_attr: pd.Series = None,
        pos_label: Union[str, float] = None,
        overpredicted: bool = True,
        scoring: str = "Wasserstein1",
        num_iters: int = 100000,
        penalty: float = 1e-17,
        mode: str = "binary",
        cost_matrix: np.ndarray = None,
        **kwargs,
):
"""Normalize and calculate Wasserstein distance between groups defined by `prot_attr` in `y_true` and `y_pred`.

Args:
y_true (pd.Series): ground truth (correct) target values.
y_pred (pd.Series, pd.DataFrame): estimated target values.
If `mode` is nominal, must be a `pd.DataFrame` with columns containing predictions for each nominal class,
or list of corresponding column names in `data`.
If `None`, model is assumed to be a dummy model that predicts the mean of the targets
or 1/(number of categories) for nominal mode.
sensitive_attribute (pd.Series): sensitive attribute values.
hoffmansc marked this conversation as resolved.
Show resolved Hide resolved
If `None`, assume all samples belong to the same protected group.
pos_label(str, float, optional): Either "high", "low" or a float value if the mode in [binary, ordinal, or continuous].
If float, value has to be the minimum or the maximum in the ground_truth column.
Defaults to high if None for these modes.
Support for float left in to keep the intuition clear in binary classification tasks.
If `mode` is nominal, favorable values should be one of the unique categories in the ground_truth.
Defaults to a one-vs-all scan if None for nominal mode.
overpredicted (bool, optional): flag for group to scan for.
`True` scans for overprediction, `False` scans for underprediction.
hoffmansc marked this conversation as resolved.
Show resolved Hide resolved
scoring (str or class): only 'Wasserstein1'
num_iters (int, optional): number of iterations (random restarts) for EMD. Should be positive.
penalty (float, optional): penalty term. Should be positive. The penalty term as with any regularization parameter
may need to be tuned for a particular use case. The higher the penalty, the higher the influence of entropy regualizer.
mode: one of ['binary', 'continuous', 'nominal', 'ordinal']. Defaults to binary.
In nominal mode, up to 10 categories are supported by default.
To increase this, pass in keyword argument max_nominal = integer value.
cost_matrix (np.ndarray): cost matrix for the Wasserstein distance. Defaults to absolute difference between samples.

Returns:
ot.emd2 (float, dict): Earth mover's distance or dictionary of optimal transports for each of option of classifier

Raises:
ValueError: if `mode` is 'binary' but `ground_truth` contains less than 1 or more than 2 unique values.
"""
    return ot_metric.ot_bias_scan(
        ground_truth=y_true,
        classifier=y_pred,
        prot_attr=prot_attr,
        favorable_value=pos_label,
        overpredicted=overpredicted,
        scoring=scoring,
        num_iters=num_iters,
        penalty=penalty,
        mode=mode,
        cost_matrix=cost_matrix,
        **kwargs,
    )

def statistical_parity_difference(y_true, y_pred=None, *, prot_attr=None,
                                  priv_group=1, pos_label=1, sample_weight=None):
    r"""Difference in selection rates.
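A corresponding sketch for the sklearn-style wrapper in nominal mode, assuming `aif360.sklearn.metrics` re-exports `ot_bias_scan` per the `__all__` addition above (toy data, illustrative names):

```python
import numpy as np
import pandas as pd
from aif360.sklearn.metrics import ot_bias_scan

rng = np.random.default_rng(42)
n = 300
classes = [0, 1, 2]
y_true = pd.Series(rng.choice(classes, size=n))
# One column of predicted probabilities per class; columns must be named
# after the class labels, since the metric indexes them by label.
y_pred = pd.DataFrame(rng.dirichlet(np.ones(len(classes)), size=n),
                      columns=classes)
group = pd.Series(rng.integers(0, 2, size=n))

# One-vs-all scan: for each class, the per-group EMD between the class
# indicator in y_true and the predicted probability of that class.
emds = ot_bias_scan(y_true, y_pred, prot_attr=group, mode="nominal")
print(emds)  # {class_label: {group_value: <emd>, ...}, ...}
```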