From dd28eadfbbc04ef868d39b5565041642bb5aac5c Mon Sep 17 00:00:00 2001 From: AndreFCruz Date: Wed, 28 Jun 2023 16:07:58 +0100 Subject: [PATCH 01/13] Initial commit: bringing code from https://github.com/socialfoundations/error-parity --- fairlearn/postprocessing/_constants.py | 4 + .../_cvxpy_threshold_optimizer.py | 395 +++++++++++++++ fairlearn/postprocessing/_cvxpy_utils.py | 363 ++++++++++++++ .../postprocessing/_randomized_classifiers.py | 461 ++++++++++++++++++ fairlearn/postprocessing/_roc_utils.py | 164 +++++++ .../postprocessing/_threshold_operation.py | 2 +- 6 files changed, 1388 insertions(+), 1 deletion(-) create mode 100644 fairlearn/postprocessing/_cvxpy_threshold_optimizer.py create mode 100644 fairlearn/postprocessing/_cvxpy_utils.py create mode 100644 fairlearn/postprocessing/_randomized_classifiers.py create mode 100644 fairlearn/postprocessing/_roc_utils.py diff --git a/fairlearn/postprocessing/_constants.py b/fairlearn/postprocessing/_constants.py index 101718589..108b93fb8 100644 --- a/fairlearn/postprocessing/_constants.py +++ b/fairlearn/postprocessing/_constants.py @@ -13,6 +13,10 @@ "Please make sure to install fairlearn[customplots] to use " "the postprocessing plots." ) +_CVXPY_IMPORT_ERROR_MESSAGE = ( + "Please make sure to install `cvxpy` to use postprocessing with relaxed " + "fairness constraint fulfillment." +) BASE_ESTIMATOR_NONE_ERROR_MESSAGE = "The base estimator cannot be `None`." BASE_ESTIMATOR_NOT_FITTED_WARNING = ( "The value of `prefit` is `True`, but `check_is_fitted` raised `NotFittedError` on" diff --git a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py new file mode 100644 index 000000000..2e3c48f21 --- /dev/null +++ b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py @@ -0,0 +1,395 @@ +"""Threshold optimizer with relaxed fairness constraints. + +TODO +---- +- Add option for constraining only equality of FPR or TPR (currently it must be +both -> equal odds); +- Add option for constraining equality of positive predictions (independence +criterion, aka demographic parity); +- Add option to use l1 or linf distances for maximum tolerance between points. + - Currently 'equal_odds' is defined using l-infinity distance (max between + TPR and FPR distances); + +""" +import logging +from itertools import product +from typing import Callable + +import numpy as np +from sklearn.metrics import roc_curve +from sklearn.base import BaseEstimator, MetaEstimatorMixin + +from fairlearn.utils._input_validation import _validate_and_reformat_input +from fairlearn.reductions._moments.error_rate import _MESSAGE_BAD_COSTS + +from ._cvxpy_utils import compute_equal_odds_optimum +from ._roc_utils import ( + roc_convex_hull, + calc_cost_of_point, +) +from ._randomized_classifiers import ( # TODO: try to use fairlearn's InterpolatedThreshold instead of our classifier API + RandomizedClassifier, + EnsembleGroupwiseClassifiers, +) + + +class _RelaxedThresholdOptimizer(BaseEstimator, MetaEstimatorMixin): + r"""Class to encapsulate all the logic needed to compute the optimal + postprocessing to fulfill fairness constraints with some optional + tolerance. + + The method amounts to finding the set of (potentially randomized) + group-specific decision thresholds that maximize some goal (e.g., accuracy), + given a maximum tolerance (or slack) on the fairness constraint fulfillment. + + This optimization problem amounts to a Linear Program (LP) as detailed in + [1]_. Solving the LP requires installing `cvxpy`. 
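+
+    A hypothetical usage sketch (assumes a fitted scikit-learn classifier
+    `clf` and arrays `X`, `y`, `group`; parameter names follow the
+    constructor defined below):
+
+    >>> postprocessor = _RelaxedThresholdOptimizer(estimator=clf, tolerance=0.05)  # doctest: +SKIP
+    >>> postprocessor.fit(X, y, group=group)  # doctest: +SKIP
+    >>> y_pred = postprocessor.predict(X, group=group)  # doctest: +SKIP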
+ + Read more in the :ref:`User Guide `. + + Parameters + ---------- + estimator : object + A `scikit-learn compatible estimator `_ # noqa + whose output will be postprocessed. + The estimator should output real-valued scores, as postprocessing + results will be extremely poor when performed over binarized + predictions. + + tolerance : float + The absolute tolerance for the equal odds fairness constraint. + Will allow for `tolerance` difference between group-wise ROC points. + + objective_costs : dict + A dictionary detailing the cost for false positives and false negatives, + of the form :code:`{'fp': , 'fn': }`. Will use the 0-1 + loss by default (maximum accuracy). + + grid_size : int, optional + The maximum number of ticks (points) in each group's ROC curve, by + default 1000. This corresponds to the maximum number of different + thresholds to use over a predictor. + + seed : int + A random seed used for reproducibility when producing randomized + classifiers, by default None (default: non-reproducible behavior). + + Notes + ----- + The procedure for relaxed fairness constraint fulfillment is detailed in + `Cruz et al. (2023) `_ [1]_. + The underlying threshold optimization algorithm is based on + `Hardt et al. (2016) `_ [2]_. + + References + ---------- + .. [1] A. Cruz, and M. Hardt, "Unprocessing Seven Years of + Algorithmic Fairness," arXiv.org, 15-Jun-2023. + [Online]. Available: https://arxiv.org/abs/2306.07261. + + .. [2] M. Hardt, E. Price, and N. Srebro, "Equality of Opportunity in + Supervised Learning," arXiv.org, 07-Oct-2016. + [Online]. Available: https://arxiv.org/abs/1610.02413. + + """ + + def __init__( + self, + estimator: BaseEstimator, + tolerance: float, + objective_costs: dict = None, + grid_size: int = 1000, + seed: int = None, + ): + + # Save arguments + self.estimator = estimator + self.tolerance = tolerance + self.max_grid_size = grid_size + + # Unpack objective costs + if objective_costs is None: + self.false_pos_cost = 1.0 + self.false_neg_cost = 1.0 + else: + self.false_pos_cost, self.false_neg_cost = \ + self.unpack_objective_costs(objective_costs) + + # Randomly sample a seed if none was provided + self.seed = np.random.randint(2 ** 20) + + # Initialize instance variables + self._all_roc_data: dict = None + self._all_roc_hulls: dict = None + self._groupwise_roc_points: np.ndarray = None + self._global_roc_point: np.ndarray = None + self._global_prevalence: float = None + self._realized_classifier: EnsembleGroupwiseClassifiers = None + + @staticmethod + def unpack_objective_costs(objective_costs: dict) -> tuple[float, float]: + """Validates and unpacks the given `objective_costs`. + + Parameters + ---------- + objective_costs : dict + A dictionary detailing the cost for false positives and false negatives, + of the form :code:`{'fp': , 'fn': }`. Will use the 0-1 + loss by default (maximum accuracy). + + Returns + ------- + tuple[float, float] + A tuple respectively composed of the cost of false positives and the + cost of false negatives, i.e., a tuple with + :code:`(fp_cost, fn_cost)`. + + Raises + ------ + ValueError + Raised when the provided costs are invalid (e.g., missing keys + in the provided dict, or negative costs). 
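+
+        Examples
+        --------
+        A valid costs dictionary is unpacked into `(fp_cost, fn_cost)`:
+
+        >>> _RelaxedThresholdOptimizer.unpack_objective_costs({"fp": 1.0, "fn": 2.0})
+        (1.0, 2.0)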
+ """ + if ( + type(objective_costs) is dict + and objective_costs.keys() == {"fp", "fn"} + and objective_costs["fp"] >= 0.0 + and objective_costs["fn"] >= 0.0 + and objective_costs["fp"] + objective_costs["fn"] > 0.0 + ): + fp_cost = objective_costs["fp"] + fn_cost = objective_costs["fn"] + else: + raise ValueError(_MESSAGE_BAD_COSTS) + + return fp_cost, fn_cost + + @property + def groupwise_roc_points(self) -> np.ndarray: + return self._groupwise_roc_points + + @property + def global_roc_point(self) -> np.ndarray: + return self._global_roc_point + + def cost( + self, + false_pos_cost: float = None, + false_neg_cost: float = None, + ) -> float: + """Computes the theoretical cost of the solution found. + + Use false_pos_cost==false_neg_cost==1 for the 0-1 loss (the + standard error rate), which amounts to maximizing accuracy. + + Parameters + ---------- + false_pos_cost : float, optional + The cost of a FALSE POSITIVE error, by default will take the value + given in the object's constructor. + false_neg_cost : float, optional + The cost of a FALSE NEGATIVE error, by default will take the value + given in the object's constructor. + + Returns + ------- + float + The cost of the solution found. + """ + self._check_fit_status() + global_fpr, global_tpr = self.global_roc_point + + return calc_cost_of_point( + fpr=global_fpr, + fnr=1 - global_tpr, + prevalence=self._global_prevalence, + false_pos_cost=false_pos_cost or self.false_pos_cost, + false_neg_cost=false_neg_cost or self.false_neg_cost, + ) + + def constraint_violation(self) -> float: + """This method should be part of a common interface between different + relaxed-constraint classes. + + Returns + ------- + float + The fairness constraint violation. + """ + return self.equal_odds_violation() + + def equal_odds_violation(self) -> float: + """Computes the theoretical violation of the equal odds constraint + (i.e., the maximum l-inf distance between the ROC point of any pair + of groups). + + Returns + ------- + float + The equal-odds constraint violation. + """ + self._check_fit_status() + + n_groups = len(self.groupwise_roc_points) + + # Compute l-inf distance between each pair of groups + linf_constraint_violation = [ + (np.linalg.norm( + self.groupwise_roc_points[i] - self.groupwise_roc_points[j], + ord=np.inf), (i, j)) + for i, j in product(range(n_groups), range(n_groups)) + if i < j + ] + + # Return the maximum + max_violation, (groupA, groupB) = max(linf_constraint_violation) + logging.info( + f"Maximum fairness violation is between " + f"group={groupA} (p={self.groupwise_roc_points[groupA]}) and " + f"group={groupB} (p={self.groupwise_roc_points[groupB]});" + ) + + return max_violation + + + def fit(self, X: np.ndarray, y: np.ndarray, group: np.ndarray, y_scores: np.ndarray = None): + """Fit this predictor to achieve the (possibly relaxed) equal odds + constraint on the provided data. + + Parameters + ---------- + X : np.ndarray + The input features. + y : np.ndarray + The input labels. + group : np.ndarray + The group membership of each sample. + Assumes groups are numbered [0, 1, ..., num_groups-1]. + y_scores : np.ndarray, optional + The pre-computed model predictions on this data. + + Returns + ------- + callable + Returns self. + """ + + # Compute group stats + self._global_prevalence = np.sum(y) / len(y) + + unique_groups = np.unique(group) + num_groups = len(unique_groups) + if np.max(unique_groups) > num_groups-1: + raise ValueError( + f"Groups should be numbered starting at 0, and up to " + f"num_groups-1. 
Got {num_groups} groups, but max value is " + f"{np.max(unique_groups)} != num_groups-1 == {num_groups-1}." + ) + + # Relative group sizes for LN and LP samples + group_sizes_label_neg = np.array([ + np.sum(1 - y[group == g]) for g in unique_groups + ]) + group_sizes_label_pos = np.array([ + np.sum(y[group == g]) for g in unique_groups + ]) + + if np.sum(group_sizes_label_neg) + np.sum(group_sizes_label_pos) != len(y): + raise RuntimeError(f"Failed sanity check. Are you using non-binary labels?") + + # Convert to relative sizes + group_sizes_label_neg = group_sizes_label_neg.astype(float) / np.sum(group_sizes_label_neg) + group_sizes_label_pos = group_sizes_label_pos.astype(float) / np.sum(group_sizes_label_pos) + + # Compute group-wise ROC curves + if y_scores is None: + y_scores = self.estimator(X) + + self._all_roc_data = dict() + for g in unique_groups: + group_filter = group == g + + roc_curve_data = roc_curve( + y[group_filter], + y_scores[group_filter], + ) + + # Check if max_roc_ticks is exceeded + fpr, tpr, thrs = roc_curve_data + if self.max_grid_size is not None and len(fpr) > self.max_grid_size: + indices_to_keep = np.arange(0, len(fpr), len(fpr) / self.max_grid_size).astype(int) + + # Bottom-left (0,0) and top-right (1,1) points must be kept + indices_to_keep[-1] = len(fpr) - 1 + roc_curve_data = (fpr[indices_to_keep], tpr[indices_to_keep], thrs[indices_to_keep]) + + self._all_roc_data[g] = roc_curve_data + + # Compute convex hull of each ROC curve + self._all_roc_hulls = dict() + for g in unique_groups: + group_fpr, group_tpr, _group_thresholds = self._all_roc_data[g] + + curr_roc_points = np.stack((group_fpr, group_tpr), axis=1) + curr_roc_points = np.vstack((curr_roc_points, [1, 0])) # Add point (1, 0) to ROC curve + + self._all_roc_hulls[g] = roc_convex_hull(curr_roc_points) + + # Find the group-wise optima that fulfill the fairness criteria + self._groupwise_roc_points, self._global_roc_point = compute_equal_odds_optimum( + groupwise_roc_hulls=self._all_roc_hulls, + fairness_tolerance=self.tolerance, + group_sizes_label_pos=group_sizes_label_pos, + group_sizes_label_neg=group_sizes_label_neg, + global_prevalence=self._global_prevalence, + false_positive_cost=self.false_pos_cost, + false_negative_cost=self.false_neg_cost, + ) + + # Construct each group-specific classifier + all_rand_clfs = { + g: RandomizedClassifier.construct_at_target_ROC( + predictor=self.estimator, + roc_curve_data=self._all_roc_data[g], + target_roc_point=self._groupwise_roc_points[g], + seed=self.seed, + ) + for g in unique_groups + } + + # Construct the global classifier (can be used for all groups) + self._realized_classifier = EnsembleGroupwiseClassifiers(group_to_clf=all_rand_clfs) + return self + + def _check_fit_status(self, raise_error: bool = True) -> bool: + """Checks whether this classifier has been fit on some data. + + Parameters + ---------- + raise_error : bool, optional + Whether to raise an error if the classifier is uninitialized + (otherwise will just return False), by default True. + + Returns + ------- + is_fit : bool + Whether the classifier was already fit on some data. + + Raises + ------ + RuntimeError + If `raise_error==True`, raises an error if the classifier is + uninitialized. + """ + if self._realized_classifier is None: + if not raise_error: + return False + + raise RuntimeError( + "This classifier has not yet been fitted to any data. " + "Call clf.fit(...) 
before this method.") + + return True + + def predict(self, X: np.ndarray, group: np.ndarray) -> np.ndarray: + return self._realized_classifier(X, group) diff --git a/fairlearn/postprocessing/_cvxpy_utils.py b/fairlearn/postprocessing/_cvxpy_utils.py new file mode 100644 index 000000000..0ca50394d --- /dev/null +++ b/fairlearn/postprocessing/_cvxpy_utils.py @@ -0,0 +1,363 @@ +"""A set of helper functions for defining cvxpy LP objective and constraints. +""" + +from __future__ import annotations +import logging +from itertools import product + +import numpy as np + +from ._constants import _CVXPY_IMPORT_ERROR_MESSAGE +from ._roc_utils import calc_cost_of_point, compute_global_roc_from_groupwise + + +# Maximum distance from solution to feasibility or optimality +SOLUTION_TOLERANCE = 1e-9 + + +def _import_cvxpy_if_available(): + """Will try to import `cvxpy` and raise an appropriate error if it's not + installed. + """ + try: + import cvxpy as cp + except ImportError: + raise RuntimeError(_CVXPY_IMPORT_ERROR_MESSAGE) + + +def compute_line(p1: np.ndarray, p2: np.ndarray) -> tuple[float, float]: + """Computes the slope and intercept of the line that passes + through the two given points. + + The intercept is the value at x=0! + (or NaN for vertical lines) + + For vertical lines just use the x-value of one of the points + to find the intercept at y=0. + + Parameters + ---------- + p1 : np.ndarray + A 2-D point. + p2 : np.ndarray + A 2-D point. + + Returns + ------- + tuple[float, float] + A tuple pair with (slope, intercept) of the line that goes from p1 to p2. + + Raises + ------ + ValueError + Raised when input is invalid, e.g., when p1 == p2. + """ + + p1x, p1y = p1 + p2x, p2y = p2 + if all(p1 == p2): + raise ValueError("Invalid points: p1==p2;") + + # Vertical line + if np.isclose(p2x, p1x): + slope = np.inf + intercept = np.nan + + # Diagonal or horizontal line + else: + slope = (p2y - p1y) / (p2x - p1x) + intercept = p1y - slope * p1x + + return slope, intercept + + +def compute_halfspace_inequality( + p1: np.ndarray, + p2: np.ndarray, + ) -> tuple[float, float, float]: + """Computes the halfspace inequality defined by the vector p1->p2, such that + Ax + b <= 0, + where A and b are extracted from the line that goes through p1->p2. + + As such, the inequality enforces that points must lie on the LEFT of the + line defined by the p1->p2 vector. + + In other words, input points are assumed to be in COUNTER CLOCK-WISE order + (right-hand rule). + + Parameters + ---------- + p1 : np.ndarray + A point in the halfspace. + p2 : np.ndarray + Another point in the halfspace. + + Returns + ------- + tuple[float, float, float] + Returns an array of size=(n_dims + 1), with format [A; b], + representing the inequality Ax + b <= 0. + + Raises + ------ + RuntimeError + Thrown in case if inconsistent internal state variables. + """ + slope, intercept = compute_line(p1, p2) + + # Unpack the points for ease of use + p1x, p1y = p1 + p2x, p2y = p2 + + # if slope is infinity, the constraint only applies to the values of x; + # > the halfspace's b intercept value will correspond to this value of x; + if np.isinf(slope): + + # Sanity check for vertical line + if not np.isclose(p1x, p2x): + raise RuntimeError( + "Got infinite slope for line containing two points with " + "different x-axis coordinates.") + + # Vector pointing downwards? then, x >= b + if p2y < p1y: + return [-1, 0, p1x] + + # Vector pointing upwards? 
then, x <= b + elif p2y > p1y: + return [1, 0, -p1x] + + # elif slope is zero, the constraint only applies to the values of y; + # > the halfspace's b intercept value will correspond to this value of y; + elif np.isclose(slope, 0.0): + + # Sanity checks for horizontal line + if not np.isclose(p1y, p2y) or not np.isclose(p1y, intercept): + raise RuntimeError( + f"Invalid horizontal line; points p1 and p2 should have same " + f"y-axis value as intercept ({p1y}, {p2y}, {intercept}).") + + # Vector pointing leftwards? then, y <= b + if p2x < p1x: + return [0, 1, -p1y] + + # Vector pointing rightwards? then, y >= b + elif p2x > p1x: + return [0, -1, p1y] + + # else, we have a standard diagonal line + else: + + # Vector points left? + # then, y <= mx + b <=> -mx + y - b <= 0 + if p2x < p1x: + return [-slope, 1, -intercept] + + # Vector points right? + # then, y >= mx + b <=> mx - y + b <= 0 + elif p2x > p1x: + return [slope, -1, intercept] + + logging.error(f"No constraint can be concluded from points p1={p1} and p2={p2};") + return [0, 0, 0] + + +def make_cvxpy_halfspace_inequality( + p1: np.ndarray, + p2: np.ndarray, + cvxpy_point: "cvxpy.Variable", + ) -> "cvxpy.Expression": + """Creates a single cvxpy inequality constraint that enforces the given + point, `cvxpy_point`, to lie on the left of the vector p1->p2. + + Points must be sorted in counter clock-wise order! + + Parameters + ---------- + p1 : np.ndarray + A point p1. + p2 : np.ndarray + Another point p2. + cvxpy_point : cvxpy.Variable + The cvxpy variable over which the constraint will be applied. + + Returns + ------- + cvxpy.Expression + A linear inequality constraint of type Ax + b <= 0. + """ + x_coeff, y_coeff, b = compute_halfspace_inequality(p1, p2) + return np.array([x_coeff, y_coeff]) @ cvxpy_point + b <= 0 + + +def make_cvxpy_point_in_polygon_constraints( + polygon_vertices: np.ndarray, + cvxpy_point: "cvxpy.Variable", + ) -> list["cvxpy.Expression"]: + """Creates the set of cvxpy constraints that force the given cvxpy variable + point to lie within the polygon defined by the given vertices. + + Parameters + ---------- + polygon_vertices : np.ndarray + A sequence of points that make up a polygon. + Points must be sorted in COUNTER CLOCK-WISE order! (right-hand rule) + cvxpy_point : cvxpy.Variable + A cvxpy variable representing a point, over which the constraints will + be applied. + + Returns + ------- + list[cvxpy.Expression] + A list of cvxpy constraints. + """ + return [ + make_cvxpy_halfspace_inequality( + polygon_vertices[i], polygon_vertices[(i+1) % len(polygon_vertices)], + cvxpy_point, + ) + for i in range(len(polygon_vertices)) + ] + + +def compute_equal_odds_optimum( + groupwise_roc_hulls: dict[int, np.ndarray], + fairness_tolerance: float, + group_sizes_label_pos: np.ndarray, + group_sizes_label_neg: np.ndarray, + global_prevalence: float, + false_positive_cost: float = 1., + false_negative_cost: float = 1., + ) -> tuple[np.ndarray, np.ndarray]: + """Computes the solution to finding the optimal fair (equal odds) classifier. + + Can relax the equal odds constraint by some given tolerance. + + Parameters + ---------- + groupwise_roc_hulls : dict[int, np.ndarray] + A dict mapping each group to the convex hull of the group's ROC curve. + The convex hull is an np.array of shape (n_points, 2), containing the + points that form the convex hull of the ROC curve, sorted in COUNTER + CLOCK-WISE order. 
+ fairness_tolerance : float + A value for the tolerance when enforcing the equal odds fairness + constraint, i.e., equality of TPR and FPR among groups. + group_sizes_label_pos : np.ndarray + The relative or absolute number of positive samples in each group. + group_sizes_label_neg : np.ndarray + The relative or absolute number of negative samples in each group. + global_prevalence : float + The global prevalence of positive samples. + false_positive_cost : float, optional + The cost of a FALSE POSITIVE error, by default 1. + false_negative_cost : float, optional + The cost of a FALSE NEGATIVE error, by default 1. + + Returns + ------- + (groupwise_roc_points, global_roc_point) : tuple[np.ndarray, np.ndarray] + A tuple pair, (<1>, <2>), containing: + 1: an array with the group-wise ROC points for the solution. + 2: an array with the single global ROC point for the solution. + """ + _import_cvxpy_if_available() + + n_groups = len(groupwise_roc_hulls) + if n_groups != len(group_sizes_label_neg) or n_groups != len(group_sizes_label_pos): + raise ValueError( + f"Invalid arguments; all of the following should have the same " + f"length: groupwise_roc_hulls, group_sizes_label_neg, group_sizes_label_pos;") + + # Group-wise ROC points + groupwise_roc_points_vars = [ + cp.Variable(shape=2, name=f"ROC point for group {i}", nonneg=True) + for i in range(n_groups) + ] + + # Define global ROC point as a linear combination of the group-wise ROC points + global_roc_point_var = cp.Variable(shape=2, name="Global ROC point", nonneg=True) + constraints = [ + # Global FPR is the average of group FPRs weighted by LNs in each group + global_roc_point_var[0] == group_sizes_label_neg @ np.array([p[0] for p in groupwise_roc_points_vars]), + + # Global TPR is the average of group TPRs weighted by LPs in each group + global_roc_point_var[1] == group_sizes_label_pos @ np.array([p[1] for p in groupwise_roc_points_vars]), + ] + + # Relaxed equal odds constraints + # 1st option - CONSTRAINT FOR: l-inf distance between any two group's ROCs being less than epsilon + constraints += [ + cp.norm_inf(groupwise_roc_points_vars[i] - groupwise_roc_points_vars[j]) <= fairness_tolerance + for i, j in product(range(n_groups), range(n_groups)) + if i < j + # if i != j + ] + + # Constraints for points in respective group-wise ROC curves + for idx in range(n_groups): + constraints += make_cvxpy_point_in_polygon_constraints( + polygon_vertices=groupwise_roc_hulls[idx], + cvxpy_point=groupwise_roc_points_vars[idx]) + + # Define cost function + obj = cp.Minimize(calc_cost_of_point( + fpr=global_roc_point_var[0], + fnr=1 - global_roc_point_var[1], + prevalence=global_prevalence, + false_pos_cost=false_positive_cost, + false_neg_cost=false_negative_cost, + )) + + # Define cvxpy problem + prob = cp.Problem(obj, constraints) + + # Run solver + prob.solve(solver=cp.ECOS, abstol=SOLUTION_TOLERANCE, feastol=SOLUTION_TOLERANCE) + # NOTE: these tolerances are supposed to be smaller than the default np.isclose tolerances + # (useful when comparing if two points are the same, within the cvxpy accuracy tolerance) + + # Log solution + logging.info(f"cvxpy solver took {prob.solver_stats.solve_time}s; status is {prob.status}.") + + if prob.status not in ["infeasible", "unbounded"]: + # Otherwise, problem.value is inf or -inf, respectively. 
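+        # NOTE: statuses other than "optimal" (e.g., cvxpy's
+        # "optimal_inaccurate") also fall through to this branch and are
+        # treated as valid, if lower-precision, solutions.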
+ logging.info(f"Optimal solution value: {prob.value}") + for variable in prob.variables(): + logging.info(f"Variable {variable.name()}: value {variable.value}") + else: + # This line should never be reached (there are always trivial fair + # solutions in the ROC diagonal) + raise ValueError(f"cvxpy problem has no solution; status={prob.status}") + + groupwise_roc_points = np.vstack([p.value for p in groupwise_roc_points_vars]) + global_roc_point = global_roc_point_var.value + + # Sanity check solution cost + solution_cost = calc_cost_of_point( + fpr=global_roc_point[0], + fnr=1-global_roc_point[1], + prevalence=global_prevalence, + false_pos_cost=false_positive_cost, + false_neg_cost=false_negative_cost, + ) + + if not np.isclose(solution_cost, prob.value): + logging.error( + f"Solution was found but cost did not pass sanity check! " + f"Found solution ROC point {global_roc_point} with theoretical cost " + f"{prob.value}, but actual cost is {solution_cost};") + + # Sanity check congruency between group-wise ROC points and global ROC point + global_roc_from_groupwise = compute_global_roc_from_groupwise( + groupwise_roc_points=groupwise_roc_points, + groupwise_label_pos_weight=group_sizes_label_pos, + groupwise_label_neg_weight=group_sizes_label_neg, + ) + if not all(np.isclose(global_roc_from_groupwise, global_roc_point)): + logging.error( + f"Solution: global ROC point ({global_roc_point}) does not seem to " + f"match group-wise ROC points; global should be " + f"({global_roc_from_groupwise}) to be consistent with group-wise;") + + return groupwise_roc_points, global_roc_point diff --git a/fairlearn/postprocessing/_randomized_classifiers.py b/fairlearn/postprocessing/_randomized_classifiers.py new file mode 100644 index 000000000..9926bde93 --- /dev/null +++ b/fairlearn/postprocessing/_randomized_classifiers.py @@ -0,0 +1,461 @@ +"""Helper functions to construct and use randomized classifiers. + +TODO: this module will probably be substituted by the InterpolatedThresholder +already implemented in fairlearn. + +""" +from __future__ import annotations + +import logging +from abc import ABC, abstractmethod +from typing import Callable + +import numpy as np +from scipy.spatial import ConvexHull + + +class Classifier(ABC): + @abstractmethod + def __call__(self, X: np.ndarray, group: np.ndarray = None) -> np.ndarray: + """Return predicted class, Y, for the given input features, X. + """ + raise NotImplementedError + + +class BinaryClassifier(Classifier): + """Constructs a deterministic binary classifier, by thresholding a + real-valued score predictor. + """ + + def __init__( + self, + score_predictor: callable, + threshold: float, + ): + """Constructs a deterministic binary classifier from the given + real-valued score predictor and a threshold in {0, 1}. + """ + self.score_predictor = score_predictor + self.threshold = threshold + + def __call__(self, X: np.ndarray, group: np.ndarray = None) -> np.ndarray: + """Computes predictions for the given samples, X. + + Parameters + ---------- + X : np.ndarray + The input samples, in shape (num_samples, num_features). + group : None, optional + None. This argument will be ignored by this classifier as it does + not consider sensitive attributes. + + Returns + ------- + y_pred_binary : np.ndarray[int] + The predicted class for each input sample. 
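+
+        Examples
+        --------
+        A minimal sketch with a toy score predictor (any callable returning
+        real-valued scores would work; this one just reads the first feature):
+
+        >>> clf = BinaryClassifier(lambda X: X[:, 0], threshold=0.5)
+        >>> clf(np.array([[0.2], [0.7]]))
+        array([0, 1])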
+ """ + return (self.score_predictor(X) >= self.threshold).astype(int) + + +class BinaryClassifierAtROCDiagonal(Classifier): + """A dummy classifier whose predictions have no correlation with the input + features, but achieves whichever target FPR or TPR you want (on ROC diag.) + """ + + def __init__( + self, + target_fpr: float = None, + target_tpr: float = None, + seed: int = 42, + ): + err_msg = ( + f"Must provide exactly one of 'target_fpr' or 'target_tpr', " + f"got target_fpr={target_fpr}, target_tpr={target_tpr}." + ) + if target_fpr is not None and target_tpr is not None: + raise ValueError(err_msg) + + # Provided FPR + if target_fpr is not None: + self.target_fpr = target_fpr + self.target_tpr = target_fpr + + # Provided TPR + elif target_tpr is not None: + self.target_tpr = target_tpr + self.target_fpr = target_tpr + + # Provided neither! + else: + raise ValueError(err_msg) + + # Initiate random number generator + self.rng = np.random.default_rng(seed) + + def __call__(self, X: np.ndarray, group: np.ndarray = None) -> np.ndarray: + return (self.rng.random(size=len(X)) >= (1 - self.target_fpr)).astype(int) + + +class EnsembleGroupwiseClassifiers(Classifier): + """Constructs a classifier from a set of group-specific classifiers. + """ + + def __init__(self, group_to_clf: dict[int | str, Callable]): + """Constructs a classifier from a set of group-specific classifiers. + + Must be provided exactly one classifier per unique group value. + + Parameters + ---------- + group_to_clf : dict[int | str, callable] + A mapping of group value to the classifier that should handle + predictions for that specific group. + """ + self.group_to_clf = group_to_clf + + def __call__(self, X: np.ndarray, group: np.ndarray) -> np.ndarray: + """Compute predictions for the given input samples X, given their + sensitive attributes, group. + + Parameters + ---------- + X : np.ndarray + Input samples, with shape (num_samples, num_features). + group : np.ndarray, optional + The sensitive attribute value for each input sample. + + Returns + ------- + y_pred : np.ndarray + The predictions, where the prediction for each sample is handed off + to a group-specific classifier for that sample. + """ + if len(X) != len(group): + raise ValueError(f"Invalid input sizes len(X) != len(group)") + + # Array to store predictions + num_samples = len(X) + y_pred = np.zeros(num_samples) + + # Filter to keep track of all samples that received a prediction + cumulative_filter = np.zeros(num_samples).astype(bool) + + for group_value, group_clf in self.group_to_clf.items(): + group_filter = (group == group_value) + y_pred[group_filter] = group_clf(X[group_filter]) + cumulative_filter |= group_filter + + if np.sum(cumulative_filter) != num_samples: + raise RuntimeError( + f"Computed group-wise predictions for {np.sum(cumulative_filter)} " + f"samples, but got {num_samples} input samples.") + + return y_pred + + +class RandomizedClassifier(Classifier): + """Constructs a randomized classifier from the given classifiers and + their probabilities. + """ + + def __init__( + self, + classifiers: list[Classifier], + probabilities: list[float], + seed: int = 42, + ): + """Constructs a randomized classifier from the given classifiers and + their probabilities. + + This classifier will compute predictions for the whole input dataset at + once, which will in general be faster for larger inputs (when compared + to predicting each sample separately). 
+ + Parameters + ---------- + classifiers : list[callable] + A list of classifiers + probabilities : list[float] + A list of probabilities for each given classifier, where + probabilities[idx] is the probability of using the prediction from + classifiers[idx]. + seed : int, optional + A random seed, by default 42. + + Returns + ------- + callable + The corresponding randomized classifier. + """ + if len(classifiers) != len(probabilities): + raise ValueError( + f"Invalid arguments: len(classifiers) != len(probabilities); " + f"({len(classifiers)} != {len(probabilities)});") + + self.classifiers = classifiers + self.probabilities = probabilities + self.rng = np.random.default_rng(seed) + + def __call__(self, X: np.ndarray, group: np.ndarray = None) -> int: + # Assign each sample to a classifier + clf_idx = self.rng.choice( + np.arange(len(self.classifiers)), # possible choices + size=len(X), # size of output array + p=self.probabilities, # prob. of each choice + ) + + # Run predictions for all classifiers on all samples + y_pred_choices = [clf(X) for clf in self.classifiers] + # TODO: + # we could actually just run the classifier for the samples that get + # matched with it... similar to the EnsembleGroupwiseClassifiers call + # method. + + return np.choose(clf_idx, y_pred_choices) + + + @staticmethod + def find_weights_given_two_points( + point_A: np.ndarray, + point_B: np.ndarray, + target_point: np.ndarray, + ): + """Given two ROC points corresponding to existing binary classifiers, + find the weights that result in a classifier whose ROC point is target_point. + + May need to interpolate the two given points with a third point corresponding + to a random classifier (random uniform distribution with different thresholds). + + Returns + ------- + tuple[np.ndarray, np.ndarray] + Returns a tuple of numpy arrays (Ws, Ps), such that Ws @ Ps == target_point. + The 1st array, Ws, corresponds to the weights of each point in the 2nd array, Ps. + """ + # Check if the target point is actually point A or B + if all(np.isclose(point_A, target_point)): + return np.array([1]), np.expand_dims(point_A, axis=0) + + if all(np.isclose(point_B, target_point)): + return np.array([1]), np.expand_dims(point_B, axis=0) + + # If not, we'll have to triangulate the target using A and B + point_A_fpr, point_A_tpr = point_A + point_B_fpr, point_B_tpr = point_B + target_fpr, target_tpr = target_point + if not (point_A_fpr <= target_fpr <= point_B_fpr): + raise ValueError( + f"Invalid input. FPR should fulfill: " + f"({point_A_fpr} point_A_FPR) <= ({target_fpr} target_fpr) <= " + f"({point_B_fpr} point_B_fpr)") + + # Calculate weights for points A and B + weight_A = (target_fpr - point_B_fpr) / (point_A_fpr - point_B_fpr) + + # Result of projecting target point P directly UPWARDS towards the AB line + weights_AB = np.array([weight_A, 1 - weight_A]) + point_P_upwards = weights_AB @ np.vstack((point_A, point_B)) + if not np.isclose(point_P_upwards[0], target_fpr): + raise RuntimeError( + "Failed projecting target_fpr to ROC hull frontier. " + f"Got proj. 
FPR={point_P_upwards[0]}; target FPR={target_fpr};") + + # Check if the target point lies in the AB line (and return if so) + if all(np.isclose(point_P_upwards, target_point)): + return weights_AB, np.vstack((point_A, point_B)) + + # Result of projecting target point P directly DOWNWARDS towards the diagonal tpr==fpr + point_P_downwards = np.array([target_fpr, target_fpr]) + + # Calculate weights for P upwards and P downwards + weight_P_upwards = (target_tpr - point_P_downwards[1]) / (point_P_upwards[1] - point_P_downwards[1]) + + # Sanity checks... + all_points = np.vstack((point_A, point_B, point_P_downwards)) + all_weights = np.hstack((weight_P_upwards * weights_AB, 1 - weight_P_upwards)) + + if not np.isclose(all_weights.sum(), 1): + raise RuntimeError( + f"Sum of linear interpolation weights was {all_weights.sum()}, " + f"should be 1!") + + if not all(np.isclose(target_point, all_weights @ all_points)): + raise RuntimeError( + f"Triangulation of target point failed. " + f"Target was {target_point}; got {all_weights @ all_points}.") + + return all_weights, all_points + + @staticmethod + def construct_at_target_ROC( + predictor: callable, + roc_curve_data: tuple, + target_roc_point: np.ndarray, + seed: int = 42, + ) -> "RandomizedClassifier": + """Constructs a randomized classifier in the interior of the + convex hull of the classifier's ROC curve, at a given target + ROC point. + + Parameters + ---------- + predictor : callable + A predictor that outputs real-valued scores in range [0; 1]. + roc_curve_data : tuple[np.array...] + The ROC curve of the given classifier, as a tuple of + (FPR values; TPR values; threshold values). + target_roc_point : np.ndarray + The target ROC point in (FPR, TPR). + + Returns + ------- + rand_clf : callable + A (randomized) binary classifier whose expected FPR and TPR + corresponds to the given target ROC point. 
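+
+        Examples
+        --------
+        A hypothetical sketch (assumes a fitted `predictor`, arrays `X` and
+        `y_true`, and `roc_curve` from `sklearn.metrics`):
+
+        >>> fpr, tpr, thrs = roc_curve(y_true, predictor(X))  # doctest: +SKIP
+        >>> clf = RandomizedClassifier.construct_at_target_ROC(
+        ...     predictor=predictor,
+        ...     roc_curve_data=(fpr, tpr, thrs),
+        ...     target_roc_point=np.array([0.2, 0.6]),
+        ... )  # doctest: +SKIP
+        >>> y_pred = clf(X)  # doctest: +SKIP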
+ """ + # Unpack useful constants + target_fpr, target_tpr = target_roc_point + fpr, tpr, thrs = roc_curve_data + + # Check if we have more than two ROC points + # (3 minimum to compute convex hull) + if len(fpr) <= 1: + raise ValueError( + f"Invalid ROC curve data (only has one point): " + f"fpr:{fpr}; tpr:{tpr}.") + + if len(fpr) == 2: + logging.warning(f"Got ROC data with only 2 points: producing a random classifier...") + if not np.isclose(target_roc_point[0], target_roc_point[1]): + logging.error( + f"Invalid target ROC point ({target_roc_point}) is not in " + "diagonal ROC line, but a random-classifier ROC was provided.") + + return BinaryClassifierAtROCDiagonal(target_fpr=target_roc_point[0]) + + # Compute hull of ROC curve + roc_curve_points = np.stack((fpr, tpr), axis=1) + hull = ConvexHull(roc_curve_points) + + # Filter out ROC points in the interior of the convex hull and other suboptimal points + points_above_diagonal = np.argwhere(tpr >= fpr).ravel() + useful_points_idx = np.array(sorted(set(hull.vertices) & set(points_above_diagonal))) + + fpr = fpr[useful_points_idx] + tpr = tpr[useful_points_idx] + thrs = thrs[useful_points_idx] + + # Find points A and B to construct the randomized classifier from + # > point A is the last point with FPR smaller or equal to the target + point_A_idx = 0 + if target_fpr > 0: + point_A_idx = max(np.argwhere(fpr <= target_fpr).ravel()) + point_A_roc = roc_curve_points[useful_points_idx][point_A_idx] + + # > point B is the first point with FPR larger than the target + point_B_idx = min(point_A_idx + 1, len(thrs) - 1) + point_B_roc = roc_curve_points[useful_points_idx][point_B_idx] + + weights, points = RandomizedClassifier.find_weights_given_two_points( + point_A=point_A_roc, + point_B=point_B_roc, + target_point=target_roc_point, + ) + + if max(weights) > 1: + logging.error(f"Got triangulation weights over 100%: w={weights};") + + # Instantiate classifiers for points A and B + clf_a = BinaryClassifier(predictor, threshold=thrs[point_A_idx]) + clf_b = BinaryClassifier(predictor, threshold=thrs[point_B_idx]) + + # Check if most of the probability mass is on a single classifier + if np.isclose(max(weights), 1.0): + if all(np.isclose(target_roc_point, point_A_roc)): + return clf_a + + elif all(np.isclose(target_roc_point, point_B_roc)): + return clf_b + + else: + # differences from target point to A or B are significant enough + # to warrant triangulating between multiple points + pass + + # If only one point returned, then that point should have weight==1.0 + # (hence, should've been caught by the previous if statement) + if len(weights) == 1: + raise RuntimeError("Invalid triangulation.") + + # If there are two points, return a randomized classifier between the two + elif len(weights) == 2: + return RandomizedClassifier( + classifiers=[clf_a, clf_b], + probabilities=weights, + seed=seed, + ) + + # If it's in the interior of the ROC curve, requires instantiating a randomized classifier at the diagonal + elif len(weights) == 3: + fpr_rand, tpr_rand = points[2] + if not np.isclose(fpr_rand, tpr_rand): + raise RuntimeError( + f"Triangulation point at ROC diagonal has FPR != TPR " + f"({fpr_rand} != {tpr_rand}); ") + + # >>> BUG this would be better but for some reason it doesn't work! + # rng = np.random.default_rng(42) + # clf_rand = lambda X: (rng.random(size=len(X)) >= (1 - fpr_rand)).astype(int) + # # or... 
+ # clf_rand = BinaryClassifierAtROCDiagonal(target_fpr=fpr_rand) + # <<< + clf_rand = lambda X: (np.random.random(size=len(X)) >= (1 - fpr_rand)).astype(int) + + return RandomizedClassifier( + classifiers=[clf_a, clf_b, clf_rand], + probabilities=weights, + seed=seed) + + else: + raise RuntimeError( + f"Invalid triangulation of classifiers; " + f"weights: {weights}; points: {points};") + + @staticmethod + def find_points_for_target_ROC(roc_curve_data, target_roc_point): + """Retrieves a set of realizable points (and respective weights) in the + provided ROC curve that can be used to realize any target ROC in the + interior of the ROC curve. + + NOTE: this method is a bit redundant -- has functionality in common with + RandomizedClassifier.construct_at_target_ROC() + """ + # Unpack useful constants + target_fpr, target_tpr = target_roc_point + fpr, tpr, thrs = roc_curve_data + + # Compute hull of ROC curve + roc_curve_points = np.stack((fpr, tpr), axis=1) + hull = ConvexHull(roc_curve_points) + + # Filter out ROC points in the interior of the convex hull and other suboptimal points + points_above_diagonal = np.argwhere(tpr >= fpr).ravel() + useful_points_idx = np.array(sorted(set(hull.vertices) & set(points_above_diagonal))) + + fpr = fpr[useful_points_idx] + tpr = tpr[useful_points_idx] + thrs = thrs[useful_points_idx] + + # Find points A and B to construct the randomized classifier from + # > point A is the last point with FPR smaller or equal to the target + point_A_idx = max(np.argwhere(fpr <= target_fpr).ravel()) + # > point B is the first point with FPR larger than the target + point_B_idx = point_A_idx + 1 + + weights, points = RandomizedClassifier.find_weights_given_two_points( + point_A=roc_curve_points[useful_points_idx][point_A_idx], + point_B=roc_curve_points[useful_points_idx][point_B_idx], + target_point=target_roc_point, + ) + + return weights, points diff --git a/fairlearn/postprocessing/_roc_utils.py b/fairlearn/postprocessing/_roc_utils.py new file mode 100644 index 000000000..244632155 --- /dev/null +++ b/fairlearn/postprocessing/_roc_utils.py @@ -0,0 +1,164 @@ +"""Helper functions for threshold optimization methods. + +NOTE +---- +- Most utils defined here likely have a similar counter-part already implemented +somewhere in the fairlearn code-base. +- With time they will probably be substituted by that counter-part, and these +implementations removed. +""" +import logging +import numpy as np +from scipy.spatial import ConvexHull +from sklearn.metrics import confusion_matrix + + +def calc_cost_of_point( + fpr: float, + fnr: float, + prevalence: float, + false_pos_cost: float = 1., + false_neg_cost: float = 1., + ) -> float: + """Calculates the cost of the given ROC point. + + Parameters + ---------- + fpr : float + The false positive rate (FPR). + fnr : float + The false negative rate (FNR). + prevalence : float + The prevalence of positive samples in the dataset, + i.e., np.sum(y_true) / len(y_true) + false_pos_cost : float, optional + The cost of a false positive error, by default 1. + false_neg_cost : float, optional + The cost of a false negative error, by default 1. + + Returns + ------- + cost : float + The cost of the given ROC point (divided by the size of the dataset). 
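+
+    Examples
+    --------
+    With the default unit costs, this is the FPR weighted by `1 - prevalence`
+    plus the FNR weighted by `prevalence`:
+
+    >>> float(calc_cost_of_point(fpr=0.2, fnr=0.3, prevalence=0.5))
+    0.25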
+ """ + cost_vector = np.array([false_pos_cost, false_neg_cost]) + weight_vector = np.array([1 - prevalence, prevalence]) + return cost_vector * weight_vector @ np.array([fpr, fnr]) + + +def compute_roc_point_from_predictions(y_true, y_pred_binary): + """Computes the ROC point associated with the provided binary predictions. + + Parameters + ---------- + y_true : np.ndarray + The true labels. + y_pred_binary : np.ndarray + The binary predictions. + + Returns + ------- + tuple[float, float] + The resulting ROC point, i.e., a tuple (FPR, TPR). + """ + tn, fp, fn, tp = confusion_matrix(y_true, y_pred_binary).ravel() + + # FPR = FP / LN + fpr = fp / (fp + tn) + + # TPR = TP / LP + tpr = tp / (tp + fn) + + return (fpr, tpr) + + +def compute_global_roc_from_groupwise( + groupwise_roc_points: np.ndarray, + groupwise_label_pos_weight: np.ndarray, + groupwise_label_neg_weight: np.ndarray, + ) -> np.ndarray: + """Computes the global ROC point that corresponds to the provided group-wise + ROC points. + + The global ROC is a linear combination of the group-wise points, with + different weights for computing FPR and TPR -- the first related to LNs, and + the second to LPs. + + Parameters + ---------- + groupwise_roc_points : np.ndarray + An array of shape (n_groups, n_roc_dims) containing one ROC point per + group. + groupwise_label_pos_weight : np.ndarray + The relative size of each group in terms of its label POSITIVE samples + (out of all POSITIVE samples, how many are in each group). + groupwise_label_neg_weight : np.ndarray + The relative size of each group in terms of its label NEGATIVE samples + (out of all NEGATIVE samples, how many are in each group). + + Returns + ------- + global_roc_point : np.ndarray + A single point that corresponds to the global outcome of the given + group-wise ROC points. + """ + n_groups, _ = groupwise_roc_points.shape + + # Some initial sanity checks + if (len(groupwise_label_pos_weight) != len(groupwise_label_neg_weight) or + len(groupwise_label_pos_weight) != n_groups): + raise ValueError( + "Invalid input shapes: length of all arguments must be equal (the " + "number of different sensitive groups).") + + # Normalize group LP (/LN) weights by their size + if not np.isclose(groupwise_label_pos_weight.sum(), 1.0): + groupwise_label_pos_weight /= groupwise_label_pos_weight.sum() + if not np.isclose(groupwise_label_neg_weight.sum(), 1.0): + groupwise_label_neg_weight /= groupwise_label_neg_weight.sum() + + # Compute global FPR (weighted by relative number of LNs in each group) + global_fpr = groupwise_label_neg_weight @ groupwise_roc_points[:, 0] + + # Compute global TPR (weighted by relative number of LPs in each group) + global_tpr = groupwise_label_pos_weight @ groupwise_roc_points[:, 1] + + global_roc_point = np.array([global_fpr, global_tpr]) + return global_roc_point + + +def roc_convex_hull(roc_points: np.ndarray) -> np.ndarray: + """Computes the convex hull of the provided ROC points. + + Parameters + ---------- + roc_points : np.ndarray + An array of shape (n_points, n_dims) containing all points + of a provided ROC curve. + + Returns + ------- + hull_points : np.ndarray + An array of shape (n_hull_points, n_dim) containing all + points in the convex hull of the ROC curve. + """ + + # Save init data just for logging + init_num_points, _dims = roc_points.shape + + # Compute convex hull + hull = ConvexHull(roc_points) + + # NOTE: discarding points below the diagonal seems to lead to bugs later on, idk why... 
+ # Discard points in the interior of the convex hull, + # and other useless points (below main diagonal) + # points_above_diagonal = np.argwhere(roc_points[:, 1] >= roc_points[:, 0]).ravel() + # hull_indices = sorted(set(hull.vertices) & set(points_above_diagonal)) + + hull_indices = hull.vertices + + logging.info( + f"ROC convex hull contains {len(hull_indices) / init_num_points:.1%} " + f"of the original points.") + + return roc_points[hull_indices] diff --git a/fairlearn/postprocessing/_threshold_operation.py b/fairlearn/postprocessing/_threshold_operation.py index 4db5ee93f..caeca2659 100644 --- a/fairlearn/postprocessing/_threshold_operation.py +++ b/fairlearn/postprocessing/_threshold_operation.py @@ -18,7 +18,7 @@ class ThresholdOperation: """ def __init__(self, operator, threshold): - if operator not in [">", "<"]: + if operator not in [">", "<"]: # NOTE for PR: sklearn uses >= for ROC threshold; see: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_curve.html raise ValueError("Unrecognized operator: " + operator) self._operator = operator self._threshold = threshold From 67adac7b64b3390e04d7149b74fc712733a732be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Cruz?= Date: Thu, 29 Jun 2023 09:41:49 +0100 Subject: [PATCH 02/13] Update fairlearn/postprocessing/_cvxpy_threshold_optimizer.py Co-authored-by: Roman Lutz --- fairlearn/postprocessing/_cvxpy_threshold_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py index 2e3c48f21..84cdfd131 100644 --- a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py +++ b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py @@ -39,7 +39,7 @@ class _RelaxedThresholdOptimizer(BaseEstimator, MetaEstimatorMixin): tolerance. The method amounts to finding the set of (potentially randomized) - group-specific decision thresholds that maximize some goal (e.g., accuracy), + group-specific decision thresholds that maximize some objective (e.g., accuracy), given a maximum tolerance (or slack) on the fairness constraint fulfillment. This optimization problem amounts to a Linear Program (LP) as detailed in From 7155e4499d4b428eb6d376dd557169c96a503dc5 Mon Sep 17 00:00:00 2001 From: AndreFCruz Date: Thu, 29 Jun 2023 11:11:37 +0100 Subject: [PATCH 03/13] addressing PR feedback --- docs/refs.bib | 11 +- .../_cvxpy_threshold_optimizer.py | 121 ++++++++++-------- fairlearn/postprocessing/_cvxpy_utils.py | 12 +- .../postprocessing/_randomized_classifiers.py | 2 +- fairlearn/postprocessing/_roc_utils.py | 3 +- .../postprocessing/_threshold_optimizer.py | 3 + fairlearn/reductions/_moments/error_rate.py | 7 +- fairlearn/utils/_common.py | 6 + .../moments/test_moments_error_rate.py | 2 +- 9 files changed, 98 insertions(+), 69 deletions(-) diff --git a/docs/refs.bib b/docs/refs.bib index 51c2860ca..5258734ca 100644 --- a/docs/refs.bib +++ b/docs/refs.bib @@ -425,4 +425,13 @@ @article{yeh2009comparisons @misc{uscode2011title15chapter41subchapteriv, title={United States Code 2011 Edition - Title 15 Commerce and Trade - Chapter 41 Consumer Credit Protection - Subchapter IV—Equal Credit Opportunity}, url={https://www.govinfo.gov/content/pkg/USCODE-2011-title15/html/USCODE-2011-title15-chap41-subchapIV.htm} -} \ No newline at end of file +} + +@misc{cruz2023unprocessing, + title={Unprocessing Seven Years of Algorithmic Fairness}, + author={Andr\'{e} F. 
Cruz and Moritz Hardt}, + year={2023}, + eprint={2306.07261}, + archivePrefix={arXiv}, + primaryClass={cs.LG} +} diff --git a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py index 84cdfd131..f056adbff 100644 --- a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py +++ b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py @@ -7,10 +7,12 @@ - Add option for constraining equality of positive predictions (independence criterion, aka demographic parity); - Add option to use l1 or linf distances for maximum tolerance between points. - - Currently 'equal_odds' is defined using l-infinity distance (max between + - Currently 'equalized_odds' is defined using l-infinity distance (max between TPR and FPR distances); """ +from __future__ import annotations + import logging from itertools import product from typing import Callable @@ -20,9 +22,10 @@ from sklearn.base import BaseEstimator, MetaEstimatorMixin from fairlearn.utils._input_validation import _validate_and_reformat_input -from fairlearn.reductions._moments.error_rate import _MESSAGE_BAD_COSTS +from fairlearn.utils._common import _get_soft_predictions +from fairlearn.utils._common import _MESSAGE_BAD_COSTS -from ._cvxpy_utils import compute_equal_odds_optimum +from ._cvxpy_utils import compute_equalized_odds_optimum from ._roc_utils import ( roc_convex_hull, calc_cost_of_point, @@ -43,7 +46,8 @@ class _RelaxedThresholdOptimizer(BaseEstimator, MetaEstimatorMixin): given a maximum tolerance (or slack) on the fairness constraint fulfillment. This optimization problem amounts to a Linear Program (LP) as detailed in - [1]_. Solving the LP requires installing `cvxpy`. + :footcite:ct:`cruz2023reductions`. Solving the LP requires installing + `cvxpy`. Read more in the :ref:`User Guide `. @@ -57,8 +61,11 @@ class _RelaxedThresholdOptimizer(BaseEstimator, MetaEstimatorMixin): predictions. tolerance : float - The absolute tolerance for the equal odds fairness constraint. - Will allow for `tolerance` difference between group-wise ROC points. + The absolute tolerance for the equalized odds fairness constraint. + Will allow for at most `tolerance` distance between group-wise ROC + points (where distance is measured using l-infinity norm). Provided + value must be in range [0, 1] (closed interval). + objective_costs : dict A dictionary detailing the cost for false positives and false negatives, @@ -77,35 +84,40 @@ class _RelaxedThresholdOptimizer(BaseEstimator, MetaEstimatorMixin): Notes ----- The procedure for relaxed fairness constraint fulfillment is detailed in - `Cruz et al. (2023) `_ [1]_. - The underlying threshold optimization algorithm is based on - `Hardt et al. (2016) `_ [2]_. + :footcite:ct:`cruz2023reductions`. - References - ---------- - .. [1] A. Cruz, and M. Hardt, "Unprocessing Seven Years of - Algorithmic Fairness," arXiv.org, 15-Jun-2023. - [Online]. Available: https://arxiv.org/abs/2306.07261. - - .. [2] M. Hardt, E. Price, and N. Srebro, "Equality of Opportunity in - Supervised Learning," arXiv.org, 07-Oct-2016. - [Online]. Available: https://arxiv.org/abs/1610.02413. + The underlying threshold optimization algorithm is based on + :footcite:ct:`hardt2016equality`. + This method is also implemented in its + `standalone Python package `_. 
# noqa """ def __init__( self, - estimator: BaseEstimator, + predictor: BaseEstimator, tolerance: float, objective_costs: dict = None, grid_size: int = 1000, - seed: int = None, + predict_method: str = "auto", + random_state: int = None, ): # Save arguments - self.estimator = estimator + self.predictor = predictor self.tolerance = tolerance + if ( + not isinstance(self.tolerance, (float, int)) + or self.tolerance < 0 or self.tolerance > 1 + ): + raise ValueError( + f"Invalid `tolerance` provided: received " + f"tolerance={self.tolerance}, but value should be in range " + f"[0, 1].") + self.max_grid_size = grid_size + self.predict_method = predict_method + self.random_state = random_state # Unpack objective costs if objective_costs is None: @@ -115,9 +127,6 @@ def __init__( self.false_pos_cost, self.false_neg_cost = \ self.unpack_objective_costs(objective_costs) - # Randomly sample a seed if none was provided - self.seed = np.random.randint(2 ** 20) - # Initialize instance variables self._all_roc_data: dict = None self._all_roc_hulls: dict = None @@ -174,22 +183,27 @@ def global_roc_point(self) -> np.ndarray: def cost( self, - false_pos_cost: float = None, - false_neg_cost: float = None, + *, + false_pos_cost: float = 1.0, + false_neg_cost: float = 1.0, ) -> float: """Computes the theoretical cost of the solution found. - Use false_pos_cost==false_neg_cost==1 for the 0-1 loss (the - standard error rate), which amounts to maximizing accuracy. + Use false_pos_cost=false_neg_cost=1 for the 0-1 loss (the standard error + rate), which amounts to maximizing accuracy. + + You can find the cost realized from the LP optimization by calling: + >>> obj.cost( + >>> false_pos_cost=obj.false_pos_cost, + >>> false_neg_cost=obj.false_neg_cost, + >>> ) Parameters ---------- false_pos_cost : float, optional - The cost of a FALSE POSITIVE error, by default will take the value - given in the object's constructor. + The cost of a FALSE POSITIVE error, by default 1. false_neg_cost : float, optional - The cost of a FALSE NEGATIVE error, by default will take the value - given in the object's constructor. + The cost of a FALSE NEGATIVE error, by default 1. Returns ------- @@ -203,8 +217,8 @@ def cost( fpr=global_fpr, fnr=1 - global_tpr, prevalence=self._global_prevalence, - false_pos_cost=false_pos_cost or self.false_pos_cost, - false_neg_cost=false_neg_cost or self.false_neg_cost, + false_pos_cost=false_pos_cost, + false_neg_cost=false_neg_cost, ) def constraint_violation(self) -> float: @@ -216,9 +230,9 @@ def constraint_violation(self) -> float: float The fairness constraint violation. """ - return self.equal_odds_violation() + return self.equalized_odds_violation() - def equal_odds_violation(self) -> float: + def equalized_odds_violation(self) -> float: """Computes the theoretical violation of the equal odds constraint (i.e., the maximum l-inf distance between the ROC point of any pair of groups). 
@@ -233,7 +247,7 @@ def equal_odds_violation(self) -> float: n_groups = len(self.groupwise_roc_points) # Compute l-inf distance between each pair of groups - linf_constraint_violation = [ + l_inf_constraint_violation = [ (np.linalg.norm( self.groupwise_roc_points[i] - self.groupwise_roc_points[j], ord=np.inf), (i, j)) @@ -242,7 +256,7 @@ def equal_odds_violation(self) -> float: ] # Return the maximum - max_violation, (groupA, groupB) = max(linf_constraint_violation) + max_violation, (groupA, groupB) = max(l_inf_constraint_violation) logging.info( f"Maximum fairness violation is between " f"group={groupA} (p={self.groupwise_roc_points[groupA]}) and " @@ -252,7 +266,7 @@ def equal_odds_violation(self) -> float: return max_violation - def fit(self, X: np.ndarray, y: np.ndarray, group: np.ndarray, y_scores: np.ndarray = None): + def fit(self, X: np.ndarray, y: np.ndarray, *, sensitive_features: np.ndarray, y_scores: np.ndarray = None): """Fit this predictor to achieve the (possibly relaxed) equal odds constraint on the provided data. @@ -262,9 +276,9 @@ def fit(self, X: np.ndarray, y: np.ndarray, group: np.ndarray, y_scores: np.ndar The input features. y : np.ndarray The input labels. - group : np.ndarray - The group membership of each sample. - Assumes groups are numbered [0, 1, ..., num_groups-1]. + sensitive_features : np.ndarray + The sensitive features (group membership) of each sample. + Assumes groups are numbered [0, 1, ..., num_groups-1]. # TODO validate input and convert to proper format y_scores : np.ndarray, optional The pre-computed model predictions on this data. @@ -277,7 +291,7 @@ def fit(self, X: np.ndarray, y: np.ndarray, group: np.ndarray, y_scores: np.ndar # Compute group stats self._global_prevalence = np.sum(y) / len(y) - unique_groups = np.unique(group) + unique_groups = np.unique(sensitive_features) num_groups = len(unique_groups) if np.max(unique_groups) > num_groups-1: raise ValueError( @@ -288,14 +302,15 @@ def fit(self, X: np.ndarray, y: np.ndarray, group: np.ndarray, y_scores: np.ndar # Relative group sizes for LN and LP samples group_sizes_label_neg = np.array([ - np.sum(1 - y[group == g]) for g in unique_groups + np.sum(1 - y[sensitive_features == g]) for g in unique_groups ]) group_sizes_label_pos = np.array([ - np.sum(y[group == g]) for g in unique_groups + np.sum(y[sensitive_features == g]) for g in unique_groups ]) if np.sum(group_sizes_label_neg) + np.sum(group_sizes_label_pos) != len(y): - raise RuntimeError(f"Failed sanity check. Are you using non-binary labels?") + raise RuntimeError( + f"Failed input validation. 
Are you using non-binary labels?") # Convert to relative sizes group_sizes_label_neg = group_sizes_label_neg.astype(float) / np.sum(group_sizes_label_neg) @@ -303,11 +318,11 @@ def fit(self, X: np.ndarray, y: np.ndarray, group: np.ndarray, y_scores: np.ndar # Compute group-wise ROC curves if y_scores is None: - y_scores = self.estimator(X) + y_scores = _get_soft_predictions(self.predictor, X, self.predict_method) self._all_roc_data = dict() for g in unique_groups: - group_filter = group == g + group_filter = sensitive_features == g roc_curve_data = roc_curve( y[group_filter], @@ -336,7 +351,7 @@ def fit(self, X: np.ndarray, y: np.ndarray, group: np.ndarray, y_scores: np.ndar self._all_roc_hulls[g] = roc_convex_hull(curr_roc_points) # Find the group-wise optima that fulfill the fairness criteria - self._groupwise_roc_points, self._global_roc_point = compute_equal_odds_optimum( + self._groupwise_roc_points, self._global_roc_point = compute_equalized_odds_optimum( groupwise_roc_hulls=self._all_roc_hulls, fairness_tolerance=self.tolerance, group_sizes_label_pos=group_sizes_label_pos, @@ -349,10 +364,10 @@ def fit(self, X: np.ndarray, y: np.ndarray, group: np.ndarray, y_scores: np.ndar # Construct each group-specific classifier all_rand_clfs = { g: RandomizedClassifier.construct_at_target_ROC( - predictor=self.estimator, + predictor=self.predictor, roc_curve_data=self._all_roc_data[g], target_roc_point=self._groupwise_roc_points[g], - seed=self.seed, + seed=self.random_state, ) for g in unique_groups } @@ -391,5 +406,5 @@ def _check_fit_status(self, raise_error: bool = True) -> bool: return True - def predict(self, X: np.ndarray, group: np.ndarray) -> np.ndarray: - return self._realized_classifier(X, group) + def predict(self, X: np.ndarray, *, sensitive_features: np.ndarray) -> np.ndarray: + return self._realized_classifier(X, sensitive_features=sensitive_features) diff --git a/fairlearn/postprocessing/_cvxpy_utils.py b/fairlearn/postprocessing/_cvxpy_utils.py index 0ca50394d..7e07b103c 100644 --- a/fairlearn/postprocessing/_cvxpy_utils.py +++ b/fairlearn/postprocessing/_cvxpy_utils.py @@ -113,7 +113,7 @@ def compute_halfspace_inequality( # > the halfspace's b intercept value will correspond to this value of x; if np.isinf(slope): - # Sanity check for vertical line + # Validating vertical line if not np.isclose(p1x, p2x): raise RuntimeError( "Got infinite slope for line containing two points with " @@ -131,7 +131,7 @@ def compute_halfspace_inequality( # > the halfspace's b intercept value will correspond to this value of y; elif np.isclose(slope, 0.0): - # Sanity checks for horizontal line + # Validating horizontal line if not np.isclose(p1y, p2y) or not np.isclose(p1y, intercept): raise RuntimeError( f"Invalid horizontal line; points p1 and p2 should have same " @@ -220,7 +220,7 @@ def make_cvxpy_point_in_polygon_constraints( ] -def compute_equal_odds_optimum( +def compute_equalized_odds_optimum( groupwise_roc_hulls: dict[int, np.ndarray], fairness_tolerance: float, group_sizes_label_pos: np.ndarray, @@ -333,7 +333,7 @@ def compute_equal_odds_optimum( groupwise_roc_points = np.vstack([p.value for p in groupwise_roc_points_vars]) global_roc_point = global_roc_point_var.value - # Sanity check solution cost + # Validating solution cost solution_cost = calc_cost_of_point( fpr=global_roc_point[0], fnr=1-global_roc_point[1], @@ -344,11 +344,11 @@ def compute_equal_odds_optimum( if not np.isclose(solution_cost, prob.value): logging.error( - f"Solution was found but cost did not pass sanity check! 
" + f"Solution was found but cost did not pass validation! " f"Found solution ROC point {global_roc_point} with theoretical cost " f"{prob.value}, but actual cost is {solution_cost};") - # Sanity check congruency between group-wise ROC points and global ROC point + # Validating congruency between group-wise ROC points and global ROC point global_roc_from_groupwise = compute_global_roc_from_groupwise( groupwise_roc_points=groupwise_roc_points, groupwise_label_pos_weight=group_sizes_label_pos, diff --git a/fairlearn/postprocessing/_randomized_classifiers.py b/fairlearn/postprocessing/_randomized_classifiers.py index 9926bde93..229ab05a4 100644 --- a/fairlearn/postprocessing/_randomized_classifiers.py +++ b/fairlearn/postprocessing/_randomized_classifiers.py @@ -270,7 +270,7 @@ def find_weights_given_two_points( # Calculate weights for P upwards and P downwards weight_P_upwards = (target_tpr - point_P_downwards[1]) / (point_P_upwards[1] - point_P_downwards[1]) - # Sanity checks... + # Validating triangulation results all_points = np.vstack((point_A, point_B, point_P_downwards)) all_weights = np.hstack((weight_P_upwards * weights_AB, 1 - weight_P_upwards)) diff --git a/fairlearn/postprocessing/_roc_utils.py b/fairlearn/postprocessing/_roc_utils.py index 244632155..9b7506868 100644 --- a/fairlearn/postprocessing/_roc_utils.py +++ b/fairlearn/postprocessing/_roc_utils.py @@ -17,6 +17,7 @@ def calc_cost_of_point( fpr: float, fnr: float, prevalence: float, + *, false_pos_cost: float = 1., false_neg_cost: float = 1., ) -> float: @@ -104,7 +105,7 @@ def compute_global_roc_from_groupwise( """ n_groups, _ = groupwise_roc_points.shape - # Some initial sanity checks + # Validating input shapes if (len(groupwise_label_pos_weight) != len(groupwise_label_neg_weight) or len(groupwise_label_pos_weight) != n_groups): raise ValueError( diff --git a/fairlearn/postprocessing/_threshold_optimizer.py b/fairlearn/postprocessing/_threshold_optimizer.py index e3642915b..08ee0b1a2 100644 --- a/fairlearn/postprocessing/_threshold_optimizer.py +++ b/fairlearn/postprocessing/_threshold_optimizer.py @@ -130,6 +130,9 @@ class ThresholdOptimizer(BaseEstimator, MetaEstimatorMixin): estimator : object A `scikit-learn compatible estimator `_ # noqa whose output is postprocessed. + The estimator should output real-valued scores, as postprocessing + results will be extremely poor when performed over binarized + predictions. constraints : str, default='demographic_parity' Fairness constraints under which threshold optimization is performed. diff --git a/fairlearn/reductions/_moments/error_rate.py b/fairlearn/reductions/_moments/error_rate.py index ab390d5d4..6d62bf595 100644 --- a/fairlearn/reductions/_moments/error_rate.py +++ b/fairlearn/reductions/_moments/error_rate.py @@ -4,16 +4,11 @@ import numpy as np import pandas as pd +from fairlearn.utils._common import _MESSAGE_BAD_COSTS from fairlearn.utils._input_validation import _validate_and_reformat_input from .moment import _ALL, _LABEL, ClassificationMoment -_MESSAGE_BAD_COSTS = ( - "costs needs to be a dictionary with keys " - "'fp' and 'fn' containing non-negative values, which are not both zero" -) - - class ErrorRate(ClassificationMoment): r"""Misclassification error as a moment. diff --git a/fairlearn/utils/_common.py b/fairlearn/utils/_common.py index bc76f33fc..bad3119e1 100644 --- a/fairlearn/utils/_common.py +++ b/fairlearn/utils/_common.py @@ -2,6 +2,12 @@ # Licensed under the MIT License. 
+_MESSAGE_BAD_COSTS = ( + "costs needs to be a dictionary with keys " + "'fp' and 'fn' containing non-negative values, which are not both zero" +) + + def _get_soft_predictions(estimator, X, predict_method): r"""Return soft predictions of a classifier. diff --git a/test/unit/reductions/moments/test_moments_error_rate.py b/test/unit/reductions/moments/test_moments_error_rate.py index 870a71fb5..34659c1b9 100644 --- a/test/unit/reductions/moments/test_moments_error_rate.py +++ b/test/unit/reductions/moments/test_moments_error_rate.py @@ -4,7 +4,7 @@ import pytest from fairlearn.reductions import ErrorRate -from fairlearn.reductions._moments.error_rate import _MESSAGE_BAD_COSTS +from fairlearn.utils._common import _MESSAGE_BAD_COSTS BAD_COSTS_EXAMPLES = [ {"fp": 0.0, "fn": 0.0}, From 2bb9a769c1a2e0153e8be857f8cf0f5a50068877 Mon Sep 17 00:00:00 2001 From: AndreFCruz Date: Thu, 29 Jun 2023 14:26:32 +0100 Subject: [PATCH 04/13] making RelaxedThesholdOptimizer extensible to future constraint implementations --- .../_cvxpy_threshold_optimizer.py | 133 ++++++++++-------- fairlearn/postprocessing/_cvxpy_utils.py | 61 ++++++-- fairlearn/reductions/_moments/error_rate.py | 13 +- fairlearn/utils/_common.py | 39 +++++ 4 files changed, 164 insertions(+), 82 deletions(-) diff --git a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py index f056adbff..07bd569b5 100644 --- a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py +++ b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py @@ -15,17 +15,21 @@ import logging from itertools import product -from typing import Callable import numpy as np from sklearn.metrics import roc_curve from sklearn.base import BaseEstimator, MetaEstimatorMixin +from fairlearn.postprocessing._cvxpy_utils import from fairlearn.utils._input_validation import _validate_and_reformat_input from fairlearn.utils._common import _get_soft_predictions -from fairlearn.utils._common import _MESSAGE_BAD_COSTS +from fairlearn.utils._common import unpack_fp_fn_costs -from ._cvxpy_utils import compute_equalized_odds_optimum +from ._cvxpy_utils import ( + compute_fair_optimum, + ALL_CONSTRAINTS, + NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE, +) from ._roc_utils import ( roc_convex_hull, calc_cost_of_point, @@ -44,7 +48,7 @@ class _RelaxedThresholdOptimizer(BaseEstimator, MetaEstimatorMixin): The method amounts to finding the set of (potentially randomized) group-specific decision thresholds that maximize some objective (e.g., accuracy), given a maximum tolerance (or slack) on the fairness constraint fulfillment. - + This optimization problem amounts to a Linear Program (LP) as detailed in :footcite:ct:`cruz2023reductions`. Solving the LP requires installing `cvxpy`. @@ -53,13 +57,20 @@ class _RelaxedThresholdOptimizer(BaseEstimator, MetaEstimatorMixin): Parameters ---------- - estimator : object - A `scikit-learn compatible estimator `_ # noqa + predictor : object + A prefit `scikit-learn compatible estimator `_ # noqa whose output will be postprocessed. - The estimator should output real-valued scores, as postprocessing + The predictor should output real-valued scores, as postprocessing results will be extremely poor when performed over binarized predictions. + constraint : str, default='equalized_odds' + Fairness constraint under which threshold optimization is performed. 
+ Possible inputs currently are: + + 'equalized_odds' + match true positive and false positive rates across groups + tolerance : float The absolute tolerance for the equalized odds fairness constraint. Will allow for at most `tolerance` distance between group-wise ROC @@ -67,7 +78,7 @@ class _RelaxedThresholdOptimizer(BaseEstimator, MetaEstimatorMixin): value must be in range [0, 1] (closed interval). - objective_costs : dict + objective_costs : dict, optional A dictionary detailing the cost for false positives and false negatives, of the form :code:`{'fp': , 'fn': }`. Will use the 0-1 loss by default (maximum accuracy). @@ -77,10 +88,41 @@ class _RelaxedThresholdOptimizer(BaseEstimator, MetaEstimatorMixin): default 1000. This corresponds to the maximum number of different thresholds to use over a predictor. - seed : int + predict_method : {'auto', 'predict_proba', 'decision_function', 'predict'\ + }, default='auto' + + Defines which method of the ``estimator`` is used to get the output + values. + + 'auto' + use one of :code:`predict_proba`, :code:`decision_function`, or + :code:`predict`, in that order. + + 'predict_proba' + use the second column from the output of :code:`predict_proba`. + It is assumed that the second column represents the positive + outcome. + + 'decision_function' + use the raw values given by the :code:`decision_function`. + + 'predict' + use the hard values reported by the :code:`predict` method if + estimator is a classifier, and the regression values if + estimator is a regressor. + Warning: postprocessing may lead to poor results when using + :code:`predict_method='predict'` with classifiers, as that will + binarize predictions. + + random_state : int, optional A random seed used for reproducibility when producing randomized classifiers, by default None (default: non-reproducible behavior). + Raises + ------ + ValueError + A ValueError will be raised if constructor arguments are not valid. + Notes ----- The procedure for relaxed fairness constraint fulfillment is detailed in @@ -91,12 +133,15 @@ class _RelaxedThresholdOptimizer(BaseEstimator, MetaEstimatorMixin): This method is also implemented in its `standalone Python package `_. 
# noqa + """ def __init__( self, + *, predictor: BaseEstimator, - tolerance: float, + constraint: str = "equalized_odds", + tolerance: float = 0.0, objective_costs: dict = None, grid_size: int = 1000, predict_method: str = "auto", @@ -105,7 +150,17 @@ def __init__( # Save arguments self.predictor = predictor + self.constraint = constraint self.tolerance = tolerance + self.max_grid_size = grid_size + self.predict_method = predict_method + self.random_state = random_state + + # Validate constraint + if self.constraint not in ALL_CONSTRAINTS: + raise ValueError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) + + # Validate constraint tolerance if ( not isinstance(self.tolerance, (float, int)) or self.tolerance < 0 or self.tolerance > 1 @@ -115,17 +170,13 @@ def __init__( f"tolerance={self.tolerance}, but value should be in range " f"[0, 1].") - self.max_grid_size = grid_size - self.predict_method = predict_method - self.random_state = random_state - # Unpack objective costs if objective_costs is None: self.false_pos_cost = 1.0 self.false_neg_cost = 1.0 else: self.false_pos_cost, self.false_neg_cost = \ - self.unpack_objective_costs(objective_costs) + unpack_fp_fn_costs(objective_costs) # Initialize instance variables self._all_roc_data: dict = None @@ -135,44 +186,6 @@ def __init__( self._global_prevalence: float = None self._realized_classifier: EnsembleGroupwiseClassifiers = None - @staticmethod - def unpack_objective_costs(objective_costs: dict) -> tuple[float, float]: - """Validates and unpacks the given `objective_costs`. - - Parameters - ---------- - objective_costs : dict - A dictionary detailing the cost for false positives and false negatives, - of the form :code:`{'fp': , 'fn': }`. Will use the 0-1 - loss by default (maximum accuracy). - - Returns - ------- - tuple[float, float] - A tuple respectively composed of the cost of false positives and the - cost of false negatives, i.e., a tuple with - :code:`(fp_cost, fn_cost)`. - - Raises - ------ - ValueError - Raised when the provided costs are invalid (e.g., missing keys - in the provided dict, or negative costs). - """ - if ( - type(objective_costs) is dict - and objective_costs.keys() == {"fp", "fn"} - and objective_costs["fp"] >= 0.0 - and objective_costs["fn"] >= 0.0 - and objective_costs["fp"] + objective_costs["fn"] > 0.0 - ): - fp_cost = objective_costs["fp"] - fn_cost = objective_costs["fn"] - else: - raise ValueError(_MESSAGE_BAD_COSTS) - - return fp_cost, fn_cost - @property def groupwise_roc_points(self) -> np.ndarray: return self._groupwise_roc_points @@ -230,7 +243,10 @@ def constraint_violation(self) -> float: float The fairness constraint violation. 
""" - return self.equalized_odds_violation() + if self.constraint == "equalized_odds": + return self.equalized_odds_violation() + else: + raise NotImplementedError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) def equalized_odds_violation(self) -> float: """Computes the theoretical violation of the equal odds constraint @@ -351,9 +367,10 @@ def fit(self, X: np.ndarray, y: np.ndarray, *, sensitive_features: np.ndarray, y self._all_roc_hulls[g] = roc_convex_hull(curr_roc_points) # Find the group-wise optima that fulfill the fairness criteria - self._groupwise_roc_points, self._global_roc_point = compute_equalized_odds_optimum( + self._groupwise_roc_points, self._global_roc_point = compute_fair_optimum( + fairness_constraint=self.constraint, groupwise_roc_hulls=self._all_roc_hulls, - fairness_tolerance=self.tolerance, + tolerance=self.tolerance, group_sizes_label_pos=group_sizes_label_pos, group_sizes_label_neg=group_sizes_label_neg, global_prevalence=self._global_prevalence, @@ -363,7 +380,7 @@ def fit(self, X: np.ndarray, y: np.ndarray, *, sensitive_features: np.ndarray, y # Construct each group-specific classifier all_rand_clfs = { - g: RandomizedClassifier.construct_at_target_ROC( + g: RandomizedClassifier.construct_at_target_ROC( # TODO: check InterpolatedThresholder predictor=self.predictor, roc_curve_data=self._all_roc_data[g], target_roc_point=self._groupwise_roc_points[g], @@ -373,7 +390,7 @@ def fit(self, X: np.ndarray, y: np.ndarray, *, sensitive_features: np.ndarray, y } # Construct the global classifier (can be used for all groups) - self._realized_classifier = EnsembleGroupwiseClassifiers(group_to_clf=all_rand_clfs) + self._realized_classifier = EnsembleGroupwiseClassifiers(group_to_clf=all_rand_clfs) # TODO: check InterpolatedThresholder return self def _check_fit_status(self, raise_error: bool = True) -> bool: diff --git a/fairlearn/postprocessing/_cvxpy_utils.py b/fairlearn/postprocessing/_cvxpy_utils.py index 7e07b103c..814cf7ab0 100644 --- a/fairlearn/postprocessing/_cvxpy_utils.py +++ b/fairlearn/postprocessing/_cvxpy_utils.py @@ -11,6 +11,18 @@ from ._roc_utils import calc_cost_of_point, compute_global_roc_from_groupwise +# Set of all fairness constraints with a cvxpy LP implementation +ALL_CONSTRAINTS = { + "equalized_odds", +} + +NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE = ( + "Currently only the following constraints are supported: {}.".format( + ", ".join(sorted(ALL_CONSTRAINTS)) + ) +) + + # Maximum distance from solution to feasibility or optimality SOLUTION_TOLERANCE = 1e-9 @@ -220,9 +232,11 @@ def make_cvxpy_point_in_polygon_constraints( ] -def compute_equalized_odds_optimum( +def compute_fair_optimum( + *, + fairness_constraint: str, + tolerance: float, groupwise_roc_hulls: dict[int, np.ndarray], - fairness_tolerance: float, group_sizes_label_pos: np.ndarray, group_sizes_label_neg: np.ndarray, global_prevalence: float, @@ -235,22 +249,34 @@ def compute_equalized_odds_optimum( Parameters ---------- + fairness_constraint : str + The name of the fairness constraint under which the LP will be + optimized. Possible inputs are: + + 'equalized_odds' + match true positive and false positive rates across groups + + tolerance : float + A value for the tolerance when enforcing the fairness constraint. + groupwise_roc_hulls : dict[int, np.ndarray] A dict mapping each group to the convex hull of the group's ROC curve. The convex hull is an np.array of shape (n_points, 2), containing the points that form the convex hull of the ROC curve, sorted in COUNTER CLOCK-WISE order. 
- fairness_tolerance : float - A value for the tolerance when enforcing the equal odds fairness - constraint, i.e., equality of TPR and FPR among groups. + group_sizes_label_pos : np.ndarray The relative or absolute number of positive samples in each group. + group_sizes_label_neg : np.ndarray The relative or absolute number of negative samples in each group. + global_prevalence : float The global prevalence of positive samples. + false_positive_cost : float, optional The cost of a FALSE POSITIVE error, by default 1. + false_negative_cost : float, optional The cost of a FALSE NEGATIVE error, by default 1. @@ -262,6 +288,10 @@ def compute_equalized_odds_optimum( 2: an array with the single global ROC point for the solution. """ _import_cvxpy_if_available() + import cvxpy as cp + + if fairness_constraint not in ALL_CONSTRAINTS: + raise ValueError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) n_groups = len(groupwise_roc_hulls) if n_groups != len(group_sizes_label_neg) or n_groups != len(group_sizes_label_pos): @@ -285,14 +315,19 @@ def compute_equalized_odds_optimum( global_roc_point_var[1] == group_sizes_label_pos @ np.array([p[1] for p in groupwise_roc_points_vars]), ] - # Relaxed equal odds constraints - # 1st option - CONSTRAINT FOR: l-inf distance between any two group's ROCs being less than epsilon - constraints += [ - cp.norm_inf(groupwise_roc_points_vars[i] - groupwise_roc_points_vars[j]) <= fairness_tolerance - for i, j in product(range(n_groups), range(n_groups)) - if i < j - # if i != j - ] + ### APPLY FAIRNESS CONSTRAINTS + # IF "equalized_odds" + # > i.e., CONSTRAIN l-inf distance between any two group's ROCs being less than `tolerance` + if fairness_constraint == "equalized_odds": + constraints += [ + cp.norm_inf(groupwise_roc_points_vars[i] - groupwise_roc_points_vars[j]) <= tolerance + for i, j in product(range(n_groups), range(n_groups)) + if i < j + ] + + else: + # TODO: implement other constraints here + raise NotImplementedError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) # Constraints for points in respective group-wise ROC curves for idx in range(n_groups): diff --git a/fairlearn/reductions/_moments/error_rate.py b/fairlearn/reductions/_moments/error_rate.py index 6d62bf595..edbd5f204 100644 --- a/fairlearn/reductions/_moments/error_rate.py +++ b/fairlearn/reductions/_moments/error_rate.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from fairlearn.utils._common import _MESSAGE_BAD_COSTS +from fairlearn.utils._common import unpack_fp_fn_costs from fairlearn.utils._input_validation import _validate_and_reformat_input from .moment import _ALL, _LABEL, ClassificationMoment @@ -43,17 +43,8 @@ def __init__(self, *, costs=None): if costs is None: self.fp_cost = 1.0 self.fn_cost = 1.0 - elif ( - type(costs) is dict - and costs.keys() == {"fp", "fn"} - and costs["fp"] >= 0.0 - and costs["fn"] >= 0.0 - and costs["fp"] + costs["fn"] > 0.0 - ): - self.fp_cost = costs["fp"] - self.fn_cost = costs["fn"] else: - raise ValueError(_MESSAGE_BAD_COSTS) + self.fp_cpst, self.fn_cost = unpack_fp_fn_costs(costs) def load_data(self, X, y, *, sensitive_features, control_features=None): """Load the specified data into the object.""" diff --git a/fairlearn/utils/_common.py b/fairlearn/utils/_common.py index bad3119e1..2af6e1b68 100644 --- a/fairlearn/utils/_common.py +++ b/fairlearn/utils/_common.py @@ -1,6 +1,7 @@ # Copyright (c) Fairlearn contributors. # Licensed under the MIT License. 
+from __future__ import annotations _MESSAGE_BAD_COSTS = ( "costs needs to be a dictionary with keys " @@ -8,6 +9,44 @@ ) +def unpack_fp_fn_costs(costs: dict) -> tuple[float, float]: + """Validates and unpacks the given `costs`. + + Parameters + ---------- + costs : dict + A dictionary detailing the cost for false positives and false negatives, + of the form :code:`{'fp': , 'fn': }`. + + Returns + ------- + tuple[float, float] + A tuple respectively composed of the cost of false positives and the + cost of false negatives, i.e., a tuple with + :code:`(fp_cost, fn_cost)`. + + Raises + ------ + ValueError + Raised when the provided costs are invalid (e.g., missing keys + in the provided dict, or negative costs). + """ + if ( + type(costs) is dict + and costs.keys() == {"fp", "fn"} + and costs["fp"] >= 0.0 + and costs["fn"] >= 0.0 + and costs["fp"] + costs["fn"] > 0.0 + ): + fp_cost = costs["fp"] + fn_cost = costs["fn"] + + else: + raise ValueError(_MESSAGE_BAD_COSTS) + + return fp_cost, fn_cost + + def _get_soft_predictions(estimator, X, predict_method): r"""Return soft predictions of a classifier. From 23f879eb5a3cff5856782acc8a58f493d4743a89 Mon Sep 17 00:00:00 2001 From: AndreFCruz Date: Thu, 29 Jun 2023 14:35:59 +0100 Subject: [PATCH 05/13] added cvxpy solutions for TPR,FPR,TNR,FNR constraints --- .../_cvxpy_threshold_optimizer.py | 8 +++--- fairlearn/postprocessing/_cvxpy_utils.py | 26 ++++++++++++++++--- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py index 07bd569b5..28dada91d 100644 --- a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py +++ b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py @@ -3,7 +3,7 @@ TODO ---- - Add option for constraining only equality of FPR or TPR (currently it must be -both -> equal odds); +both -> equalized odds); - Add option for constraining equality of positive predictions (independence criterion, aka demographic parity); - Add option to use l1 or linf distances for maximum tolerance between points. @@ -249,7 +249,7 @@ def constraint_violation(self) -> float: raise NotImplementedError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) def equalized_odds_violation(self) -> float: - """Computes the theoretical violation of the equal odds constraint + """Computes the theoretical violation of the equalized odds constraint (i.e., the maximum l-inf distance between the ROC point of any pair of groups). @@ -283,8 +283,8 @@ def equalized_odds_violation(self) -> float: def fit(self, X: np.ndarray, y: np.ndarray, *, sensitive_features: np.ndarray, y_scores: np.ndarray = None): - """Fit this predictor to achieve the (possibly relaxed) equal odds - constraint on the provided data. + """Find the optimal postprocessing that fulfills the (possibly relaxed) + fairness constraint on the provided data. 
Parameters ---------- diff --git a/fairlearn/postprocessing/_cvxpy_utils.py b/fairlearn/postprocessing/_cvxpy_utils.py index 814cf7ab0..2f76863e7 100644 --- a/fairlearn/postprocessing/_cvxpy_utils.py +++ b/fairlearn/postprocessing/_cvxpy_utils.py @@ -316,7 +316,7 @@ def compute_fair_optimum( ] ### APPLY FAIRNESS CONSTRAINTS - # IF "equalized_odds" + # If "equalized_odds" # > i.e., CONSTRAIN l-inf distance between any two group's ROCs being less than `tolerance` if fairness_constraint == "equalized_odds": constraints += [ @@ -324,9 +324,29 @@ def compute_fair_optimum( for i, j in product(range(n_groups), range(n_groups)) if i < j ] - + + # If some rate parity, i.e., parity of one of {TPR, FPR, TNR, FNR} + elif fairness_constraint.endswith("rate_parity"): + + roc_idx_of_interest: int + if fairness_constraint == "true_positive_rate_parity" or fairness_constraint == "false_negative_rate_parity": + roc_idx_of_interest = 1 + + elif fairness_constraint == "false_positive_rate_parity" or fairness_constraint == "false_negative_rate_parity": + roc_idx_of_interest = 0 + + else: + # This point should never be reached as fairness constraint was previously validated + raise ValueError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) + + constraints += [ + cp.abs(groupwise_roc_points_vars[roc_idx_of_interest][i] - groupwise_roc_points_vars[roc_idx_of_interest][j]) <= tolerance + for i, j in product(range(n_groups), range(n_groups)) + if i < j + ] + + # TODO: implement other constraints here else: - # TODO: implement other constraints here raise NotImplementedError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) # Constraints for points in respective group-wise ROC curves From 6009ffe415f71dde8630bc5ffd086b0412c9b6b4 Mon Sep 17 00:00:00 2001 From: AndreFCruz Date: Tue, 4 Jul 2023 14:21:04 +0100 Subject: [PATCH 06/13] code now runs fine --- .../_cvxpy_threshold_optimizer.py | 27 +++++++++++-------- fairlearn/postprocessing/_cvxpy_utils.py | 3 ++- .../postprocessing/_randomized_classifiers.py | 18 ++++++------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py index 28dada91d..9b34d29a2 100644 --- a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py +++ b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py @@ -19,7 +19,6 @@ import numpy as np from sklearn.metrics import roc_curve from sklearn.base import BaseEstimator, MetaEstimatorMixin -from fairlearn.postprocessing._cvxpy_utils import from fairlearn.utils._input_validation import _validate_and_reformat_input from fairlearn.utils._common import _get_soft_predictions @@ -152,7 +151,8 @@ def __init__( self.predictor = predictor self.constraint = constraint self.tolerance = tolerance - self.max_grid_size = grid_size + self.objective_costs = objective_costs + self.grid_size = grid_size self.predict_method = predict_method self.random_state = random_state @@ -170,13 +170,8 @@ def __init__( f"tolerance={self.tolerance}, but value should be in range " f"[0, 1].") - # Unpack objective costs - if objective_costs is None: - self.false_pos_cost = 1.0 - self.false_neg_cost = 1.0 - else: - self.false_pos_cost, self.false_neg_cost = \ - unpack_fp_fn_costs(objective_costs) + # Check objective costs + unpack_fp_fn_costs(self.objective_costs) # Initialize instance variables self._all_roc_data: dict = None @@ -186,6 +181,16 @@ def __init__( self._global_prevalence: float = None self._realized_classifier: EnsembleGroupwiseClassifiers = None + @property + def 
false_pos_cost(self) -> np.ndarray: + fp_cost, _fn_cost = unpack_fp_fn_costs(self.objective_costs) + return fp_cost + + @property + def false_neg_cost(self) -> np.ndarray: + _fp_cost, fn_cost = unpack_fp_fn_costs(self.objective_costs) + return fn_cost + @property def groupwise_roc_points(self) -> np.ndarray: return self._groupwise_roc_points @@ -347,8 +352,8 @@ def fit(self, X: np.ndarray, y: np.ndarray, *, sensitive_features: np.ndarray, y # Check if max_roc_ticks is exceeded fpr, tpr, thrs = roc_curve_data - if self.max_grid_size is not None and len(fpr) > self.max_grid_size: - indices_to_keep = np.arange(0, len(fpr), len(fpr) / self.max_grid_size).astype(int) + if self.grid_size is not None and len(fpr) > self.grid_size: + indices_to_keep = np.arange(0, len(fpr), len(fpr) / self.grid_size).astype(int) # Bottom-left (0,0) and top-right (1,1) points must be kept indices_to_keep[-1] = len(fpr) - 1 diff --git a/fairlearn/postprocessing/_cvxpy_utils.py b/fairlearn/postprocessing/_cvxpy_utils.py index 2f76863e7..7eead547d 100644 --- a/fairlearn/postprocessing/_cvxpy_utils.py +++ b/fairlearn/postprocessing/_cvxpy_utils.py @@ -317,7 +317,7 @@ def compute_fair_optimum( ### APPLY FAIRNESS CONSTRAINTS # If "equalized_odds" - # > i.e., CONSTRAIN l-inf distance between any two group's ROCs being less than `tolerance` + # > i.e., constrain l-inf distance between any two groups' ROCs being less than `tolerance` if fairness_constraint == "equalized_odds": constraints += [ cp.norm_inf(groupwise_roc_points_vars[i] - groupwise_roc_points_vars[j]) <= tolerance @@ -326,6 +326,7 @@ def compute_fair_optimum( ] # If some rate parity, i.e., parity of one of {TPR, FPR, TNR, FNR} + # i.e., constrain absolute distance between any two groups' rate metric elif fairness_constraint.endswith("rate_parity"): roc_idx_of_interest: int diff --git a/fairlearn/postprocessing/_randomized_classifiers.py b/fairlearn/postprocessing/_randomized_classifiers.py index 229ab05a4..76ead5885 100644 --- a/fairlearn/postprocessing/_randomized_classifiers.py +++ b/fairlearn/postprocessing/_randomized_classifiers.py @@ -16,7 +16,7 @@ class Classifier(ABC): @abstractmethod - def __call__(self, X: np.ndarray, group: np.ndarray = None) -> np.ndarray: + def __call__(self, X: np.ndarray, *, sensitive_features: np.ndarray = None) -> np.ndarray: """Return predicted class, Y, for the given input features, X. """ raise NotImplementedError @@ -38,14 +38,14 @@ def __init__( self.score_predictor = score_predictor self.threshold = threshold - def __call__(self, X: np.ndarray, group: np.ndarray = None) -> np.ndarray: + def __call__(self, X: np.ndarray, *, sensitive_features: np.ndarray = None) -> np.ndarray: """Computes predictions for the given samples, X. Parameters ---------- X : np.ndarray The input samples, in shape (num_samples, num_features). - group : None, optional + sensitive_features : None, optional None. This argument will be ignored by this classifier as it does not consider sensitive attributes. 
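Taken together, the hunks above settle the surface of the relaxed optimizer: keyword-only construction, `fit` and `predict` taking a `sensitive_features` keyword, and cost/violation accessors. A usage sketch of that surface as it stands in this PR, assuming `cvxpy` is installed; the class lives in a private module, so the import path below may change before release:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

# Private module introduced in this PR; import path is an assumption.
from fairlearn.postprocessing._cvxpy_threshold_optimizer import (
    _RelaxedThresholdOptimizer,
)

X, y = make_classification(n_samples=1000, random_state=42)
# Groups must be numbered 0, 1, ..., num_groups-1.
sensitive_features = np.random.default_rng(42).integers(0, 2, size=len(y))

predictor = LogisticRegression().fit(X, y)

postprocessor = _RelaxedThresholdOptimizer(
    predictor=predictor,
    constraint="equalized_odds",
    tolerance=0.05,  # allow up to 0.05 distance between group-wise ROC points
    predict_method="predict_proba",
    random_state=42,
)
postprocessor.fit(X, y, sensitive_features=sensitive_features)

y_pred = postprocessor.predict(X, sensitive_features=sensitive_features)
print(postprocessor.constraint_violation())  # <= 0.05, up to solver tolerance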
@@ -92,7 +92,7 @@ def __init__( # Initiate random number generator self.rng = np.random.default_rng(seed) - def __call__(self, X: np.ndarray, group: np.ndarray = None) -> np.ndarray: + def __call__(self, X: np.ndarray, *, sensitive_features: np.ndarray = None) -> np.ndarray: return (self.rng.random(size=len(X)) >= (1 - self.target_fpr)).astype(int) @@ -113,9 +113,9 @@ def __init__(self, group_to_clf: dict[int | str, Callable]): """ self.group_to_clf = group_to_clf - def __call__(self, X: np.ndarray, group: np.ndarray) -> np.ndarray: + def __call__(self, X: np.ndarray, *, sensitive_features: np.ndarray) -> np.ndarray: """Compute predictions for the given input samples X, given their - sensitive attributes, group. + sensitive attributes, `sensitive_features`. Parameters ---------- @@ -130,7 +130,7 @@ def __call__(self, X: np.ndarray, group: np.ndarray) -> np.ndarray: The predictions, where the prediction for each sample is handed off to a group-specific classifier for that sample. """ - if len(X) != len(group): + if len(X) != len(sensitive_features): raise ValueError(f"Invalid input sizes len(X) != len(group)") # Array to store predictions @@ -141,7 +141,7 @@ def __call__(self, X: np.ndarray, group: np.ndarray) -> np.ndarray: cumulative_filter = np.zeros(num_samples).astype(bool) for group_value, group_clf in self.group_to_clf.items(): - group_filter = (group == group_value) + group_filter = (sensitive_features == group_value) y_pred[group_filter] = group_clf(X[group_filter]) cumulative_filter |= group_filter @@ -196,7 +196,7 @@ def __init__( self.probabilities = probabilities self.rng = np.random.default_rng(seed) - def __call__(self, X: np.ndarray, group: np.ndarray = None) -> int: + def __call__(self, X: np.ndarray, *, sensitive_features: np.ndarray = None) -> int: # Assign each sample to a classifier clf_idx = self.rng.choice( np.arange(len(self.classifiers)), # possible choices From 9435885055b1380a22e4c6f4f5a4cfa293fafaf2 Mon Sep 17 00:00:00 2001 From: AndreFCruz Date: Tue, 4 Jul 2023 14:31:51 +0100 Subject: [PATCH 07/13] corrected bug on TPR/FPR parity constraints --- fairlearn/postprocessing/_cvxpy_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fairlearn/postprocessing/_cvxpy_utils.py b/fairlearn/postprocessing/_cvxpy_utils.py index 7eead547d..a8e7ee706 100644 --- a/fairlearn/postprocessing/_cvxpy_utils.py +++ b/fairlearn/postprocessing/_cvxpy_utils.py @@ -14,6 +14,10 @@ # Set of all fairness constraints with a cvxpy LP implementation ALL_CONSTRAINTS = { "equalized_odds", + # "true_positive_rate_parity", + # "false_positive_rate_parity", + # "true_negative_rate_parity", + # "false_negative_rate_parity", } NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE = ( @@ -341,7 +345,7 @@ def compute_fair_optimum( raise ValueError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) constraints += [ - cp.abs(groupwise_roc_points_vars[roc_idx_of_interest][i] - groupwise_roc_points_vars[roc_idx_of_interest][j]) <= tolerance + cp.abs(groupwise_roc_points_vars[i][roc_idx_of_interest] - groupwise_roc_points_vars[j][roc_idx_of_interest]) <= tolerance for i, j in product(range(n_groups), range(n_groups)) if i < j ] From b6ada3e9e568b554eaa58bf76a9d5a80656c24c6 Mon Sep 17 00:00:00 2001 From: AndreFCruz Date: Tue, 4 Jul 2023 16:49:17 +0100 Subject: [PATCH 08/13] fixed linting errors --- .../_cvxpy_threshold_optimizer.py | 245 +++++++++----- fairlearn/postprocessing/_cvxpy_utils.py | 248 +++++++++------ .../postprocessing/_randomized_classifiers.py | 300 +++++++++++------- 
fairlearn/postprocessing/_roc_utils.py | 67 ++-- .../postprocessing/_threshold_operation.py | 2 +- .../postprocessing/_threshold_optimizer.py | 6 +- fairlearn/reductions/_moments/error_rate.py | 3 +- fairlearn/utils/_common.py | 8 +- 8 files changed, 542 insertions(+), 337 deletions(-) diff --git a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py index 9b34d29a2..f078af689 100644 --- a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py +++ b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py @@ -1,14 +1,14 @@ +# Copyright (c) Fairlearn contributors. +# Licensed under the MIT License. + """Threshold optimizer with relaxed fairness constraints. TODO ---- -- Add option for constraining only equality of FPR or TPR (currently it must be -both -> equalized odds); - Add option for constraining equality of positive predictions (independence -criterion, aka demographic parity); -- Add option to use l1 or linf distances for maximum tolerance between points. - - Currently 'equalized_odds' is defined using l-infinity distance (max between - TPR and FPR distances); + criterion, aka demographic parity); +- Add option to use l1 or other distance functions for maximum tolerance between + points (currently l-inf is in use). """ from __future__ import annotations @@ -20,7 +20,9 @@ from sklearn.metrics import roc_curve from sklearn.base import BaseEstimator, MetaEstimatorMixin -from fairlearn.utils._input_validation import _validate_and_reformat_input +# TODO: use this to validate input +# from fairlearn.utils._input_validation import _validate_and_reformat_input + from fairlearn.utils._common import _get_soft_predictions from fairlearn.utils._common import unpack_fp_fn_costs @@ -33,7 +35,9 @@ roc_convex_hull, calc_cost_of_point, ) -from ._randomized_classifiers import ( # TODO: try to use fairlearn's InterpolatedThreshold instead of our classifier API + +# TODO: try to use InterpolatedThreshold instead of our classifier API +from ._randomized_classifiers import ( RandomizedClassifier, EnsembleGroupwiseClassifiers, ) @@ -96,15 +100,15 @@ class _RelaxedThresholdOptimizer(BaseEstimator, MetaEstimatorMixin): 'auto' use one of :code:`predict_proba`, :code:`decision_function`, or :code:`predict`, in that order. - + 'predict_proba' use the second column from the output of :code:`predict_proba`. It is assumed that the second column represents the positive outcome. - + 'decision_function' use the raw values given by the :code:`decision_function`. 
- + 'predict' use the hard values reported by the :code:`predict` method if estimator is a classifier, and the regression values if @@ -136,17 +140,16 @@ class _RelaxedThresholdOptimizer(BaseEstimator, MetaEstimatorMixin): """ def __init__( - self, - *, - predictor: BaseEstimator, - constraint: str = "equalized_odds", - tolerance: float = 0.0, - objective_costs: dict = None, - grid_size: int = 1000, - predict_method: str = "auto", - random_state: int = None, - ): - + self, + *, + predictor: BaseEstimator, + constraint: str = "equalized_odds", + tolerance: float = 0.0, + objective_costs: dict = None, + grid_size: int = 1000, + predict_method: str = "auto", + random_state: int = None, + ): # Save arguments self.predictor = predictor self.constraint = constraint @@ -162,13 +165,15 @@ def __init__( # Validate constraint tolerance if ( - not isinstance(self.tolerance, (float, int)) - or self.tolerance < 0 or self.tolerance > 1 + not isinstance(self.tolerance, (float, int)) + or self.tolerance < 0 + or self.tolerance > 1 ): raise ValueError( - f"Invalid `tolerance` provided: received " + "Invalid `tolerance` provided: received " f"tolerance={self.tolerance}, but value should be in range " - f"[0, 1].") + "[0, 1]." + ) # Check objective costs unpack_fp_fn_costs(self.objective_costs) @@ -185,7 +190,7 @@ def __init__( def false_pos_cost(self) -> np.ndarray: fp_cost, _fn_cost = unpack_fp_fn_costs(self.objective_costs) return fp_cost - + @property def false_neg_cost(self) -> np.ndarray: _fp_cost, fn_cost = unpack_fp_fn_costs(self.objective_costs) @@ -200,12 +205,12 @@ def global_roc_point(self) -> np.ndarray: return self._global_roc_point def cost( - self, - *, - false_pos_cost: float = 1.0, - false_neg_cost: float = 1.0, - ) -> float: - """Computes the theoretical cost of the solution found. + self, + *, + false_pos_cost: float = 1.0, + false_neg_cost: float = 1.0, + ) -> float: + """Compute the theoretical cost of the solution found. Use false_pos_cost=false_neg_cost=1 for the 0-1 loss (the standard error rate), which amounts to maximizing accuracy. @@ -238,25 +243,78 @@ def cost( false_pos_cost=false_pos_cost, false_neg_cost=false_neg_cost, ) - + def constraint_violation(self) -> float: - """This method should be part of a common interface between different - relaxed-constraint classes. + """Constraint violation of the LP solution found. Returns ------- float The fairness constraint violation. """ + self._check_fit_status() + + if self.constraint not in ALL_CONSTRAINTS: + raise ValueError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) + if self.constraint == "equalized_odds": return self.equalized_odds_violation() + + elif self.constraint.endswith("rate_parity"): + constraint_to_error_type = { + "true_positive_rate_parity": "fn", + "false_positive_rate_parity": "fp", + "true_negative_rate_parity": "fp", + "false_negative_rate_parity": "fn", + } + + return self.error_rate_parity_constraint_violation( + error_type=constraint_to_error_type[self.constraint], + ) + else: - raise NotImplementedError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) + raise NotImplementedError( + "Standalone constraint violation not yet computed for " + f"constraint='{self.constraint}'." + ) + + def error_rate_parity_constraint_violation(self, error_type: str) -> float: + """Compute the theoretical violation of an error-rate parity constraint. + + Parameters + ---------- + error_type : str + One of the following values: + "fp", for false positive errors (FPR or TNR parity); + "fn", for false negative errors (TPR or FNR parity). 
+ + Returns + ------- + float + The maximum constraint violation among all groups. + """ + self._check_fit_status() + valid_error_types = ("fp", "fn") + if error_type not in valid_error_types: + raise ValueError( + f"Invalid error_type='{error_type}', must be one of " + f"{valid_error_types}." + ) + + roc_idx_of_interest = 0 if error_type == "fp" else 1 + + return self._max_l_inf_between_points( + points=[ + roc_point[roc_idx_of_interest] + for roc_point in self.groupwise_roc_points + ], + ) def equalized_odds_violation(self) -> float: - """Computes the theoretical violation of the equalized odds constraint - (i.e., the maximum l-inf distance between the ROC point of any pair - of groups). + """Compute the theoretical violation of the equalized odds constraint. + + That is, the maximum l-inf distance between the ROC point of any pair + of groups. Returns ------- @@ -265,31 +323,52 @@ def equalized_odds_violation(self) -> float: """ self._check_fit_status() - n_groups = len(self.groupwise_roc_points) + # Compute l-inf distance between each pair of groups + return self._max_l_inf_between_points( + points=self.groupwise_roc_points, + ) + + @staticmethod + def _max_l_inf_between_points(points: list[float | np.ndarray]) -> float: + # Number of points (should correspond to the number of groups) + n_points = len(points) # Compute l-inf distance between each pair of groups l_inf_constraint_violation = [ - (np.linalg.norm( - self.groupwise_roc_points[i] - self.groupwise_roc_points[j], - ord=np.inf), (i, j)) - for i, j in product(range(n_groups), range(n_groups)) + (np.linalg.norm(points[i] - points[j], ord=np.inf), (i, j)) + for i, j in product(range(n_points), range(n_points)) if i < j ] # Return the maximum max_violation, (groupA, groupB) = max(l_inf_constraint_violation) logging.info( - f"Maximum fairness violation is between " - f"group={groupA} (p={self.groupwise_roc_points[groupA]}) and " - f"group={groupB} (p={self.groupwise_roc_points[groupB]});" + ( + "Maximum fairness violation is between " + "group=%d (p=%s) and " + "group=%d (p=%s);" + ), + groupA, + points[groupA], + groupB, + points[groupB], ) return max_violation + def fit( + self, + X: np.ndarray, + y: np.ndarray, + *, + sensitive_features: np.ndarray, # TODO validate input and convert to proper format + y_scores: np.ndarray = None, + ): + """Find the optimal fair postprocessing. - def fit(self, X: np.ndarray, y: np.ndarray, *, sensitive_features: np.ndarray, y_scores: np.ndarray = None): - """Find the optimal postprocessing that fulfills the (possibly relaxed) - fairness constraint on the provided data. + That is, it finds the postprocessing that minimizes loss, while + fulfilling the (possibly relaxed) fairness constraint on the provided + data. Parameters ---------- @@ -299,7 +378,7 @@ def fit(self, X: np.ndarray, y: np.ndarray, *, sensitive_features: np.ndarray, y The input labels. sensitive_features : np.ndarray The sensitive features (group membership) of each sample. - Assumes groups are numbered [0, 1, ..., num_groups-1]. # TODO validate input and convert to proper format + Assumes groups are numbered [0, 1, ..., num_groups-1]. y_scores : np.ndarray, optional The pre-computed model predictions on this data. @@ -308,38 +387,42 @@ def fit(self, X: np.ndarray, y: np.ndarray, *, sensitive_features: np.ndarray, y callable Returns self. 
""" - # Compute group stats self._global_prevalence = np.sum(y) / len(y) unique_groups = np.unique(sensitive_features) num_groups = len(unique_groups) - if np.max(unique_groups) > num_groups-1: + if np.max(unique_groups) > num_groups - 1: raise ValueError( - f"Groups should be numbered starting at 0, and up to " + "Groups should be numbered starting at 0, and up to " f"num_groups-1. Got {num_groups} groups, but max value is " f"{np.max(unique_groups)} != num_groups-1 == {num_groups-1}." ) # Relative group sizes for LN and LP samples - group_sizes_label_neg = np.array([ - np.sum(1 - y[sensitive_features == g]) for g in unique_groups - ]) - group_sizes_label_pos = np.array([ - np.sum(y[sensitive_features == g]) for g in unique_groups - ]) + group_sizes_label_neg = np.array( + [np.sum(1 - y[sensitive_features == g]) for g in unique_groups] + ) + group_sizes_label_pos = np.array( + [np.sum(y[sensitive_features == g]) for g in unique_groups] + ) if np.sum(group_sizes_label_neg) + np.sum(group_sizes_label_pos) != len(y): raise RuntimeError( - f"Failed input validation. Are you using non-binary labels?") + "Failed input validation. Are you using non-binary labels?" + ) # Convert to relative sizes - group_sizes_label_neg = group_sizes_label_neg.astype(float) / np.sum(group_sizes_label_neg) - group_sizes_label_pos = group_sizes_label_pos.astype(float) / np.sum(group_sizes_label_pos) + group_sizes_label_neg = group_sizes_label_neg.astype(float) / np.sum( + group_sizes_label_neg + ) + group_sizes_label_pos = group_sizes_label_pos.astype(float) / np.sum( + group_sizes_label_pos + ) # Compute group-wise ROC curves if y_scores is None: - y_scores = _get_soft_predictions(self.predictor, X, self.predict_method) + y_scores = _get_soft_predictions(self.predictor, X, self.predict_method) self._all_roc_data = dict() for g in unique_groups: @@ -353,11 +436,17 @@ def fit(self, X: np.ndarray, y: np.ndarray, *, sensitive_features: np.ndarray, y # Check if max_roc_ticks is exceeded fpr, tpr, thrs = roc_curve_data if self.grid_size is not None and len(fpr) > self.grid_size: - indices_to_keep = np.arange(0, len(fpr), len(fpr) / self.grid_size).astype(int) + indices_to_keep = np.arange( + 0, len(fpr), len(fpr) / self.grid_size + ).astype(int) # Bottom-left (0,0) and top-right (1,1) points must be kept indices_to_keep[-1] = len(fpr) - 1 - roc_curve_data = (fpr[indices_to_keep], tpr[indices_to_keep], thrs[indices_to_keep]) + roc_curve_data = ( + fpr[indices_to_keep], + tpr[indices_to_keep], + thrs[indices_to_keep], + ) self._all_roc_data[g] = roc_curve_data @@ -367,7 +456,9 @@ def fit(self, X: np.ndarray, y: np.ndarray, *, sensitive_features: np.ndarray, y group_fpr, group_tpr, _group_thresholds = self._all_roc_data[g] curr_roc_points = np.stack((group_fpr, group_tpr), axis=1) - curr_roc_points = np.vstack((curr_roc_points, [1, 0])) # Add point (1, 0) to ROC curve + curr_roc_points = np.vstack( + (curr_roc_points, [1, 0]) + ) # Add point (1, 0) to ROC curve self._all_roc_hulls[g] = roc_convex_hull(curr_roc_points) @@ -385,7 +476,7 @@ def fit(self, X: np.ndarray, y: np.ndarray, *, sensitive_features: np.ndarray, y # Construct each group-specific classifier all_rand_clfs = { - g: RandomizedClassifier.construct_at_target_ROC( # TODO: check InterpolatedThresholder + g: RandomizedClassifier.construct_at_target_ROC( # TODO: check InterpolatedThresholder predictor=self.predictor, roc_curve_data=self._all_roc_data[g], target_roc_point=self._groupwise_roc_points[g], @@ -395,16 +486,19 @@ def fit(self, X: np.ndarray, y: 
np.ndarray, *, sensitive_features: np.ndarray, y } # Construct the global classifier (can be used for all groups) - self._realized_classifier = EnsembleGroupwiseClassifiers(group_to_clf=all_rand_clfs) # TODO: check InterpolatedThresholder + # TODO: check InterpolatedThresholder + self._realized_classifier = EnsembleGroupwiseClassifiers( + group_to_clf=all_rand_clfs + ) return self - + def _check_fit_status(self, raise_error: bool = True) -> bool: - """Checks whether this classifier has been fit on some data. - + """Check whether this classifier has been fit on some data. + Parameters ---------- raise_error : bool, optional - Whether to raise an error if the classifier is uninitialized + Whether to raise an error if the classifier is uninitialized (otherwise will just return False), by default True. Returns @@ -424,7 +518,8 @@ def _check_fit_status(self, raise_error: bool = True) -> bool: raise RuntimeError( "This classifier has not yet been fitted to any data. " - "Call clf.fit(...) before this method.") + "Call clf.fit(...) before this method." + ) return True diff --git a/fairlearn/postprocessing/_cvxpy_utils.py b/fairlearn/postprocessing/_cvxpy_utils.py index a8e7ee706..66827cf50 100644 --- a/fairlearn/postprocessing/_cvxpy_utils.py +++ b/fairlearn/postprocessing/_cvxpy_utils.py @@ -1,5 +1,7 @@ -"""A set of helper functions for defining cvxpy LP objective and constraints. -""" +# Copyright (c) Fairlearn contributors. +# Licensed under the MIT License. + +"""A set of helper functions for defining cvxpy LP objective and constraints.""" from __future__ import annotations import logging @@ -14,10 +16,10 @@ # Set of all fairness constraints with a cvxpy LP implementation ALL_CONSTRAINTS = { "equalized_odds", - # "true_positive_rate_parity", - # "false_positive_rate_parity", - # "true_negative_rate_parity", - # "false_negative_rate_parity", + "true_positive_rate_parity", + "false_positive_rate_parity", + "true_negative_rate_parity", + "false_negative_rate_parity", } NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE = ( @@ -32,43 +34,40 @@ def _import_cvxpy_if_available(): - """Will try to import `cvxpy` and raise an appropriate error if it's not - installed. - """ + """Will try to import `cvxpy` and raise an error if it's not installed.""" try: - import cvxpy as cp + import cvxpy as cp # noqa except ImportError: raise RuntimeError(_CVXPY_IMPORT_ERROR_MESSAGE) def compute_line(p1: np.ndarray, p2: np.ndarray) -> tuple[float, float]: - """Computes the slope and intercept of the line that passes - through the two given points. - - The intercept is the value at x=0! - (or NaN for vertical lines) - - For vertical lines just use the x-value of one of the points - to find the intercept at y=0. + """Compute the slope and intercept of a line given two points. + + The intercept is the value at `x=0` (or NaN for vertical lines). + + For vertical lines just use the x-value of one of the points to find the + value of `x` at y=0 (intersection with the x-axis). Parameters ---------- p1 : np.ndarray A 2-D point. + p2 : np.ndarray A 2-D point. Returns ------- tuple[float, float] - A tuple pair with (slope, intercept) of the line that goes from p1 to p2. + A tuple pair with (slope, intercept) of the line that goes through p1 + and p2. Raises ------ ValueError Raised when input is invalid, e.g., when p1 == p2. 
""" - p1x, p1y = p1 p2x, p2y = p2 if all(p1 == p2): @@ -88,36 +87,37 @@ def compute_line(p1: np.ndarray, p2: np.ndarray) -> tuple[float, float]: def compute_halfspace_inequality( - p1: np.ndarray, - p2: np.ndarray, - ) -> tuple[float, float, float]: - """Computes the halfspace inequality defined by the vector p1->p2, such that - Ax + b <= 0, - where A and b are extracted from the line that goes through p1->p2. + p1: np.ndarray, + p2: np.ndarray, +) -> tuple[float, float, float]: + """Compute the half-space inequality defined by the vector p1->p2. - As such, the inequality enforces that points must lie on the LEFT of the - line defined by the p1->p2 vector. + That is, computes the inequality that enforces that all points must lie on + the LEFT of the line defined by the p1->p2 vector. - In other words, input points are assumed to be in COUNTER CLOCK-WISE order - (right-hand rule). + Will define the inequality in the form :math:`Ax + b <= 0`, and return a + tuple with :code:`(A_1, A_2, ..., b)` with shape :code:`n_dims + 1`. + + Input points are assumed to be in COUNTER CLOCK-WISE order (right-hand + rule). Parameters ---------- p1 : np.ndarray - A point in the halfspace. + A point in the half-space (or line for 2D). p2 : np.ndarray - Another point in the halfspace. + Another point in the half-space (or line for 2D). Returns ------- tuple[float, float, float] - Returns an array of size=(n_dims + 1), with format [A; b], - representing the inequality Ax + b <= 0. + Returns a tuple of :code:`length=(n_dims + 1)`, with format + :code:`(*A; b)`, representing the inequality :math:`Ax + b <= 0`. Raises ------ RuntimeError - Thrown in case if inconsistent internal state variables. + Thrown in case of inconsistent internal state variables. """ slope, intercept = compute_line(p1, p2) @@ -128,63 +128,65 @@ def compute_halfspace_inequality( # if slope is infinity, the constraint only applies to the values of x; # > the halfspace's b intercept value will correspond to this value of x; if np.isinf(slope): - # Validating vertical line if not np.isclose(p1x, p2x): raise RuntimeError( "Got infinite slope for line containing two points with " - "different x-axis coordinates.") - + "different x-axis coordinates." + ) + # Vector pointing downwards? then, x >= b if p2y < p1y: return [-1, 0, p1x] - + # Vector pointing upwards? then, x <= b elif p2y > p1y: return [1, 0, -p1x] - + # elif slope is zero, the constraint only applies to the values of y; # > the halfspace's b intercept value will correspond to this value of y; elif np.isclose(slope, 0.0): - # Validating horizontal line if not np.isclose(p1y, p2y) or not np.isclose(p1y, intercept): raise RuntimeError( - f"Invalid horizontal line; points p1 and p2 should have same " - f"y-axis value as intercept ({p1y}, {p2y}, {intercept}).") + "Invalid horizontal line; points p1 and p2 should have same " + f"y-axis value as intercept ({p1y}, {p2y}, {intercept})." + ) # Vector pointing leftwards? then, y <= b if p2x < p1x: return [0, 1, -p1y] - + # Vector pointing rightwards? then, y >= b elif p2x > p1x: return [0, -1, p1y] # else, we have a standard diagonal line else: - # Vector points left? # then, y <= mx + b <=> -mx + y - b <= 0 if p2x < p1x: return [-slope, 1, -intercept] - + # Vector points right? 
# then, y >= mx + b <=> mx - y + b <= 0 elif p2x > p1x: return [slope, -1, intercept] - - logging.error(f"No constraint can be concluded from points p1={p1} and p2={p2};") + + logging.error("No constraint can be concluded from points p1=%s and p2=%s;", p1, p2) return [0, 0, 0] def make_cvxpy_halfspace_inequality( - p1: np.ndarray, - p2: np.ndarray, - cvxpy_point: "cvxpy.Variable", - ) -> "cvxpy.Expression": - """Creates a single cvxpy inequality constraint that enforces the given - point, `cvxpy_point`, to lie on the left of the vector p1->p2. + p1: np.ndarray, + p2: np.ndarray, + cvxpy_point, +): + """Create a `cvxpy` constraint to enforce a point to be inside a half-space. + + That is, creates a single cvxpy inequality constraint that enforces the + given variable/point, `cvxpy_point`, to lie on the left of the vector p1->p2 + (on the left of the half-space defined by the vector p1->p2). Points must be sorted in counter clock-wise order! @@ -207,17 +209,21 @@ def make_cvxpy_halfspace_inequality( def make_cvxpy_point_in_polygon_constraints( - polygon_vertices: np.ndarray, - cvxpy_point: "cvxpy.Variable", - ) -> list["cvxpy.Expression"]: - """Creates the set of cvxpy constraints that force the given cvxpy variable - point to lie within the polygon defined by the given vertices. + polygon_vertices: np.ndarray, + cvxpy_point, +) -> list: + """Create a set of `cvxpy` constraints for a point to be inside a polygon. + + That is, creates the set of :code:`cvxpy.Expression` constraints that + enforce the given :code:`cvxpy_point: cvxpy.Variable` to lie within the + polygon defined by the given :code:`polygon_vertices` vertices. Parameters ---------- polygon_vertices : np.ndarray A sequence of points that make up a polygon. Points must be sorted in COUNTER CLOCK-WISE order! (right-hand rule) + cvxpy_point : cvxpy.Variable A cvxpy variable representing a point, over which the constraints will be applied. @@ -229,7 +235,8 @@ def make_cvxpy_point_in_polygon_constraints( """ return [ make_cvxpy_halfspace_inequality( - polygon_vertices[i], polygon_vertices[(i+1) % len(polygon_vertices)], + polygon_vertices[i], + polygon_vertices[(i + 1) % len(polygon_vertices)], cvxpy_point, ) for i in range(len(polygon_vertices)) @@ -237,24 +244,24 @@ def make_cvxpy_point_in_polygon_constraints( def compute_fair_optimum( - *, - fairness_constraint: str, - tolerance: float, - groupwise_roc_hulls: dict[int, np.ndarray], - group_sizes_label_pos: np.ndarray, - group_sizes_label_neg: np.ndarray, - global_prevalence: float, - false_positive_cost: float = 1., - false_negative_cost: float = 1., - ) -> tuple[np.ndarray, np.ndarray]: - """Computes the solution to finding the optimal fair (equal odds) classifier. + *, + fairness_constraint: str, + tolerance: float, + groupwise_roc_hulls: dict[int, np.ndarray], + group_sizes_label_pos: np.ndarray, + group_sizes_label_neg: np.ndarray, + global_prevalence: float, + false_positive_cost: float = 1.0, + false_negative_cost: float = 1.0, +) -> tuple[np.ndarray, np.ndarray]: + """Compute the solution to finding the optimal fair (equal odds) classifier. Can relax the equal odds constraint by some given tolerance. Parameters ---------- fairness_constraint : str - The name of the fairness constraint under which the LP will be + The name of the fairness constraint under which the LP will be optimized. 
Possible inputs are: 'equalized_odds' @@ -265,7 +272,7 @@ def compute_fair_optimum( groupwise_roc_hulls : dict[int, np.ndarray] A dict mapping each group to the convex hull of the group's ROC curve. - The convex hull is an np.array of shape (n_points, 2), containing the + The convex hull is an np.array of shape (n_points, 2), containing the points that form the convex hull of the ROC curve, sorted in COUNTER CLOCK-WISE order. @@ -300,8 +307,9 @@ def compute_fair_optimum( n_groups = len(groupwise_roc_hulls) if n_groups != len(group_sizes_label_neg) or n_groups != len(group_sizes_label_pos): raise ValueError( - f"Invalid arguments; all of the following should have the same " - f"length: groupwise_roc_hulls, group_sizes_label_neg, group_sizes_label_pos;") + "Invalid arguments; all of the following should have the same " + "length: groupwise_roc_hulls, group_sizes_label_neg, group_sizes_label_pos;" + ) # Group-wise ROC points groupwise_roc_points_vars = [ @@ -313,18 +321,24 @@ def compute_fair_optimum( global_roc_point_var = cp.Variable(shape=2, name="Global ROC point", nonneg=True) constraints = [ # Global FPR is the average of group FPRs weighted by LNs in each group - global_roc_point_var[0] == group_sizes_label_neg @ np.array([p[0] for p in groupwise_roc_points_vars]), - + global_roc_point_var[0] + == ( + group_sizes_label_neg @ np.array([p[0] for p in groupwise_roc_points_vars]) + ), # Global TPR is the average of group TPRs weighted by LPs in each group - global_roc_point_var[1] == group_sizes_label_pos @ np.array([p[1] for p in groupwise_roc_points_vars]), + global_roc_point_var[1] + == ( + group_sizes_label_pos @ np.array([p[1] for p in groupwise_roc_points_vars]) + ), ] - ### APPLY FAIRNESS CONSTRAINTS + # START OF: applying fairness constraints # If "equalized_odds" # > i.e., constrain l-inf distance between any two groups' ROCs being less than `tolerance` if fairness_constraint == "equalized_odds": constraints += [ - cp.norm_inf(groupwise_roc_points_vars[i] - groupwise_roc_points_vars[j]) <= tolerance + cp.norm_inf(groupwise_roc_points_vars[i] - groupwise_roc_points_vars[j]) + <= tolerance for i, j in product(range(n_groups), range(n_groups)) if i < j ] @@ -332,20 +346,29 @@ def compute_fair_optimum( # If some rate parity, i.e., parity of one of {TPR, FPR, TNR, FNR} # i.e., constrain absolute distance between any two groups' rate metric elif fairness_constraint.endswith("rate_parity"): - roc_idx_of_interest: int - if fairness_constraint == "true_positive_rate_parity" or fairness_constraint == "false_negative_rate_parity": + if ( + fairness_constraint == "true_positive_rate_parity" + or fairness_constraint == "false_negative_rate_parity" + ): roc_idx_of_interest = 1 - elif fairness_constraint == "false_positive_rate_parity" or fairness_constraint == "false_negative_rate_parity": + elif ( + fairness_constraint == "false_positive_rate_parity" + or fairness_constraint == "false_negative_rate_parity" + ): roc_idx_of_interest = 0 - + else: # This point should never be reached as fairness constraint was previously validated raise ValueError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) constraints += [ - cp.abs(groupwise_roc_points_vars[i][roc_idx_of_interest] - groupwise_roc_points_vars[j][roc_idx_of_interest]) <= tolerance + cp.abs( + groupwise_roc_points_vars[i][roc_idx_of_interest] + - groupwise_roc_points_vars[j][roc_idx_of_interest] + ) + <= tolerance for i, j in product(range(n_groups), range(n_groups)) if i < j ] @@ -353,21 +376,25 @@ def compute_fair_optimum( # TODO: implement other 
constraints here else: raise NotImplementedError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) + # END OF: applying fairness constraints # Constraints for points in respective group-wise ROC curves for idx in range(n_groups): constraints += make_cvxpy_point_in_polygon_constraints( polygon_vertices=groupwise_roc_hulls[idx], - cvxpy_point=groupwise_roc_points_vars[idx]) + cvxpy_point=groupwise_roc_points_vars[idx], + ) # Define cost function - obj = cp.Minimize(calc_cost_of_point( - fpr=global_roc_point_var[0], - fnr=1 - global_roc_point_var[1], - prevalence=global_prevalence, - false_pos_cost=false_positive_cost, - false_neg_cost=false_negative_cost, - )) + obj = cp.Minimize( + calc_cost_of_point( + fpr=global_roc_point_var[0], + fnr=1 - global_roc_point_var[1], + prevalence=global_prevalence, + false_pos_cost=false_positive_cost, + false_neg_cost=false_negative_cost, + ) + ) # Define cvxpy problem prob = cp.Problem(obj, constraints) @@ -378,13 +405,17 @@ def compute_fair_optimum( # (useful when comparing if two points are the same, within the cvxpy accuracy tolerance) # Log solution - logging.info(f"cvxpy solver took {prob.solver_stats.solve_time}s; status is {prob.status}.") + logging.info( + "cvxpy solver took %fs; status is %s.", + prob.solver_stats.solve_time, + prob.status, + ) if prob.status not in ["infeasible", "unbounded"]: # Otherwise, problem.value is inf or -inf, respectively. - logging.info(f"Optimal solution value: {prob.value}") + logging.info("Optimal solution value: %s", prob.value) for variable in prob.variables(): - logging.info(f"Variable {variable.name()}: value {variable.value}") + logging.info("Variable %s: value %s", variable.name(), variable.value) else: # This line should never be reached (there are always trivial fair # solutions in the ROC diagonal) @@ -396,7 +427,7 @@ def compute_fair_optimum( # Validating solution cost solution_cost = calc_cost_of_point( fpr=global_roc_point[0], - fnr=1-global_roc_point[1], + fnr=1 - global_roc_point[1], prevalence=global_prevalence, false_pos_cost=false_positive_cost, false_neg_cost=false_negative_cost, @@ -404,9 +435,15 @@ def compute_fair_optimum( if not np.isclose(solution_cost, prob.value): logging.error( - f"Solution was found but cost did not pass validation! " - f"Found solution ROC point {global_roc_point} with theoretical cost " - f"{prob.value}, but actual cost is {solution_cost};") + ( + "Solution was found but cost did not pass validation! 
" + "Found solution ROC point %s with theoretical cost %s, " + "but actual cost is %s;" + ), + global_roc_point, + prob.value, + solution_cost, + ) # Validating congruency between group-wise ROC points and global ROC point global_roc_from_groupwise = compute_global_roc_from_groupwise( @@ -416,8 +453,13 @@ def compute_fair_optimum( ) if not all(np.isclose(global_roc_from_groupwise, global_roc_point)): logging.error( - f"Solution: global ROC point ({global_roc_point}) does not seem to " - f"match group-wise ROC points; global should be " - f"({global_roc_from_groupwise}) to be consistent with group-wise;") + ( + "Solution: global ROC point (%s) does not seem to " + "match group-wise ROC points; global should be " + "(%s) to be consistent with group-wise;" + ), + global_roc_point, + global_roc_from_groupwise, + ) return groupwise_roc_points, global_roc_point diff --git a/fairlearn/postprocessing/_randomized_classifiers.py b/fairlearn/postprocessing/_randomized_classifiers.py index 76ead5885..1083a2212 100644 --- a/fairlearn/postprocessing/_randomized_classifiers.py +++ b/fairlearn/postprocessing/_randomized_classifiers.py @@ -1,3 +1,6 @@ +# Copyright (c) Fairlearn contributors. +# Licensed under the MIT License. + """Helper functions to construct and use randomized classifiers. TODO: this module will probably be substituted by the InterpolatedThresholder @@ -14,39 +17,49 @@ from scipy.spatial import ConvexHull -class Classifier(ABC): +class _Classifier(ABC): + """Public API for a classifier.""" + @abstractmethod - def __call__(self, X: np.ndarray, *, sensitive_features: np.ndarray = None) -> np.ndarray: - """Return predicted class, Y, for the given input features, X. - """ + def __call__( + self, X: np.ndarray, *, sensitive_features: np.ndarray = None + ) -> np.ndarray: + """Return predicted class, Y, for the given input features, X.""" raise NotImplementedError -class BinaryClassifier(Classifier): - """Constructs a deterministic binary classifier, by thresholding a - real-valued score predictor. - """ +class BinaryClassifier(_Classifier): + """A deterministic binary classifier.""" def __init__( - self, - score_predictor: callable, - threshold: float, - ): - """Constructs a deterministic binary classifier from the given - real-valued score predictor and a threshold in {0, 1}. + self, + score_predictor: Callable, + threshold: float, + ): + """Construct a binary classifier by thresholding score predictor. + + Parameters + ---------- + score_predictor : Callable + A real-valued score predictor. + + threshold : float + A threshold value. """ self.score_predictor = score_predictor self.threshold = threshold - def __call__(self, X: np.ndarray, *, sensitive_features: np.ndarray = None) -> np.ndarray: - """Computes predictions for the given samples, X. + def __call__( + self, X: np.ndarray, *, sensitive_features: np.ndarray = None + ) -> np.ndarray: + """Compute predictions for the given samples, X. Parameters ---------- X : np.ndarray The input samples, in shape (num_samples, num_features). sensitive_features : None, optional - None. This argument will be ignored by this classifier as it does + None. This argument will be ignored by this classifier as it does not consider sensitive attributes. 
Returns @@ -57,19 +70,22 @@ def __call__(self, X: np.ndarray, *, sensitive_features: np.ndarray = None) -> n return (self.score_predictor(X) >= self.threshold).astype(int) -class BinaryClassifierAtROCDiagonal(Classifier): - """A dummy classifier whose predictions have no correlation with the input - features, but achieves whichever target FPR or TPR you want (on ROC diag.) +class BinaryClassifierAtROCDiagonal(_Classifier): + """A dummy classifier with FPR=TPR. + + That is, a classifier whose predictions have no correlation with the input + features, but achieves whichever target FPR or TPR you want (on ROC + diagonal). """ def __init__( - self, - target_fpr: float = None, - target_tpr: float = None, - seed: int = 42, - ): + self, + target_fpr: float = None, + target_tpr: float = None, + seed: int = 42, + ): err_msg = ( - f"Must provide exactly one of 'target_fpr' or 'target_tpr', " + "Must provide exactly one of 'target_fpr' or 'target_tpr', " f"got target_fpr={target_fpr}, target_tpr={target_tpr}." ) if target_fpr is not None and target_tpr is not None: @@ -84,43 +100,66 @@ def __init__( elif target_tpr is not None: self.target_tpr = target_tpr self.target_fpr = target_tpr - + # Provided neither! else: raise ValueError(err_msg) - + # Initiate random number generator self.rng = np.random.default_rng(seed) - def __call__(self, X: np.ndarray, *, sensitive_features: np.ndarray = None) -> np.ndarray: + def __call__( + self, + X: np.ndarray, + *, + sensitive_features: np.ndarray = None, + ) -> np.ndarray: + """Compute (randomized) predictions. + + Parameters + ---------- + X : np.ndarray + Input features (will be ignored as predictions are random). + + sensitive_features : np.ndarray, optional + Sensitive features (will be ignored if passed). + + Returns + ------- + np.ndarray + The predicted classes for each input sample. + """ return (self.rng.random(size=len(X)) >= (1 - self.target_fpr)).astype(int) -class EnsembleGroupwiseClassifiers(Classifier): - """Constructs a classifier from a set of group-specific classifiers. - """ +class EnsembleGroupwiseClassifiers(_Classifier): + """Construct a classifier from a set of group-specific classifiers.""" def __init__(self, group_to_clf: dict[int | str, Callable]): - """Constructs a classifier from a set of group-specific classifiers. + """Construct a classifier from a set of group-specific classifiers. Must be provided exactly one classifier per unique group value. Parameters ---------- group_to_clf : dict[int | str, callable] - A mapping of group value to the classifier that should handle + A mapping of group value to the classifier that should handle predictions for that specific group. """ self.group_to_clf = group_to_clf def __call__(self, X: np.ndarray, *, sensitive_features: np.ndarray) -> np.ndarray: - """Compute predictions for the given input samples X, given their - sensitive attributes, `sensitive_features`. + """Compute predictions for the given input samples. + + For the given samples :code:`X` and their sensitive group membership + :code:`sensitive_features`, compute their predicted classes using + group-specific classifiers. Parameters ---------- X : np.ndarray Input samples, with shape (num_samples, num_features). + group : np.ndarray, optional The sensitive attribute value for each input sample. @@ -131,7 +170,7 @@ def __call__(self, X: np.ndarray, *, sensitive_features: np.ndarray) -> np.ndarr to a group-specific classifier for that sample. 
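+
+        Examples
+        --------
+        A minimal illustrative sketch; ``clf_a``, ``clf_b``, and the group
+        values are hypothetical stand-ins for fitted per-group classifiers:
+
+        >>> ensemble = EnsembleGroupwiseClassifiers({"a": clf_a, "b": clf_b})
+        >>> y_pred = ensemble(X, sensitive_features=sensitive_features)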
""" if len(X) != len(sensitive_features): - raise ValueError(f"Invalid input sizes len(X) != len(group)") + raise ValueError("Invalid input sizes len(X) != len(group)") # Array to store predictions num_samples = len(X) @@ -141,91 +180,89 @@ def __call__(self, X: np.ndarray, *, sensitive_features: np.ndarray) -> np.ndarr cumulative_filter = np.zeros(num_samples).astype(bool) for group_value, group_clf in self.group_to_clf.items(): - group_filter = (sensitive_features == group_value) + group_filter = sensitive_features == group_value y_pred[group_filter] = group_clf(X[group_filter]) cumulative_filter |= group_filter if np.sum(cumulative_filter) != num_samples: raise RuntimeError( f"Computed group-wise predictions for {np.sum(cumulative_filter)} " - f"samples, but got {num_samples} input samples.") + f"samples, but got {num_samples} input samples." + ) return y_pred -class RandomizedClassifier(Classifier): - """Constructs a randomized classifier from the given classifiers and - their probabilities. - """ +class RandomizedClassifier(_Classifier): + """A randomized classifier that interpolates multiple classifiers.""" def __init__( - self, - classifiers: list[Classifier], - probabilities: list[float], - seed: int = 42, - ): - """Constructs a randomized classifier from the given classifiers and - their probabilities. - - This classifier will compute predictions for the whole input dataset at - once, which will in general be faster for larger inputs (when compared - to predicting each sample separately). + self, + classifiers: list[Callable], + probabilities: list[float], + seed: int = 42, + ): + """Construct a randomized classifier that interpolates multiple classifiers. Parameters ---------- - classifiers : list[callable] + classifiers : list[Callable] A list of classifiers probabilities : list[float] - A list of probabilities for each given classifier, where - probabilities[idx] is the probability of using the prediction from + A list of probabilities for each given classifier, where + probabilities[idx] is the probability of using the prediction from classifiers[idx]. seed : int, optional A random seed, by default 42. Returns ------- - callable + Callable The corresponding randomized classifier. """ if len(classifiers) != len(probabilities): raise ValueError( - f"Invalid arguments: len(classifiers) != len(probabilities); " - f"({len(classifiers)} != {len(probabilities)});") + "Invalid arguments: len(classifiers) != len(probabilities); " + f"({len(classifiers)} != {len(probabilities)});" + ) self.classifiers = classifiers self.probabilities = probabilities self.rng = np.random.default_rng(seed) - + def __call__(self, X: np.ndarray, *, sensitive_features: np.ndarray = None) -> int: + """Compute interpolated predictions.""" # Assign each sample to a classifier clf_idx = self.rng.choice( - np.arange(len(self.classifiers)), # possible choices - size=len(X), # size of output array - p=self.probabilities, # prob. of each choice + np.arange(len(self.classifiers)), # possible choices + size=len(X), # size of output array + p=self.probabilities, # prob. of each choice ) - + # Run predictions for all classifiers on all samples y_pred_choices = [clf(X) for clf in self.classifiers] # TODO: # we could actually just run the classifier for the samples that get # matched with it... similar to the EnsembleGroupwiseClassifiers call # method. 
- - return np.choose(clf_idx, y_pred_choices) + return np.choose(clf_idx, y_pred_choices) @staticmethod def find_weights_given_two_points( - point_A: np.ndarray, - point_B: np.ndarray, - target_point: np.ndarray, - ): - """Given two ROC points corresponding to existing binary classifiers, - find the weights that result in a classifier whose ROC point is target_point. - + point_A: np.ndarray, + point_B: np.ndarray, + target_point: np.ndarray, + ): + """Find the interpolation of points that achieves the target point. + + That is, given two ROC points corresponding to existing binary + classifiers, :code:`point_A, point_B`, finds the weights that result in + a classifier whose ROC point is the target :code:`target_point`. + May need to interpolate the two given points with a third point corresponding to a random classifier (random uniform distribution with different thresholds). - + Returns ------- tuple[np.ndarray, np.ndarray] @@ -238,16 +275,17 @@ def find_weights_given_two_points( if all(np.isclose(point_B, target_point)): return np.array([1]), np.expand_dims(point_B, axis=0) - + # If not, we'll have to triangulate the target using A and B point_A_fpr, point_A_tpr = point_A point_B_fpr, point_B_tpr = point_B target_fpr, target_tpr = target_point if not (point_A_fpr <= target_fpr <= point_B_fpr): raise ValueError( - f"Invalid input. FPR should fulfill: " + "Invalid input. FPR should fulfill: " f"({point_A_fpr} point_A_FPR) <= ({target_fpr} target_fpr) <= " - f"({point_B_fpr} point_B_fpr)") + f"({point_B_fpr} point_B_fpr)" + ) # Calculate weights for points A and B weight_A = (target_fpr - point_B_fpr) / (point_A_fpr - point_B_fpr) @@ -258,8 +296,9 @@ def find_weights_given_two_points( if not np.isclose(point_P_upwards[0], target_fpr): raise RuntimeError( "Failed projecting target_fpr to ROC hull frontier. " - f"Got proj. FPR={point_P_upwards[0]}; target FPR={target_fpr};") - + f"Got proj. FPR={point_P_upwards[0]}; target FPR={target_fpr};" + ) + # Check if the target point lies in the AB line (and return if so) if all(np.isclose(point_P_upwards, target_point)): return weights_AB, np.vstack((point_A, point_B)) @@ -268,7 +307,9 @@ def find_weights_given_two_points( point_P_downwards = np.array([target_fpr, target_fpr]) # Calculate weights for P upwards and P downwards - weight_P_upwards = (target_tpr - point_P_downwards[1]) / (point_P_upwards[1] - point_P_downwards[1]) + weight_P_upwards = (target_tpr - point_P_downwards[1]) / ( + point_P_upwards[1] - point_P_downwards[1] + ) # Validating triangulation results all_points = np.vstack((point_A, point_B, point_P_downwards)) @@ -277,39 +318,45 @@ def find_weights_given_two_points( if not np.isclose(all_weights.sum(), 1): raise RuntimeError( f"Sum of linear interpolation weights was {all_weights.sum()}, " - f"should be 1!") + "should be 1!" + ) if not all(np.isclose(target_point, all_weights @ all_points)): raise RuntimeError( - f"Triangulation of target point failed. " - f"Target was {target_point}; got {all_weights @ all_points}.") + "Triangulation of target point failed. " + f"Target was {target_point}; got {all_weights @ all_points}." + ) return all_weights, all_points @staticmethod def construct_at_target_ROC( - predictor: callable, - roc_curve_data: tuple, - target_roc_point: np.ndarray, - seed: int = 42, - ) -> "RandomizedClassifier": - """Constructs a randomized classifier in the interior of the - convex hull of the classifier's ROC curve, at a given target - ROC point. 
- + predictor: Callable, + roc_curve_data: tuple, + target_roc_point: np.ndarray, + seed: int = 42, + ) -> "RandomizedClassifier": + """Construct a classifier at the target ROC point. + + That is, constructs a (possibly randomized) classifier in the interior + of the convex hull of the predictor's ROC curve, at a given target ROC + point. + Parameters ---------- - predictor : callable + predictor : Callable A predictor that outputs real-valued scores in range [0; 1]. + roc_curve_data : tuple[np.array...] The ROC curve of the given classifier, as a tuple of (FPR values; TPR values; threshold values). + target_roc_point : np.ndarray The target ROC point in (FPR, TPR). - + Returns ------- - rand_clf : callable + rand_clf : Callable A (randomized) binary classifier whose expected FPR and TPR corresponds to the given target ROC point. """ @@ -321,15 +368,21 @@ def construct_at_target_ROC( # (3 minimum to compute convex hull) if len(fpr) <= 1: raise ValueError( - f"Invalid ROC curve data (only has one point): " - f"fpr:{fpr}; tpr:{tpr}.") + f"Invalid ROC curve data (only has one point): fpr:{fpr}; tpr:{tpr}." + ) if len(fpr) == 2: - logging.warning(f"Got ROC data with only 2 points: producing a random classifier...") + logging.warning( + "Got ROC data with only 2 points: producing a random classifier..." + ) if not np.isclose(target_roc_point[0], target_roc_point[1]): logging.error( - f"Invalid target ROC point ({target_roc_point}) is not in " - "diagonal ROC line, but a random-classifier ROC was provided.") + ( + "Invalid target ROC point (%s) is not in diagonal ROC line, " + "but a random-classifier ROC was provided." + ), + target_roc_point, + ) return BinaryClassifierAtROCDiagonal(target_fpr=target_roc_point[0]) @@ -339,7 +392,9 @@ def construct_at_target_ROC( # Filter out ROC points in the interior of the convex hull and other suboptimal points points_above_diagonal = np.argwhere(tpr >= fpr).ravel() - useful_points_idx = np.array(sorted(set(hull.vertices) & set(points_above_diagonal))) + useful_points_idx = np.array( + sorted(set(hull.vertices) & set(points_above_diagonal)) + ) fpr = fpr[useful_points_idx] tpr = tpr[useful_points_idx] @@ -363,7 +418,7 @@ def construct_at_target_ROC( ) if max(weights) > 1: - logging.error(f"Got triangulation weights over 100%: w={weights};") + logging.error("Got triangulation weights over 100%: w=%s;", weights) # Instantiate classifiers for points A and B clf_a = BinaryClassifier(predictor, threshold=thrs[point_A_idx]) @@ -376,7 +431,7 @@ def construct_at_target_ROC( elif all(np.isclose(target_roc_point, point_B_roc)): return clf_b - + else: # differences from target point to A or B are significant enough # to warrant triangulating between multiple points @@ -386,7 +441,7 @@ def construct_at_target_ROC( # (hence, should've been caught by the previous if statement) if len(weights) == 1: raise RuntimeError("Invalid triangulation.") - + # If there are two points, return a randomized classifier between the two elif len(weights) == 2: return RandomizedClassifier( @@ -395,13 +450,15 @@ def construct_at_target_ROC( seed=seed, ) - # If it's in the interior of the ROC curve, requires instantiating a randomized classifier at the diagonal + # If it's in the interior of the ROC curve, requires instantiating a + # randomized classifier at the diagonal elif len(weights) == 3: fpr_rand, tpr_rand = points[2] if not np.isclose(fpr_rand, tpr_rand): raise RuntimeError( - f"Triangulation point at ROC diagonal has FPR != TPR " - f"({fpr_rand} != {tpr_rand}); ") + "Triangulation 
point at ROC diagonal has FPR != TPR " + f"({fpr_rand} != {tpr_rand}); " + ) # >>> BUG this would be better but for some reason it doesn't work! # rng = np.random.default_rng(42) @@ -409,23 +466,26 @@ def construct_at_target_ROC( # # or... # clf_rand = BinaryClassifierAtROCDiagonal(target_fpr=fpr_rand) # <<< - clf_rand = lambda X: (np.random.random(size=len(X)) >= (1 - fpr_rand)).astype(int) + def clf_rand(X): + return (np.random.random(size=len(X)) >= (1 - fpr_rand)).astype(int) return RandomizedClassifier( - classifiers=[clf_a, clf_b, clf_rand], - probabilities=weights, - seed=seed) - + classifiers=[clf_a, clf_b, clf_rand], probabilities=weights, seed=seed + ) + else: raise RuntimeError( - f"Invalid triangulation of classifiers; " - f"weights: {weights}; points: {points};") + "Invalid triangulation of classifiers; " + f"weights: {weights}; points: {points};" + ) @staticmethod def find_points_for_target_ROC(roc_curve_data, target_roc_point): - """Retrieves a set of realizable points (and respective weights) in the - provided ROC curve that can be used to realize any target ROC in the - interior of the ROC curve. + """Retrieve the realizable points that interpolate the target ROC point. + + That is, retrieves a set of realizable points (and respective weights) + in the provided ROC curve that can be used to realize any target ROC in + the interior of the ROC curve. NOTE: this method is a bit redundant -- has functionality in common with RandomizedClassifier.construct_at_target_ROC() @@ -440,7 +500,9 @@ def find_points_for_target_ROC(roc_curve_data, target_roc_point): # Filter out ROC points in the interior of the convex hull and other suboptimal points points_above_diagonal = np.argwhere(tpr >= fpr).ravel() - useful_points_idx = np.array(sorted(set(hull.vertices) & set(points_above_diagonal))) + useful_points_idx = np.array( + sorted(set(hull.vertices) & set(points_above_diagonal)) + ) fpr = fpr[useful_points_idx] tpr = tpr[useful_points_idx] diff --git a/fairlearn/postprocessing/_roc_utils.py b/fairlearn/postprocessing/_roc_utils.py index 9b7506868..fcb394bce 100644 --- a/fairlearn/postprocessing/_roc_utils.py +++ b/fairlearn/postprocessing/_roc_utils.py @@ -1,11 +1,14 @@ +# Copyright (c) Fairlearn contributors. +# Licensed under the MIT License. + """Helper functions for threshold optimization methods. NOTE ---- - Most utils defined here likely have a similar counter-part already implemented -somewhere in the fairlearn code-base. + somewhere in the `fairlearn` code-base. - With time they will probably be substituted by that counter-part, and these -implementations removed. + implementations removed. """ import logging import numpy as np @@ -14,14 +17,14 @@ def calc_cost_of_point( - fpr: float, - fnr: float, - prevalence: float, - *, - false_pos_cost: float = 1., - false_neg_cost: float = 1., - ) -> float: - """Calculates the cost of the given ROC point. + fpr: float, + fnr: float, + prevalence: float, + *, + false_pos_cost: float = 1.0, + false_neg_cost: float = 1.0, +) -> float: + """Calculate the cost of the given ROC point. Parameters ---------- @@ -48,7 +51,7 @@ def calc_cost_of_point( def compute_roc_point_from_predictions(y_true, y_pred_binary): - """Computes the ROC point associated with the provided binary predictions. + """Compute the ROC point associated with the provided binary predictions. 
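+
+    The returned ROC point is the pair ``(FPR, TPR)``, computed from the
+    confusion matrix of ``y_true`` and ``y_pred_binary``.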
Parameters ---------- @@ -66,21 +69,20 @@ def compute_roc_point_from_predictions(y_true, y_pred_binary): # FPR = FP / LN fpr = fp / (fp + tn) - + # TPR = TP / LP tpr = tp / (tp + fn) - + return (fpr, tpr) def compute_global_roc_from_groupwise( - groupwise_roc_points: np.ndarray, - groupwise_label_pos_weight: np.ndarray, - groupwise_label_neg_weight: np.ndarray, - ) -> np.ndarray: - """Computes the global ROC point that corresponds to the provided group-wise - ROC points. - + groupwise_roc_points: np.ndarray, + groupwise_label_pos_weight: np.ndarray, + groupwise_label_neg_weight: np.ndarray, +) -> np.ndarray: + """Compute the global ROC point that corresponds to the provided group-wise ROC points. + The global ROC is a linear combination of the group-wise points, with different weights for computing FPR and TPR -- the first related to LNs, and the second to LPs. @@ -106,11 +108,14 @@ def compute_global_roc_from_groupwise( n_groups, _ = groupwise_roc_points.shape # Validating input shapes - if (len(groupwise_label_pos_weight) != len(groupwise_label_neg_weight) or - len(groupwise_label_pos_weight) != n_groups): - raise ValueError( - "Invalid input shapes: length of all arguments must be equal (the " - "number of different sensitive groups).") + if ( + len(groupwise_label_pos_weight) != len(groupwise_label_neg_weight) + or len(groupwise_label_pos_weight) != n_groups + ): + raise ValueError( + "Invalid input shapes: length of all arguments must be equal (the " + "number of different sensitive groups)." + ) # Normalize group LP (/LN) weights by their size if not np.isclose(groupwise_label_pos_weight.sum(), 1.0): @@ -129,21 +134,20 @@ def compute_global_roc_from_groupwise( def roc_convex_hull(roc_points: np.ndarray) -> np.ndarray: - """Computes the convex hull of the provided ROC points. - + """Compute the convex hull of the provided ROC points. + Parameters ---------- roc_points : np.ndarray An array of shape (n_points, n_dims) containing all points of a provided ROC curve. - + Returns ------- hull_points : np.ndarray An array of shape (n_hull_points, n_dim) containing all points in the convex hull of the ROC curve. 
""" - # Save init data just for logging init_num_points, _dims = roc_points.shape @@ -159,7 +163,8 @@ def roc_convex_hull(roc_points: np.ndarray) -> np.ndarray: hull_indices = hull.vertices logging.info( - f"ROC convex hull contains {len(hull_indices) / init_num_points:.1%} " - f"of the original points.") + "ROC convex hull contains %.1f%% of the original points.", + (len(hull_indices) / init_num_points) * 100, + ) return roc_points[hull_indices] diff --git a/fairlearn/postprocessing/_threshold_operation.py b/fairlearn/postprocessing/_threshold_operation.py index caeca2659..4db5ee93f 100644 --- a/fairlearn/postprocessing/_threshold_operation.py +++ b/fairlearn/postprocessing/_threshold_operation.py @@ -18,7 +18,7 @@ class ThresholdOperation: """ def __init__(self, operator, threshold): - if operator not in [">", "<"]: # NOTE for PR: sklearn uses >= for ROC threshold; see: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_curve.html + if operator not in [">", "<"]: raise ValueError("Unrecognized operator: " + operator) self._operator = operator self._threshold = threshold diff --git a/fairlearn/postprocessing/_threshold_optimizer.py b/fairlearn/postprocessing/_threshold_optimizer.py index 08ee0b1a2..cac9006f3 100644 --- a/fairlearn/postprocessing/_threshold_optimizer.py +++ b/fairlearn/postprocessing/_threshold_optimizer.py @@ -183,15 +183,15 @@ class ThresholdOptimizer(BaseEstimator, MetaEstimatorMixin): 'auto' use one of :code:`predict_proba`, :code:`decision_function`, or :code:`predict`, in that order. - + 'predict_proba' use the second column from the output of :code:`predict_proba`. It is assumed that the second column represents the positive outcome. - + 'decision_function' use the raw values given by the :code:`decision_function`. - + 'predict' use the hard values reported by the :code:`predict` method if estimator is a classifier, and the regression values if diff --git a/fairlearn/reductions/_moments/error_rate.py b/fairlearn/reductions/_moments/error_rate.py index edbd5f204..27ad27a89 100644 --- a/fairlearn/reductions/_moments/error_rate.py +++ b/fairlearn/reductions/_moments/error_rate.py @@ -9,6 +9,7 @@ from .moment import _ALL, _LABEL, ClassificationMoment + class ErrorRate(ClassificationMoment): r"""Misclassification error as a moment. @@ -44,7 +45,7 @@ def __init__(self, *, costs=None): self.fp_cost = 1.0 self.fn_cost = 1.0 else: - self.fp_cpst, self.fn_cost = unpack_fp_fn_costs(costs) + self.fp_cost, self.fn_cost = unpack_fp_fn_costs(costs) def load_data(self, X, y, *, sensitive_features, control_features=None): """Load the specified data into the object.""" diff --git a/fairlearn/utils/_common.py b/fairlearn/utils/_common.py index 2af6e1b68..de978c153 100644 --- a/fairlearn/utils/_common.py +++ b/fairlearn/utils/_common.py @@ -10,19 +10,19 @@ def unpack_fp_fn_costs(costs: dict) -> tuple[float, float]: - """Validates and unpacks the given `costs`. + """Validate and unpacks the given `costs`. Parameters ---------- costs : dict A dictionary detailing the cost for false positives and false negatives, of the form :code:`{'fp': , 'fn': }`. - + Returns ------- tuple[float, float] A tuple respectively composed of the cost of false positives and the - cost of false negatives, i.e., a tuple with + cost of false negatives, i.e., a tuple with :code:`(fp_cost, fn_cost)`. 
Raises @@ -43,7 +43,7 @@ def unpack_fp_fn_costs(costs: dict) -> tuple[float, float]: else: raise ValueError(_MESSAGE_BAD_COSTS) - + return fp_cost, fn_cost From 942252c4779c2ab9e2c6ba877ab6559c4b517904 Mon Sep 17 00:00:00 2001 From: AndreFCruz Date: Thu, 6 Jul 2023 15:17:06 +0100 Subject: [PATCH 09/13] added draft example for relaxed thresholding --- examples/plot_relaxed_equalized_odds.py | 163 ++++++++++++++++++ .../_cvxpy_threshold_optimizer.py | 15 +- 2 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 examples/plot_relaxed_equalized_odds.py diff --git a/examples/plot_relaxed_equalized_odds.py b/examples/plot_relaxed_equalized_odds.py new file mode 100644 index 000000000..f02a12a81 --- /dev/null +++ b/examples/plot_relaxed_equalized_odds.py @@ -0,0 +1,163 @@ +# Copyright (c) Fairlearn contributors. +# Licensed under the MIT License. + +""" +========================================== +RelaxedThresholdOptimizer with Census Data +========================================== +""" + +# %% +# Load and preprocess the data set +# -------------------------------- +# We download the data set using `fetch_adult` function in +# `fairlearn.datasets`. We start by importing the various modules we're going +# to use: +# + +import numpy as np +import pandas as pd +from sklearn import metrics as skm +from sklearn.ensemble import GradientBoostingClassifier +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder, StandardScaler + +from fairlearn.datasets import fetch_adult +from fairlearn.metrics import ( + MetricFrame, + equalized_odds_difference, + true_positive_rate, + false_positive_rate, + count, + # plot_model_comparison, +) + +# %% +# We can now load and inspect the data by using the `fairlearn.datasets` module: + +data = fetch_adult() +X_raw = data.data +Y = (data.target == ">50K") * 1 +X_raw + +# %% +# We are going to treat the sex of each individual as a sensitive feature +# (where 0 indicates female and 1 indicates male), and in this particular case +# we are going separate this feature out and drop it from the main data. We +# then perform some standard data preprocessing steps to convert the data into +# a format suitable for the ML algorithms + +A = X_raw["sex"] +X = X_raw.drop(labels=["sex"], axis=1) +X = pd.get_dummies(X) + +sc = StandardScaler() +X_scaled = sc.fit_transform(X) +X_scaled = pd.DataFrame(X_scaled, columns=X.columns) + +le = LabelEncoder() +Y = le.fit_transform(Y) + +# %% +# Finally, we split the data into training and test sets: + +X_train, X_test, Y_train, Y_test, A_train, A_test = train_test_split( + X_scaled, Y, A, test_size=0.4, random_state=0, stratify=Y +) + +# Work around indexing bug +X_train = X_train.reset_index(drop=True) +A_train = A_train.reset_index(drop=True) +X_test = X_test.reset_index(drop=True) +A_test = A_test.reset_index(drop=True) + +# %% +# Training a fairness-unaware predictor +# ------------------------------------- +# To show the effect of Fairlearn we will first train a standard ML predictor +# that does not incorporate fairness. 
For speed of demonstration, we use the +# simple :class:`sklearn.linear_model.LogisticRegression` class: + +unmitigated_predictor = GradientBoostingClassifier(n_estimators=1000) + +# %%time +unmitigated_predictor.fit(X_train, Y_train) + +# %% +# Compute predictions +y_test_pred_scores = unmitigated_predictor.predict_proba(X_test)[:, -1] +y_test_pred_binary = y_test_pred_scores >= 0.5 # threshold = 0.5 + +# %% +# We can start to assess the predictor's fairness using the `MetricFrame`: +metric_frame = MetricFrame( + metrics={ + "accuracy": skm.accuracy_score, + "true_positive_rate": true_positive_rate, + "false_positive_rate": false_positive_rate, + "count": count, + }, + sensitive_features=A_test, + y_true=Y_test, + y_pred=y_test_pred_binary, +) +print(metric_frame.overall) +print(metric_frame.by_group) +metric_frame.by_group.plot.bar( + subplots=True, + layout=[4, 1], + legend=False, + figsize=[12, 8], + title="Accuracy and error-rates rate by group", +) + + +# %% +unmitigated_equalized_odds_diff = equalized_odds_difference( + y_true=Y_test, y_pred=y_test_pred_binary, sensitive_features=A_test, +) + +print(f"Equalized odds difference for unmitigated classifier: {unmitigated_equalized_odds_diff:.3}") + +# %% +from fairlearn.postprocessing._cvxpy_threshold_optimizer import _RelaxedThresholdOptimizer + +fair_clf = _RelaxedThresholdOptimizer( + # predictor=unmitigated_predictor, # TODO: use this when we no longer rely on callables + # predict_method="predict_proba", + predictor=lambda *args, **kwargs: unmitigated_predictor.predict(*args, **kwargs), + predict_method="__call__", + constraint="equalized_odds", + tolerance=0, +) + + +# %% +# NOTE: in the future the relaxed thresholder will be compatible with string sensitive features +# TODO: this can be omitted when that happens! +def parse_sensitive_features(series) -> np.ndarray: + return np.array([ + 1 if str(elem) == "Female" else 0 for elem in series + ]) + + +A_train_np = parse_sensitive_features(A_train) +A_test_np = parse_sensitive_features(A_test) + + +# %% +fair_clf.fit(X_train, Y_train, sensitive_features=A_train_np) + + +# %% +y_test_pred_postprocessed = fair_clf.predict(X_test, sensitive_features=A_test_np) + +# %% +postprocessed_equalized_odds_diff = equalized_odds_difference( + y_true=Y_test, y_pred=y_test_pred_postprocessed, sensitive_features=A_test, +) + +print(f"Equalized odds difference after postprocessing: {postprocessed_equalized_odds_diff:.3}") + +# %% +# TODO: plot both models, plot all postprocessings with different tolerances, etc. diff --git a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py index f078af689..81694dd41 100644 --- a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py +++ b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py @@ -10,6 +10,19 @@ - Add option to use l1 or other distance functions for maximum tolerance between points (currently l-inf is in use). +TODO for PR #1248 +----------------- +- [ ] Adapt all data inputs to common fairlearn accepted types (e.g., numpy + arrays, pandas DFs, lists, ...) +- [ ] Try to substitute the classifier helpers under `_randomized_classifiers` + with the InterpolatedThresholder. + - Triangulating target ROC points should still be necessary, but the + returned classifier can now be of type `InterpolatedThresholder` instead + of our own classifier types. 
+- [ ] Currently the use of our `_randomized_classifiers` API is only compatible + with `predict_method="__call__"`, this should be fixed either in our API + our by substituting out classes with the `InterpolatedThresholder`. + """ from __future__ import annotations @@ -154,7 +167,7 @@ def __init__( self.predictor = predictor self.constraint = constraint self.tolerance = tolerance - self.objective_costs = objective_costs + self.objective_costs = objective_costs or {"fp": 1, "fn": 1} self.grid_size = grid_size self.predict_method = predict_method self.random_state = random_state From 1a071c26f5b42a2547a32f7d517b7c1563f62027 Mon Sep 17 00:00:00 2001 From: AndreFCruz Date: Fri, 7 Jul 2023 10:39:54 +0100 Subject: [PATCH 10/13] updated example --- examples/plot_relaxed_equalized_odds.py | 41 +++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/examples/plot_relaxed_equalized_odds.py b/examples/plot_relaxed_equalized_odds.py index f02a12a81..965504a74 100644 --- a/examples/plot_relaxed_equalized_odds.py +++ b/examples/plot_relaxed_equalized_odds.py @@ -29,7 +29,7 @@ true_positive_rate, false_positive_rate, count, - # plot_model_comparison, + plot_model_comparison, ) # %% @@ -160,4 +160,41 @@ def parse_sensitive_features(series) -> np.ndarray: print(f"Equalized odds difference after postprocessing: {postprocessed_equalized_odds_diff:.3}") # %% -# TODO: plot both models, plot all postprocessings with different tolerances, etc. +# Add the unconstrained/unmitigated classifier predictions +all_model_predictions = {"unconstrained": y_test_pred_binary} + + +# Helper to get different thresholdings for different tolerance values +def compute_test_predictions_with_relaxed_constraints(tolerance: float) -> np.ndarray: + # Instantiate + clf = _RelaxedThresholdOptimizer( + predictor=lambda *args, **kwargs: unmitigated_predictor.predict(*args, **kwargs), + predict_method="__call__", + constraint="equalized_odds", + tolerance=tolerance, + random_state=23, + ) + + # Fit + clf.fit(X_train, Y_train, sensitive_features=A_train_np) + + return clf.predict(X_test, sensitive_features=A_test_np) + + +# Compute predictions at different levels of tolerance +all_model_predictions.update({ + f"train tolerance={tol:.1}": compute_test_predictions_with_relaxed_constraints(tol) + for tol in np.arange(0, unmitigated_equalized_odds_diff, 1e-2) +}) + +# %% +# Plot all models in the fairness-accuracy landscape +plot_model_comparison( + x_axis_metric=skm.accuracy_score, + y_axis_metric=equalized_odds_difference, + y_true=Y_test, + y_preds=all_model_predictions, + sensitive_features=A_test, + point_labels=True, + show_plot=True, +) From 50e8fde8b405263758eadb98348437b3987b55f3 Mon Sep 17 00:00:00 2001 From: AndreFCruz Date: Fri, 7 Jul 2023 12:18:46 +0100 Subject: [PATCH 11/13] added plotting safe-guard --- fairlearn/postprocessing/_plotting.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fairlearn/postprocessing/_plotting.py b/fairlearn/postprocessing/_plotting.py index a5a608d42..5c9fed30b 100644 --- a/fairlearn/postprocessing/_plotting.py +++ b/fairlearn/postprocessing/_plotting.py @@ -7,6 +7,7 @@ from ._constants import _MATPLOTLIB_IMPORT_ERROR_MESSAGE from ._threshold_optimizer import ThresholdOptimizer +from ._cvxpy_threshold_optimizer import _RelaxedThresholdOptimizer _debug_colors = None _debug_ncolors = 10 @@ -63,7 +64,17 @@ def _plot_curve(ax, sensitive_feature, x_col, y_col, points): def _raise_if_not_threshold_optimizer(obj): - if not isinstance(obj, 
ThresholdOptimizer): + if isinstance(obj, ThresholdOptimizer): + return # OK + + elif isinstance(obj, _RelaxedThresholdOptimizer): + # TODO: implement plotting functionality for postprocessing w/ relaxed constraints + raise NotImplementedError( + f"Plotting functionality is not yet implemented for objects of " + f"type {_RelaxedThresholdOptimizer.__name__}." + ) + + else: raise ValueError( "Argument {} needs to be of type {}.".format( obj.__name__, ThresholdOptimizer.__name__ From 7510e560e67bbedf25d2899f3a01ed63913aa15d Mon Sep 17 00:00:00 2001 From: AndreFCruz Date: Fri, 7 Jul 2023 15:00:25 +0100 Subject: [PATCH 12/13] Relaxed TO now compatible with standard input formats: pandas, numpy, etc. --- examples/plot_relaxed_equalized_odds.py | 25 +-- .../_cvxpy_threshold_optimizer.py | 148 ++++++++++++++++-- fairlearn/postprocessing/_plotting.py | 2 +- 3 files changed, 141 insertions(+), 34 deletions(-) diff --git a/examples/plot_relaxed_equalized_odds.py b/examples/plot_relaxed_equalized_odds.py index 965504a74..05aab7b36 100644 --- a/examples/plot_relaxed_equalized_odds.py +++ b/examples/plot_relaxed_equalized_odds.py @@ -48,8 +48,7 @@ # a format suitable for the ML algorithms A = X_raw["sex"] -X = X_raw.drop(labels=["sex"], axis=1) -X = pd.get_dummies(X) +X = pd.get_dummies(X_raw) sc = StandardScaler() X_scaled = sc.fit_transform(X) @@ -131,26 +130,12 @@ tolerance=0, ) - -# %% -# NOTE: in the future the relaxed thresholder will be compatible with string sensitive features -# TODO: this can be omitted when that happens! -def parse_sensitive_features(series) -> np.ndarray: - return np.array([ - 1 if str(elem) == "Female" else 0 for elem in series - ]) - - -A_train_np = parse_sensitive_features(A_train) -A_test_np = parse_sensitive_features(A_test) - - # %% -fair_clf.fit(X_train, Y_train, sensitive_features=A_train_np) +fair_clf.fit(X_train, Y_train, sensitive_features=A_train) # %% -y_test_pred_postprocessed = fair_clf.predict(X_test, sensitive_features=A_test_np) +y_test_pred_postprocessed = fair_clf.predict(X_test, sensitive_features=A_test) # %% postprocessed_equalized_odds_diff = equalized_odds_difference( @@ -176,9 +161,9 @@ def compute_test_predictions_with_relaxed_constraints(tolerance: float) -> np.nd ) # Fit - clf.fit(X_train, Y_train, sensitive_features=A_train_np) + clf.fit(X_train, Y_train, sensitive_features=A_train) - return clf.predict(X_test, sensitive_features=A_test_np) + return clf.predict(X_test, sensitive_features=A_test) # Compute predictions at different levels of tolerance diff --git a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py index 81694dd41..ab1f9e5cd 100644 --- a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py +++ b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py @@ -27,14 +27,15 @@ from __future__ import annotations import logging +from typing import Any from itertools import product +from collections import abc import numpy as np from sklearn.metrics import roc_curve from sklearn.base import BaseEstimator, MetaEstimatorMixin -# TODO: use this to validate input -# from fairlearn.utils._input_validation import _validate_and_reformat_input +from fairlearn.utils._input_validation import _validate_and_reformat_input from fairlearn.utils._common import _get_soft_predictions from fairlearn.utils._common import unpack_fp_fn_costs @@ -192,6 +193,8 @@ def __init__( unpack_fp_fn_costs(self.objective_costs) # Initialize instance variables + self._idx_to_sensitive: dict[int, Any] = None + 
self._sensitive_to_idx: dict[Any, int] = None self._all_roc_data: dict = None self._all_roc_hulls: dict = None self._groupwise_roc_points: np.ndarray = None @@ -369,13 +372,81 @@ def _max_l_inf_between_points(points: list[float | np.ndarray]) -> float: return max_violation + def _parse_sensitive_features( + self, + sensitive_features: abc.Iterable[abc.Hashable], + expect_prebuilt_mapping: bool = False, + ) -> np.ndarray: + """Convert the given sensitive_features to the expected format. + + Expected format is composed of integer elements, sequentially numbered, + starting at zero. That is, if there are four different sensitive groups, + these will take the values: [0, 1, 2, 3]. + + Parameters + ---------- + sensitive_features : numpy.ndarray, pandas.DataFrame, pandas.Series, or list + The sensitive features in any format (numeric, string, etc.). + Elements of `sensitive_features` must be hashable. + + expect_prebuilt_mapping : bool + Whether to expect the sensitive features mapping to have already + been built before this call. + + Returns + ------- + sensitive_features_numeric : np.ndarray[int] + The sensitive features in numeric format, with values sequentially + numbered from zero up to `num_groups-1`, [0, 1, ..., num_groups-1]. + """ + # Check if sensitive_features have the expected format + if ( + isinstance(sensitive_features, np.ndarray) + and np.issubdtype(sensitive_features.dtype, np.number) + and len(np.unique(sensitive_features)) == np.max(sensitive_features) + 1 + ): + return sensitive_features + + # Otherwise, convert to expected format + # Check if mapping has been built + if self._sensitive_to_idx is None: + if expect_prebuilt_mapping: + raise RuntimeError( + "Trying to parse `sensitive_features` but mapping has not " + "yet been built; must call `classifier.fit(...)` before." + ) + + self._build_sensitive_to_idx_mapping(sensitive_features) + + return np.array( + [self._sensitive_to_idx[sens_val] for sens_val in sensitive_features], + dtype=int, + ) + + def _build_sensitive_to_idx_mapping(self, sensitive_features): + """Build an inner mapping from sensitive feature names to indices.""" + if self._sensitive_to_idx is not None or self._idx_to_sensitive is not None: + logging.warning("Re-building sensitive feature map!") + + # Sorted unique groups + unique_groups = sorted(np.unique(sensitive_features)) + + # Mapping (index: int) -> (sensitive_value: Any) + self._idx_to_sensitive = dict(enumerate(unique_groups)) + + # Mapping (sensitive_value: Any) -> (index: int) + self._sensitive_to_idx = { + group: idx for idx, group in self._idx_to_sensitive.items() + } + def fit( self, - X: np.ndarray, - y: np.ndarray, + X, + y, *, - sensitive_features: np.ndarray, # TODO validate input and convert to proper format + sensitive_features, y_scores: np.ndarray = None, + **kwargs, ): """Find the optimal fair postprocessing. @@ -385,13 +456,15 @@ def fit( Parameters ---------- - X : np.ndarray - The input features. - y : np.ndarray + X : numpy.ndarray or pandas.DataFrame + The feature matrix. + + y : numpy.ndarray, pandas.DataFrame, pandas.Series, or list The input labels. - sensitive_features : np.ndarray + + sensitive_features : numpy.ndarray, pandas.DataFrame, pandas.Series, or list The sensitive features (group membership) of each sample. - Assumes groups are numbered [0, 1, ..., num_groups-1]. + y_scores : np.ndarray, optional The pre-computed model predictions on this data. @@ -400,6 +473,17 @@ def fit( callable Returns self. 
""" + # Validate input + _, y, sensitive_feature_vector, _ = _validate_and_reformat_input( + X, + y, + sensitive_features=sensitive_features, + enforce_binary_labels=True, + ) + + # Parse sensitive_features to numeric format + sensitive_features = self._parse_sensitive_features(sensitive_feature_vector) + # Compute group stats self._global_prevalence = np.sum(y) / len(y) @@ -433,10 +517,23 @@ def fit( group_sizes_label_pos ) - # Compute group-wise ROC curves + # Compute predictions (if `y_scores` not provided) if y_scores is None: - y_scores = _get_soft_predictions(self.predictor, X, self.predict_method) + y_scores = _get_soft_predictions( + self.predictor, X, self.predict_method, **kwargs + ) + + else: + if not isinstance(y_scores, np.ndarray): + y_scores = np.array(y_scores) + if y_scores.shape != y.shape: + raise ValueError( + f"`y_scores.shape={y_scores.shape}` must match labels shape " + f"`y.shape={y.shape}`;" + ) + + # Compute group-wise ROC curves self._all_roc_data = dict() for g in unique_groups: group_filter = sensitive_features == g @@ -536,5 +633,30 @@ def _check_fit_status(self, raise_error: bool = True) -> bool: return True - def predict(self, X: np.ndarray, *, sensitive_features: np.ndarray) -> np.ndarray: + def predict(self, X, *, sensitive_features) -> np.ndarray: + """Compute predicted binary labels using the fitted postprocessing. + + Parameters + ---------- + X : numpy.ndarray or pandas.DataFrame + The feature matrix. + + sensitive_features : numpy.ndarray, pandas.DataFrame, pandas.Series, or list + The sensitive features (group membership) of each sample. + + Returns + ------- + np.ndarray + The predicted binary labels. + """ + sensitive_features_vector = _validate_and_reformat_input( + X=X, + sensitive_features=sensitive_features, + expect_y=False, + expect_sensitive_features=True, + ) + sensitive_features = self._parse_sensitive_features( + sensitive_features_vector, + expect_prebuilt_mapping=True, + ) return self._realized_classifier(X, sensitive_features=sensitive_features) diff --git a/fairlearn/postprocessing/_plotting.py b/fairlearn/postprocessing/_plotting.py index 5c9fed30b..111d5269f 100644 --- a/fairlearn/postprocessing/_plotting.py +++ b/fairlearn/postprocessing/_plotting.py @@ -70,7 +70,7 @@ def _raise_if_not_threshold_optimizer(obj): elif isinstance(obj, _RelaxedThresholdOptimizer): # TODO: implement plotting functionality for postprocessing w/ relaxed constraints raise NotImplementedError( - f"Plotting functionality is not yet implemented for objects of " + "Plotting functionality is not yet implemented for objects of " f"type {_RelaxedThresholdOptimizer.__name__}." 
) From f219cea62ce7650fd8180dea486abd1ebbfd2085 Mon Sep 17 00:00:00 2001 From: AndreFCruz Date: Fri, 7 Jul 2023 15:16:26 +0100 Subject: [PATCH 13/13] minor bug fix in Relaxed TO example --- examples/plot_relaxed_equalized_odds.py | 16 +++++++++++----- .../postprocessing/_cvxpy_threshold_optimizer.py | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/examples/plot_relaxed_equalized_odds.py b/examples/plot_relaxed_equalized_odds.py index 05aab7b36..5ad35afa2 100644 --- a/examples/plot_relaxed_equalized_odds.py +++ b/examples/plot_relaxed_equalized_odds.py @@ -58,10 +58,14 @@ Y = le.fit_transform(Y) # %% -# Finally, we split the data into training and test sets: +# Finally, we split the data into training, validation, and test sets: +X_train, X_other, Y_train, Y_other, A_train, A_other = train_test_split( + X_scaled, Y, A, test_size=0.4, random_state=0, stratify=Y, +) -X_train, X_test, Y_train, Y_test, A_train, A_test = train_test_split( - X_scaled, Y, A, test_size=0.4, random_state=0, stratify=Y +# Split (X_other, Y_other, A_other) into validation and test +X_test, X_val, Y_test, Y_val, A_test, A_val = train_test_split( + X_other, Y_other, A_other, test_size=0.5, random_state=0, stratify=Y_other, ) # Work around indexing bug @@ -69,6 +73,8 @@ A_train = A_train.reset_index(drop=True) X_test = X_test.reset_index(drop=True) A_test = A_test.reset_index(drop=True) +X_val = X_val.reset_index(drop=True) +A_val = A_val.reset_index(drop=True) # %% # Training a fairness-unaware predictor @@ -77,7 +83,7 @@ # that does not incorporate fairness. For speed of demonstration, we use the # simple :class:`sklearn.linear_model.LogisticRegression` class: -unmitigated_predictor = GradientBoostingClassifier(n_estimators=1000) +unmitigated_predictor = GradientBoostingClassifier(n_estimators=500) # %%time unmitigated_predictor.fit(X_train, Y_train) @@ -131,7 +137,7 @@ ) # %% -fair_clf.fit(X_train, Y_train, sensitive_features=A_train) +fair_clf.fit(X_val, Y_val, sensitive_features=A_val) # %% diff --git a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py index ab1f9e5cd..93094493a 100644 --- a/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py +++ b/fairlearn/postprocessing/_cvxpy_threshold_optimizer.py @@ -649,7 +649,7 @@ def predict(self, X, *, sensitive_features) -> np.ndarray: np.ndarray The predicted binary labels. """ - sensitive_features_vector = _validate_and_reformat_input( + _, _, sensitive_features_vector, _ = _validate_and_reformat_input( X=X, sensitive_features=sensitive_features, expect_y=False,