From 06da6b36ae174f8c653f66080507b1da629c1191 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 2 Feb 2020 16:43:35 +0100 Subject: [PATCH 1/8] sklearn compatible ThresholdOptimizer Signed-off-by: adrinjalali --- Makefile | 18 +- .../postprocessing/_threshold_optimizer.py | 364 ++++++++---------- .../test_threshold_optimization.py | 71 +--- 3 files changed, 189 insertions(+), 264 deletions(-) diff --git a/Makefile b/Makefile index a21545456..7559ff5bd 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,22 @@ # simple makefile to simplify repetitive build env management tasks +PYTHON ?= python +PYTEST ?= pytest + +all: clean inplace test-unit + +clean: + git clean -xfd + +in: inplace # just a shortcut +inplace: + $(PYTHON) setup.py build_ext -i + test-coverage: - python -m pytest test -m "not notebooks" --ignore=test/perf --ignore=test/install --cov=fairlearn --cov-report=xml --cov-report=html + $(PYTEST) test -m "not notebooks" --ignore=test/perf --ignore=test/install --cov=fairlearn --cov-report=xml --cov-report=html test-unit: - python -m pytest ./test/unit + $(PYTEST) ./test/unit test-perf: - python -m pytest ./test/perf \ No newline at end of file + $(PYTEST) ./test/perf \ No newline at end of file diff --git a/fairlearn/postprocessing/_threshold_optimizer.py b/fairlearn/postprocessing/_threshold_optimizer.py index b607e6130..69c9d180f 100644 --- a/fairlearn/postprocessing/_threshold_optimizer.py +++ b/fairlearn/postprocessing/_threshold_optimizer.py @@ -12,25 +12,20 @@ import logging import numpy as np import pandas as pd -import random -from sklearn.exceptions import NotFittedError -from fairlearn.postprocessing import PostProcessing +from sklearn.base import BaseEstimator, ClassifierMixin, is_classifier +from sklearn.utils.validation import (check_X_y, check_consistent_length, + check_array, check_random_state, + check_is_fitted) from ._constants import (LABEL_KEY, SCORE_KEY, SENSITIVE_FEATURE_KEY, OUTPUT_SEPARATOR, DEMOGRAPHIC_PARITY, EQUALIZED_ODDS) from ._roc_curve_utilities import _interpolate_curve, _get_roc from ._interpolated_prediction import InterpolatedPredictor # various error messages -DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE = "{} need to be of equal length." -EMPTY_INPUT_ERROR_MESSAGE = "At least one of sensitive_features, labels, or scores are empty." NON_BINARY_LABELS_ERROR_MESSAGE = "Labels other than 0/1 were provided." -INPUT_DATA_FORMAT_ERROR_MESSAGE = "The only allowed input data formats are: " \ - "list, numpy.ndarray, pandas.DataFrame, pandas.Series. " \ - "Your provided data was of types ({}, {}, {})" NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE = "Currently only {} and {} are supported " \ "constraints.".format(DEMOGRAPHIC_PARITY, EQUALIZED_ODDS) -PREDICT_BEFORE_FIT_ERROR_MESSAGE = "It is required to call 'fit' before 'predict'." MULTIPLE_DATA_COLUMNS_ERROR_MESSAGE = "Post processing currently only supports a single " \ "column in {}." SENSITIVE_FEATURE_NAME_CONFLICT_DETECTED_ERROR_MESSAGE = "A sensitive feature named {} or {} " \ @@ -38,171 +33,9 @@ SCORES_DATA_TOO_MANY_COLUMNS_ERROR_MESSAGE = "The provided scores data contains multiple columns." UNEXPECTED_DATA_TYPE_ERROR_MESSAGE = "Unexpected data type {} encountered." - -_SUPPORTED_CONSTRAINTS = [DEMOGRAPHIC_PARITY, EQUALIZED_ODDS] - logger = logging.getLogger(__name__) -class ThresholdOptimizer(PostProcessing): - """An Estimator based on the threshold optimization approach. - - The procedure followed is described in detail in - `Hardt et al. (2016) `_. 
- - :param unconstrained_predictor: The trained predictor whose output will be post processed - :type unconstrained_predictor: A trained predictor - :param estimator: An untrained estimator that will be trained, and - subsequently its output will be post processed - :type estimator: An untrained estimator - :param grid_size: The number of ticks on the grid over which we evaluate the curves. - A large grid_size means that we approximate the actual curve, so it increases the chance - of being very close to the actual best solution. - :type grid_size: int - :param flip: Allow flipping to negative weights if it improves accuracy. - :type flip: bool - :param plot: Show ROC/selection-error plot if True - :type plot: bool - """ - - def __init__(self, *, unconstrained_predictor=None, estimator=None, - constraints=DEMOGRAPHIC_PARITY, grid_size=1000, flip=True, plot=False): - super(ThresholdOptimizer, self).__init__( - unconstrained_predictor=unconstrained_predictor, - estimator=estimator, - constraints=constraints) - - self._constraints = constraints - if self._constraints not in _SUPPORTED_CONSTRAINTS: - raise ValueError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) - - self._grid_size = grid_size - self._flip = flip - self._plot = plot - self._post_processed_predictor_by_sensitive_feature = None - - def fit(self, X, y, *, sensitive_features, **kwargs): - """Fit the model. - - The fit is based on training features and labels, sensitive features, - as well as the fairness-unaware predictor or estimator. If an estimator was passed - in the constructor this fit method will call `fit(X, y, **kwargs)` on said estimator. - - :param X: The feature matrix - :type X: numpy.ndarray or pandas.DataFrame - :param y: The label vector - :type y: numpy.ndarray, pandas.DataFrame, pandas.Series, or list - :param sensitive_features: sensitive features to identify groups by, currently allows - only a single column - :type sensitive_features: currently 1D array as numpy.ndarray, list, pandas.DataFrame, - or pandas.Series - """ - self._validate_input_data(X, sensitive_features, y) - - # postprocessing can't handle 0/1 as floating point numbers, so this converts it to int - if type(y) in [np.ndarray, pd.DataFrame, pd.Series]: - y = y.astype(int) - else: - y = [int(y_val) for y_val in y] - - if self._estimator: - # train estimator on data first - self._validate_estimator() - self._estimator.fit(X, y, **kwargs) - self._unconstrained_predictor = self._estimator - - self._validate_predictor() - - scores = self._unconstrained_predictor.predict(X) - threshold_optimization_method = None - if self._constraints == DEMOGRAPHIC_PARITY: - threshold_optimization_method = \ - _threshold_optimization_demographic_parity - elif self._constraints == EQUALIZED_ODDS: - threshold_optimization_method = \ - _threshold_optimization_equalized_odds - else: - raise ValueError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) - - self._post_processed_predictor_by_sensitive_feature = threshold_optimization_method( - sensitive_features, y, scores, self._grid_size, self._flip, self._plot) - - def predict(self, X, *, sensitive_features, random_state=None): - """Predict label for each sample in X while taking into account sensitive features. 
- - :param X: feature matrix - :type X: numpy.ndarray or pandas.DataFrame - :param sensitive_features: sensitive features to identify groups by, currently allows - only a single column - :type sensitive_features: currently 1D array as numpy.ndarray, list, pandas.DataFrame, - or pandas.Series - :param random_state: set to a constant for reproducibility - :type random_state: int - :return: predictions in numpy.ndarray - """ - if random_state: - random.seed(random_state) - - self._validate_post_processed_predictor_is_fitted() - self._validate_input_data(X, sensitive_features) - unconstrained_predictions = self._unconstrained_predictor.predict(X) - - positive_probs = _vectorized_prediction( - self._post_processed_predictor_by_sensitive_feature, - sensitive_features, - unconstrained_predictions) - return (positive_probs >= np.random.rand(len(positive_probs))) * 1 - - def _pmf_predict(self, X, *, sensitive_features): - """Probabilistic mass function. - - :param X: Feature matrix - :type X: numpy.ndarray or pandas.DataFrame - :param sensitive_features: Sensitive features to identify groups by, currently allows - only a single column - :type sensitive_features: Currently 1D array as numpy.ndarray, list, pandas.DataFrame, - or pandas.Series - :return: array of tuples with probabilities for predicting 0 or 1, respectively. The sum - of the two numbers in each tuple needs to add up to 1. - :rtype: numpy.ndarray - """ - self._validate_post_processed_predictor_is_fitted() - self._validate_input_data(X, sensitive_features) - positive_probs = _vectorized_prediction( - self._post_processed_predictor_by_sensitive_feature, sensitive_features, - self._unconstrained_predictor.predict(X)) - return np.array([[1.0 - p, p] for p in positive_probs]) - - def _validate_post_processed_predictor_is_fitted(self): - if not self._post_processed_predictor_by_sensitive_feature: - raise NotFittedError(PREDICT_BEFORE_FIT_ERROR_MESSAGE) - - def _validate_input_data(self, X, sensitive_features, y=None): - allowed_input_types = [list, np.ndarray, pd.DataFrame, pd.Series] - if type(X) not in allowed_input_types or \ - type(sensitive_features) not in allowed_input_types or \ - (y is not None and type(y) not in allowed_input_types): - raise TypeError(INPUT_DATA_FORMAT_ERROR_MESSAGE - .format(type(X).__name__, - type(y).__name__, - type(sensitive_features).__name__)) - - if len(X) == 0 or len(sensitive_features) == 0 or (y is not None and len(y) == 0): - raise ValueError(EMPTY_INPUT_ERROR_MESSAGE) - - if y is None: - if len(X) != len(sensitive_features) or (y is not None and len(X) != len(y)): - raise ValueError(DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE - .format("X and sensitive_features")) - else: - if len(X) != len(sensitive_features) or (y is not None and len(X) != len(y)): - raise ValueError(DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE - .format("X, sensitive_features, and y")) - - if set(np.unique(y)) > set([0, 1]): - raise ValueError(NON_BINARY_LABELS_ERROR_MESSAGE) - - def _threshold_optimization_demographic_parity(sensitive_features, labels, scores, grid_size=1000, flip=True, plot=False): """Calculate the selection and error rates for every sensitive feature value. 
@@ -427,6 +260,167 @@ def _threshold_optimization_equalized_odds(sensitive_features, labels, scores, g return predicted_EO_by_sensitive_feature +_SUPPORTED_CONSTRAINTS = { + DEMOGRAPHIC_PARITY: _threshold_optimization_demographic_parity, + EQUALIZED_ODDS: _threshold_optimization_equalized_odds} + + +def _get_soft_predictions(estimator, X): + """Returns soft predictions of a classifier using either `predict_proba` + or `decision_function` methods. + """ + if hasattr(estimator, "predict_proba"): + return estimator.predict_proba(X) + elif hasattr(estimator, "decision_function"): + return estimator.decision_function(X) + else: + raise ValueError("{} provides neither a `predict_proba` nor a " + "`decision_function` method.".format( + estimator.__class__.__name__ + )) + + +class ThresholdOptimizer(ClassifierMixin, BaseEstimator): + """A classifier based on the threshold optimization approach. + + The procedure followed is described in detail in + `Hardt et al. (2016) `_. + + :param unconstrained_predictor: The trained predictor whose output will be + post processed + :type unconstrained_predictor: A trained predictor + + :param estimator: An untrained estimator that will be trained, and + subsequently its output will be post processed + :type estimator: An untrained estimator + + :param grid_size: The number of ticks on the grid over which we evaluate the + curves. A large grid_size means that we approximate the actual curve, so + it increases the chance of being very close to the actual best solution. + :type grid_size: int + + :param flip: Allow flipping to negative weights if it improves accuracy. + :type flip: bool + + :param plot: Show ROC/selection-error plot if True + :type plot: bool + + :param random_state: set to a constant for reproducibility + :type random_state: int, np.RandomState, or None + """ + + def __init__(self, *, estimator=None, constraints=DEMOGRAPHIC_PARITY, + grid_size=1000, flip=True, plot=False, random_state=None): + + self.constraints = constraints + self.grid_size = grid_size + self.flip = flip + self.plot = plot + self.estimator = estimator + self.random_state = random_state + + def fit(self, X, y, *, sensitive_features, **kwargs): + """Fit the model. + + The fit is based on training features and labels, sensitive features, as + well as the fairness-unaware predictor or estimator. If an estimator was + passed in the constructor this fit method will call `fit(X, y, + **kwargs)` on said estimator. 
+ + :param X: The feature matrix + :type X: numpy.ndarray or pandas.DataFrame + + :param y: The label vector + :type y: numpy.ndarray, pandas.DataFrame, pandas.Series, or list + + :param sensitive_features: sensitive features to identify groups by, + currently allows only a single column + :type sensitive_features: currently 1D array as numpy.ndarray, list, + pandas.DataFrame, or pandas.Series + """ + if self.constraints not in _SUPPORTED_CONSTRAINTS: + raise ValueError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) + threshold_optimizer = _SUPPORTED_CONSTRAINTS[self.constraints] + + if not is_classifier(self.estimator): + raise ValueError("{} needs a classifier to work on, and {} is not " + "one.".format(self.__class__.__name__, + self.estimator.__class__.__name__)) + + X, y, sensitive_features = self._validate_input_data( + X, sensitive_features, y) + + self.estimator_ = self.estimator.fit(X, y, **kwargs) + + scores = _get_soft_predictions(self.estimator_, X) + self._post_processed_predictor_by_sensitive_feature = threshold_optimizer( + sensitive_features, y, scores, self.grid_size, self.flip, self.plot) + + def predict(self, X, *, sensitive_features): + """Predict label for each sample in X while taking into account sensitive features. + + :param X: feature matrix + :type X: numpy.ndarray or pandas.DataFrame + + :param sensitive_features: sensitive features to identify groups by, currently allows + only a single column + :type sensitive_features: currently 1D array as numpy.ndarray, list, pandas.DataFrame, + or pandas.Series + + :return: predictions in numpy.ndarray + """ + check_is_fitted(self) + random_state = check_random_state(self.random_state) + + X, _, sensitive_features = self._validate_input_data( + X, sensitive_features) + + unconstrained_predictions = self._unconstrained_predictor.predict(X) + + positive_probs = _vectorized_prediction( + self._post_processed_predictor_by_sensitive_feature, + sensitive_features, + unconstrained_predictions) + return (positive_probs >= random_state.rand(len(positive_probs))) * 1 + + def _pmf_predict(self, X, *, sensitive_features): + """Probabilistic mass function. + + :param X: Feature matrix + :type X: numpy.ndarray or pandas.DataFrame + + :param sensitive_features: Sensitive features to identify groups by, + currently allows only a single column + + :type sensitive_features: Currently 1D array as numpy.ndarray, list, + pandas.DataFrame, or pandas.Series + + :return: array of tuples with probabilities for predicting 0 or 1, + respectively. The sum of the two numbers in each tuple needs to add + up to 1. + :rtype: numpy.ndarray + """ + check_is_fitted(self) + X, _, sensitive_features = self._validate_input_data( + X, sensitive_features) + positive_probs = _vectorized_prediction( + self._post_processed_predictor_by_sensitive_feature, sensitive_features, + self._unconstrained_predictor.predict(X)) + return np.array([[1.0 - p, p] for p in positive_probs]) + + def _validate_input_data(self, X, sensitive_features, y=None): + if y: + X, y = check_X_y(X, y) + y = check_array(y, ensure_2d=False, dtype=int) + else: + X = check_array(X) + check_consistent_length(X, sensitive_features) + + if set(np.unique(y)) > set([0, 1]): + raise ValueError(NON_BINARY_LABELS_ERROR_MESSAGE) + return X, y, sensitive_features + + def _vectorized_prediction(function_dict, sensitive_features, scores): """Make predictions for all samples with all provided functions. 
@@ -443,37 +437,13 @@ def _vectorized_prediction(function_dict, sensitive_features, scores): :type scores: list, numpy.ndarray, pandas.DataFrame, or pandas.Series """ # handle type conversion to ndarray for other types - sensitive_features_vector = _convert_to_ndarray( - sensitive_features, MULTIPLE_DATA_COLUMNS_ERROR_MESSAGE.format("sensitive_features")) - scores_vector = _convert_to_ndarray(scores, SCORES_DATA_TOO_MANY_COLUMNS_ERROR_MESSAGE) + sensitive_features_vector = check_array(sensitive_features, ensure_2d=False) + scores_vector = check_array(scores, ensure_2d=False) return sum([(sensitive_features_vector == a) * function_dict[a].predict(scores_vector) for a in function_dict]) -def _convert_to_ndarray(data, dataframe_multiple_columns_error_message): - """Convert the input data from list, pandas.Series, or pandas.DataFrame to numpy.ndarray. - - :param data: the data to be converted into a numpy.ndarray - :type data: numpy.ndarray, pandas.Series, pandas.DataFrame, or list - :param dataframe_multiple_columns_error_message: the error message to show in case the - provided data is more than 1-dimensional - :type dataframe_multiple_columns_error_message: - :return: the input data formatted as numpy.ndarray - :rtype: numpy.ndarray - """ - if type(data) == list: - data = np.array(data) - elif type(data) == pd.DataFrame: - if len(data.columns) > 1: - # TODO: extend to multiple columns for additional group data - raise ValueError(dataframe_multiple_columns_error_message) - data = data[data.columns[0]].values - elif type(data) == pd.Series: - data = data.values - return data - - def _reformat_and_group_data(sensitive_features, labels, scores, sensitive_feature_names=None): """Reformats the data into a new pandas.DataFrame and group by sensitive feature values. 
diff --git a/test/unit/postprocessing/test_threshold_optimization.py b/test/unit/postprocessing/test_threshold_optimization.py index 1aa00a9bc..f90c32dbd 100644 --- a/test/unit/postprocessing/test_threshold_optimization.py +++ b/test/unit/postprocessing/test_threshold_optimization.py @@ -11,12 +11,8 @@ (_vectorized_prediction, _threshold_optimization_demographic_parity, _threshold_optimization_equalized_odds, - DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE, - EMPTY_INPUT_ERROR_MESSAGE, NON_BINARY_LABELS_ERROR_MESSAGE, - INPUT_DATA_FORMAT_ERROR_MESSAGE, NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE, - PREDICT_BEFORE_FIT_ERROR_MESSAGE, MULTIPLE_DATA_COLUMNS_ERROR_MESSAGE) from fairlearn.postprocessing._postprocessing import \ PREDICTOR_OR_ESTIMATOR_REQUIRED_ERROR_MESSAGE, EITHER_PREDICTOR_OR_ESTIMATOR_ERROR_MESSAGE, \ @@ -30,34 +26,13 @@ ALLOWED_INPUT_DATA_TYPES = [lambda x: x, np.array, pd.DataFrame, pd.Series] - - -@pytest.mark.parametrize("X_transform", ALLOWED_INPUT_DATA_TYPES) -@pytest.mark.parametrize("sensitive_features_transform", ALLOWED_INPUT_DATA_TYPES) -@pytest.mark.parametrize("predict_method_name", ['predict', '_pmf_predict']) -@pytest.mark.parametrize("constraints", [DEMOGRAPHIC_PARITY, EQUALIZED_ODDS]) -def test_predict_before_fit_error(X_transform, sensitive_features_transform, predict_method_name, - constraints): - X = X_transform(_format_as_list_of_lists(sensitive_features_ex1)) - sensitive_features = sensitive_features_transform(sensitive_features_ex1) - adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(), - constraints=constraints) - - with pytest.raises(ValueError, match=PREDICT_BEFORE_FIT_ERROR_MESSAGE): - getattr(adjusted_predictor, predict_method_name)(X, sensitive_features=sensitive_features) - - -@pytest.mark.parametrize("constraints", [DEMOGRAPHIC_PARITY, EQUALIZED_ODDS]) -def test_both_predictor_and_estimator_error(constraints): - with pytest.raises(ValueError, match=EITHER_PREDICTOR_OR_ESTIMATOR_ERROR_MESSAGE): - ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(), - estimator=ExampleEstimator(), - constraints=constraints) +DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE = ("Found input variables with " + "inconsistent numbers of") @pytest.mark.parametrize("constraints", [DEMOGRAPHIC_PARITY, EQUALIZED_ODDS]) def test_no_predictor_or_estimator_error(constraints): - with pytest.raises(ValueError, match=PREDICTOR_OR_ESTIMATOR_REQUIRED_ERROR_MESSAGE): + with pytest.raises(ValueError, match="needs a classifier to work on"): ThresholdOptimizer(constraints=constraints) @@ -82,24 +57,6 @@ def test_not_predictor(constraints): constraints=constraints) -@pytest.mark.parametrize("X", [None, _format_as_list_of_lists(sensitive_features_ex1)]) -@pytest.mark.parametrize("y", [None, labels_ex]) -@pytest.mark.parametrize("sensitive_features", [None, sensitive_features_ex1]) -@pytest.mark.parametrize("constraints", [DEMOGRAPHIC_PARITY, EQUALIZED_ODDS]) -def test_inconsistent_input_data_types(X, y, sensitive_features, constraints): - adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(), - constraints=constraints) - - error_message = INPUT_DATA_FORMAT_ERROR_MESSAGE.format(type(X).__name__, - type(y).__name__, - type(sensitive_features).__name__) - - if X is None or y is None and sensitive_features is None: - with pytest.raises(TypeError) as exception: - adjusted_predictor.fit(X, y, sensitive_features=sensitive_features) - assert str(exception.value) == error_message - - @pytest.mark.parametrize("X_transform", ALLOWED_INPUT_DATA_TYPES) 
@pytest.mark.parametrize("y_transform", ALLOWED_INPUT_DATA_TYPES) @pytest.mark.parametrize("sensitive_features_transform", ALLOWED_INPUT_DATA_TYPES) @@ -146,8 +103,8 @@ def test_threshold_optimization_different_input_lengths(X_transform, y_transform constraints): n = len(sensitive_features_ex1) for permutation in [(0, 1), (1, 0)]: - with pytest.raises(ValueError, match=DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE - .format("X, sensitive_features, and y")): + with pytest.raises(ValueError, + match=DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE): X = X_transform(_format_as_list_of_lists( sensitive_features_ex1)[:n - permutation[0]]) y = y_transform(labels_ex[:n - permutation[1]]) @@ -157,18 +114,6 @@ def test_threshold_optimization_different_input_lengths(X_transform, y_transform constraints=constraints) adjusted_predictor.fit(X, y, sensitive_features=sensitive_features) - # try providing empty lists in all combinations - for permutation in [(0, n), (n, 0)]: - X = X_transform(_format_as_list_of_lists( - sensitive_features_ex1)[:n - permutation[0]]) - y = y_transform(labels_ex[:n - permutation[1]]) - sensitive_features = sensitive_features_transform(sensitive_features_ex1) - - adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(), - constraints=constraints) - with pytest.raises(ValueError, match=EMPTY_INPUT_ERROR_MESSAGE): - adjusted_predictor.fit(X, y, sensitive_features=sensitive_features) - @pytest.mark.parametrize("score_transform", ALLOWED_INPUT_DATA_TYPES) @pytest.mark.parametrize("y_transform", ALLOWED_INPUT_DATA_TYPES) @@ -449,13 +394,11 @@ def test_predict_different_argument_lengths(sensitive_features, sensitive_featur constraints=constraints) adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_) - with pytest.raises(ValueError, match=DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE - .format("X and sensitive_features")): + with pytest.raises(ValueError, match=DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE): adjusted_predictor.predict( X, sensitive_features=sensitive_features_transform(sensitive_features[:-1])) - with pytest.raises(ValueError, match=DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE - .format("X and sensitive_features")): + with pytest.raises(ValueError, match=DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE): adjusted_predictor.predict(X_transform(_format_as_list_of_lists(sensitive_features))[:-1], sensitive_features=sensitive_features_) From 10754c4d4f00d01e5b9c4b0a80ebd5295058726c Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 2 Feb 2020 16:51:51 +0100 Subject: [PATCH 2/8] remove _postprosessing.py Signed-off-by: adrinjalali --- fairlearn/postprocessing/_postprocessing.py | 108 -------------------- 1 file changed, 108 deletions(-) delete mode 100644 fairlearn/postprocessing/_postprocessing.py diff --git a/fairlearn/postprocessing/_postprocessing.py b/fairlearn/postprocessing/_postprocessing.py deleted file mode 100644 index 40ecc44e2..000000000 --- a/fairlearn/postprocessing/_postprocessing.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. - -PREDICTOR_OR_ESTIMATOR_REQUIRED_ERROR_MESSAGE = "One of 'unconstrained_predictor' and " \ - "'estimator' need to be passed." -EITHER_PREDICTOR_OR_ESTIMATOR_ERROR_MESSAGE = "Only one of 'unconstrained_predictor' and " \ - "'estimator' can be passed." -MISSING_FIT_PREDICT_ERROR_MESSAGE = "The model does not have callable 'fit' or 'predict' methods." -MISSING_PREDICT_ERROR_MESSAGE = "The predictor does not have a callable 'predict' method." 
- - -class PostProcessing: - """Abstract base class for postprocessing approaches for disparity mitigation. - - :param unconstrained_predictor: A predictor with a :code:`predict(X)` method that has already - been trained on the training data; the predictor will subsequently be used in the mitigator - for unconstrained predictions; can only be specified if `estimator` is `None` - :type unconstrainted_predictor: predictor - - :param estimator: An estimator implementing :code:`fit(X, y)` and :code:`predict(X)` methods - that will be trained on the training data and subsequently used in the mitigator for - unconstrained predictions; can only be specified if `unconstrainted_predictor` is `None` - :type estimator: estimator - - :param constraints: the parity constraints to be enforced represented as a string - :type constraints: str - """ - - def __init__(self, *, unconstrained_predictor=None, estimator=None, - constraints=None): - if unconstrained_predictor and estimator: - raise ValueError(EITHER_PREDICTOR_OR_ESTIMATOR_ERROR_MESSAGE) - elif unconstrained_predictor: - self._unconstrained_predictor = unconstrained_predictor - self._estimator = None - self._validate_predictor() - elif estimator: - self._unconstrained_predictor = None - self._estimator = estimator - self._validate_estimator() - else: - raise ValueError(PREDICTOR_OR_ESTIMATOR_REQUIRED_ERROR_MESSAGE) - - def fit(self, X, y, *, sensitive_features, **kwargs): - """Fits the model. - - The fit is based on training features and labels, sensitive features, - as well as the fairness-unaware predictor or estimator. If an estimator was passed - in the constructor this fit method will call :code:`fit(X, y, **kwargs)` on said - estimator. - - :param X: Feature matrix - :type X: numpy.ndarray or pandas.DataFrame - :param y: Label vector - :type y: numpy.ndarray, pandas.DataFrame, pandas.Series, or list - :param sensitive_features: Sensitive features to identify groups by, currently allows - only a single column - :type sensitive_features: currently 1D array as numpy.ndarray, list, pandas.DataFrame, - or pandas.Series - """ - raise NotImplementedError(self.fit.__name__ + " is not implemented") - - def predict(self, X, *, sensitive_features): - """Predict label for each sample in `X` while taking into account sensitive features. - - :param X: Feature matrix - :type X: numpy.ndarray or pandas.DataFrame - :param sensitive_features: Sensitive features to identify groups by, currently allows - only a single column - :type sensitive_features: Currently 1D array as numpy.ndarray, list, pandas.DataFrame, - or pandas.Series - :return: predictions in numpy.ndarray - """ - raise NotImplementedError(self.predict.__name__ + " is not implemented") - - def _pmf_predict(self, X, *, sensitive_features): - """Probabilistic mass function. - - :param X: Feature matrix - :type X: numpy.ndarray or pandas.DataFrame - :param sensitive_features: Sensitive features to identify groups by, currently allows - only a single column - :type sensitive_features: Currently 1D array as numpy.ndarray, list, pandas.DataFrame, - or pandas.Series - :return: array of tuples with probabilities for predicting 0 or 1, respectively. The sum - of the two numbers in each tuple needs to add up to 1. 
- :rtype: numpy.ndarray - """ - raise NotImplementedError(self._pmf_predict.__name__ + " is not implemented") - - def _validate_predictor(self): - """Validate that the _unconstrained_predictor member has a predict function.""" - predict_function = getattr(self._unconstrained_predictor, "predict", None) - if not predict_function or not callable(predict_function): - raise ValueError(MISSING_PREDICT_ERROR_MESSAGE) - - def _validate_estimator(self): - """Validate that the `_estimator` member has both a fit and a predict function.""" - fit_function = getattr(self._estimator, "fit", None) - predict_function = getattr(self._estimator, "predict", None) - if not predict_function or not fit_function or not callable(predict_function) or \ - not callable(fit_function): - raise ValueError(MISSING_FIT_PREDICT_ERROR_MESSAGE) - - -# Ensure that PostProcessing shows up in correct place in documentation -# when it is used as a base class -PostProcessing.__module__ = "fairlearn.postprocessing" From 78b896337d2b00071681ef198bafca4b825b342d Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Wed, 5 Feb 2020 11:50:35 +0100 Subject: [PATCH 3/8] apply suggestions Signed-off-by: adrinjalali --- .../postprocessing/_threshold_optimizer.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/fairlearn/postprocessing/_threshold_optimizer.py b/fairlearn/postprocessing/_threshold_optimizer.py index 69c9d180f..04d68be14 100644 --- a/fairlearn/postprocessing/_threshold_optimizer.py +++ b/fairlearn/postprocessing/_threshold_optimizer.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -"""Threshold Optimization Post Processing algorithm. +"""Threshold Optimization Postprocessing algorithm. This is based on M. Hardt, E. Price, N. Srebro's paper "`Equality of Opportunity in Supervised Learning @@ -26,7 +26,7 @@ NON_BINARY_LABELS_ERROR_MESSAGE = "Labels other than 0/1 were provided." NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE = "Currently only {} and {} are supported " \ "constraints.".format(DEMOGRAPHIC_PARITY, EQUALIZED_ODDS) -MULTIPLE_DATA_COLUMNS_ERROR_MESSAGE = "Post processing currently only supports a single " \ +MULTIPLE_DATA_COLUMNS_ERROR_MESSAGE = "Postprocessing currently only supports a single " \ "column in {}." SENSITIVE_FEATURE_NAME_CONFLICT_DETECTED_ERROR_MESSAGE = "A sensitive feature named {} or {} " \ "was detected. Please rename your column and try again.".format(SCORE_KEY, LABEL_KEY) @@ -260,7 +260,7 @@ def _threshold_optimization_equalized_odds(sensitive_features, labels, scores, g return predicted_EO_by_sensitive_feature -_SUPPORTED_CONSTRAINTS = { +_SUPPORTED_MITIGATIONS = { DEMOGRAPHIC_PARITY: _threshold_optimization_demographic_parity, EQUALIZED_ODDS: _threshold_optimization_equalized_odds} @@ -287,11 +287,11 @@ class ThresholdOptimizer(ClassifierMixin, BaseEstimator): `Hardt et al. (2016) `_. 
:param unconstrained_predictor: The trained predictor whose output will be - post processed + postprocessed :type unconstrained_predictor: A trained predictor :param estimator: An untrained estimator that will be trained, and - subsequently its output will be post processed + subsequently its output will be postprocessed :type estimator: An untrained estimator :param grid_size: The number of ticks on the grid over which we evaluate the @@ -338,9 +338,11 @@ def fit(self, X, y, *, sensitive_features, **kwargs): :type sensitive_features: currently 1D array as numpy.ndarray, list, pandas.DataFrame, or pandas.Series """ - if self.constraints not in _SUPPORTED_CONSTRAINTS: + if self.constraints not in _SUPPORTED_MITIGATIONS: raise ValueError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) - threshold_optimizer = _SUPPORTED_CONSTRAINTS[self.constraints] + threshold_optimizer = _SUPPORTED_MITIGATIONS[self.constraints] + + self.random_state_ = check_random_state(self.random_state) if not is_classifier(self.estimator): raise ValueError("{} needs a classifier to work on, and {} is not " @@ -370,7 +372,6 @@ def predict(self, X, *, sensitive_features): :return: predictions in numpy.ndarray """ check_is_fitted(self) - random_state = check_random_state(self.random_state) X, _, sensitive_features = self._validate_input_data( X, sensitive_features) @@ -381,7 +382,8 @@ def predict(self, X, *, sensitive_features): self._post_processed_predictor_by_sensitive_feature, sensitive_features, unconstrained_predictions) - return (positive_probs >= random_state.rand(len(positive_probs))) * 1 + return (positive_probs >= self.random_state_.rand( + len(positive_probs))) * 1 def _pmf_predict(self, X, *, sensitive_features): """Probabilistic mass function. From 6c0cc01bc108e9d015a067dcea7ceebd7c6aab56 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 6 Feb 2020 12:47:26 +0100 Subject: [PATCH 4/8] add warm_start Signed-off-by: adrinjalali --- .../postprocessing/_threshold_optimizer.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fairlearn/postprocessing/_threshold_optimizer.py b/fairlearn/postprocessing/_threshold_optimizer.py index 04d68be14..a77b38ba0 100644 --- a/fairlearn/postprocessing/_threshold_optimizer.py +++ b/fairlearn/postprocessing/_threshold_optimizer.py @@ -305,18 +305,24 @@ class ThresholdOptimizer(ClassifierMixin, BaseEstimator): :param plot: Show ROC/selection-error plot if True :type plot: bool + :param warm_start: Avoid refitting the underlying estimator if it's already + done in a previous fit. 
+ :type warm_start: bool, default=False + :param random_state: set to a constant for reproducibility - :type random_state: int, np.RandomState, or None + :type random_state: int, np.RandomState, default=None """ def __init__(self, *, estimator=None, constraints=DEMOGRAPHIC_PARITY, - grid_size=1000, flip=True, plot=False, random_state=None): + grid_size=1000, flip=True, plot=False, warm_start=False, + random_state=None): self.constraints = constraints self.grid_size = grid_size self.flip = flip self.plot = plot self.estimator = estimator + self.warm_start = warm_start self.random_state = random_state def fit(self, X, y, *, sensitive_features, **kwargs): @@ -342,7 +348,7 @@ def fit(self, X, y, *, sensitive_features, **kwargs): raise ValueError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) threshold_optimizer = _SUPPORTED_MITIGATIONS[self.constraints] - self.random_state_ = check_random_state(self.random_state) + self._random_state = check_random_state(self.random_state) if not is_classifier(self.estimator): raise ValueError("{} needs a classifier to work on, and {} is not " @@ -352,7 +358,8 @@ def fit(self, X, y, *, sensitive_features, **kwargs): X, y, sensitive_features = self._validate_input_data( X, sensitive_features, y) - self.estimator_ = self.estimator.fit(X, y, **kwargs) + if not hasattr(self, 'estimator_') or not self.warm_start: + self.estimator_ = self.estimator.fit(X, y, **kwargs) scores = _get_soft_predictions(self.estimator_, X) self._post_processed_predictor_by_sensitive_feature = threshold_optimizer( @@ -382,7 +389,7 @@ def predict(self, X, *, sensitive_features): self._post_processed_predictor_by_sensitive_feature, sensitive_features, unconstrained_predictions) - return (positive_probs >= self.random_state_.rand( + return (positive_probs >= self._random_state.rand( len(positive_probs))) * 1 def _pmf_predict(self, X, *, sensitive_features): From 390544eac39b85ced8ad73a03729016b65d6120f Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 6 Feb 2020 14:58:25 +0100 Subject: [PATCH 5/8] further fixes, passing a part of the tests Signed-off-by: adrinjalali --- fairlearn/postprocessing/__init__.py | 2 - .../postprocessing/_threshold_optimizer.py | 13 +-- .../test_threshold_optimization.py | 81 ++++++------------- 3 files changed, 32 insertions(+), 64 deletions(-) diff --git a/fairlearn/postprocessing/__init__.py b/fairlearn/postprocessing/__init__.py index 04e91db0f..f301c1ce7 100644 --- a/fairlearn/postprocessing/__init__.py +++ b/fairlearn/postprocessing/__init__.py @@ -7,10 +7,8 @@ learn how to adjust the predictor's output from the training data. 
""" -from ._postprocessing import PostProcessing # noqa: F401 from ._threshold_optimizer import ThresholdOptimizer # noqa: F401 __all__ = [ - "PostProcessing", "ThresholdOptimizer" ] diff --git a/fairlearn/postprocessing/_threshold_optimizer.py b/fairlearn/postprocessing/_threshold_optimizer.py index a77b38ba0..8f6e04861 100644 --- a/fairlearn/postprocessing/_threshold_optimizer.py +++ b/fairlearn/postprocessing/_threshold_optimizer.py @@ -14,6 +14,7 @@ import pandas as pd from sklearn.base import BaseEstimator, ClassifierMixin, is_classifier +from sklearn.linear_model import SGDClassifier from sklearn.utils.validation import (check_X_y, check_consistent_length, check_array, check_random_state, check_is_fitted) @@ -292,7 +293,7 @@ class ThresholdOptimizer(ClassifierMixin, BaseEstimator): :param estimator: An untrained estimator that will be trained, and subsequently its output will be postprocessed - :type estimator: An untrained estimator + :type estimator: An untrained estimator, default=SGDClassifier() :param grid_size: The number of ticks on the grid over which we evaluate the curves. A large grid_size means that we approximate the actual curve, so @@ -313,7 +314,8 @@ class ThresholdOptimizer(ClassifierMixin, BaseEstimator): :type random_state: int, np.RandomState, default=None """ - def __init__(self, *, estimator=None, constraints=DEMOGRAPHIC_PARITY, + def __init__(self, *, estimator=SGDClassifier(), + constraints=DEMOGRAPHIC_PARITY, grid_size=1000, flip=True, plot=False, warm_start=False, random_state=None): @@ -383,7 +385,8 @@ def predict(self, X, *, sensitive_features): X, _, sensitive_features = self._validate_input_data( X, sensitive_features) - unconstrained_predictions = self._unconstrained_predictor.predict(X) + unconstrained_predictions = _get_soft_predictions( + self.estimator_, X) positive_probs = _vectorized_prediction( self._post_processed_predictor_by_sensitive_feature, @@ -414,11 +417,11 @@ def _pmf_predict(self, X, *, sensitive_features): X, sensitive_features) positive_probs = _vectorized_prediction( self._post_processed_predictor_by_sensitive_feature, sensitive_features, - self._unconstrained_predictor.predict(X)) + _get_soft_predictions(self.estimator_, X)) return np.array([[1.0 - p, p] for p in positive_probs]) def _validate_input_data(self, X, sensitive_features, y=None): - if y: + if y is not None: X, y = check_X_y(X, y) y = check_array(y, ensure_2d=False, dtype=int) else: diff --git a/test/unit/postprocessing/test_threshold_optimization.py b/test/unit/postprocessing/test_threshold_optimization.py index f90c32dbd..b7ebbd15f 100644 --- a/test/unit/postprocessing/test_threshold_optimization.py +++ b/test/unit/postprocessing/test_threshold_optimization.py @@ -5,6 +5,7 @@ import numpy as np import pandas as pd import pytest +from sklearn.datasets import make_classification from fairlearn.postprocessing._constants import DEMOGRAPHIC_PARITY, EQUALIZED_ODDS from fairlearn.postprocessing import ThresholdOptimizer from fairlearn.postprocessing._threshold_optimizer import \ @@ -14,9 +15,6 @@ NON_BINARY_LABELS_ERROR_MESSAGE, NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE, MULTIPLE_DATA_COLUMNS_ERROR_MESSAGE) -from fairlearn.postprocessing._postprocessing import \ - PREDICTOR_OR_ESTIMATOR_REQUIRED_ERROR_MESSAGE, EITHER_PREDICTOR_OR_ESTIMATOR_ERROR_MESSAGE, \ - MISSING_FIT_PREDICT_ERROR_MESSAGE, MISSING_PREDICT_ERROR_MESSAGE from fairlearn.postprocessing._roc_curve_utilities import DEGENERATE_LABELS_ERROR_MESSAGE from .test_utilities import (sensitive_features_ex1, 
sensitive_features_ex2, labels_ex, degenerate_labels_ex, scores_ex, sensitive_feature_names_ex1, @@ -30,31 +28,13 @@ "inconsistent numbers of") -@pytest.mark.parametrize("constraints", [DEMOGRAPHIC_PARITY, EQUALIZED_ODDS]) -def test_no_predictor_or_estimator_error(constraints): - with pytest.raises(ValueError, match="needs a classifier to work on"): - ThresholdOptimizer(constraints=constraints) - - def test_constraints_not_supported(): + X, y = make_classification() + sensitive_arg = np.random.rand(len(X)) with pytest.raises(ValueError, match=NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE): - ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(), - constraints="UnsupportedConstraints") - - -@pytest.mark.parametrize("not_estimator", [ExampleNotEstimator1(), ExampleNotEstimator2()]) -@pytest.mark.parametrize("constraints", [DEMOGRAPHIC_PARITY, EQUALIZED_ODDS]) -def test_not_estimator(not_estimator, constraints): - with pytest.raises(ValueError, match=MISSING_FIT_PREDICT_ERROR_MESSAGE): - ThresholdOptimizer(estimator=not_estimator, - constraints=constraints) - - -@pytest.mark.parametrize("constraints", [DEMOGRAPHIC_PARITY, EQUALIZED_ODDS]) -def test_not_predictor(constraints): - with pytest.raises(ValueError, match=MISSING_PREDICT_ERROR_MESSAGE): - ThresholdOptimizer(unconstrained_predictor=ExampleNotPredictor(), - constraints=constraints) + ThresholdOptimizer(constraints="UnsupportedConstraints").fit( + X, y, sensitive_features=sensitive_arg + ) @pytest.mark.parametrize("X_transform", ALLOWED_INPUT_DATA_TYPES) @@ -63,15 +43,11 @@ def test_not_predictor(constraints): @pytest.mark.parametrize("constraints", [DEMOGRAPHIC_PARITY, EQUALIZED_ODDS]) def test_threshold_optimization_non_binary_labels(X_transform, y_transform, sensitive_features_transform, constraints): - non_binary_labels = copy.deepcopy(labels_ex) - non_binary_labels[0] = 2 - - X = X_transform(_format_as_list_of_lists(sensitive_features_ex1)) - y = y_transform(non_binary_labels) sensitive_features = sensitive_features_transform(sensitive_features_ex1) + X, y = make_classification(n_classes=3, n_informative=8, + n_samples=len(sensitive_features)) - adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(), - constraints=constraints) + adjusted_predictor = ThresholdOptimizer(constraints=constraints) with pytest.raises(ValueError, match=NON_BINARY_LABELS_ERROR_MESSAGE): adjusted_predictor.fit(X, y, sensitive_features=sensitive_features) @@ -83,14 +59,13 @@ def test_threshold_optimization_non_binary_labels(X_transform, y_transform, @pytest.mark.parametrize("constraints", [DEMOGRAPHIC_PARITY, EQUALIZED_ODDS]) def test_threshold_optimization_degenerate_labels(X_transform, y_transform, sensitive_features_transform, constraints): - X = X_transform(_format_as_list_of_lists(sensitive_features_ex1)) - y = y_transform(degenerate_labels_ex) sensitive_features = sensitive_features_transform(sensitive_features_ex1) + X = np.random.rand(len(sensitive_features), 2) + y = np.zeros(shape=(len(sensitive_features))) - adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(), - constraints=constraints) + adjusted_predictor = ThresholdOptimizer(constraints=constraints) - with pytest.raises(ValueError, match=DEGENERATE_LABELS_ERROR_MESSAGE.format('A')): + with pytest.raises(ValueError, match="The number of classes has to be"): adjusted_predictor.fit(X, y, sensitive_features=sensitive_features) @@ -102,16 +77,15 @@ def test_threshold_optimization_different_input_lengths(X_transform, y_transform 
sensitive_features_transform, constraints): n = len(sensitive_features_ex1) + X_orig, y_orig = make_classification(n_samples=n) for permutation in [(0, 1), (1, 0)]: with pytest.raises(ValueError, match=DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE): - X = X_transform(_format_as_list_of_lists( - sensitive_features_ex1)[:n - permutation[0]]) - y = y_transform(labels_ex[:n - permutation[1]]) + X = X_orig[:n - permutation[0]] + y = y_orig[:n - permutation[1]] sensitive_features = sensitive_features_transform(sensitive_features_ex1) - adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(), - constraints=constraints) + adjusted_predictor = ThresholdOptimizer(constraints=constraints) adjusted_predictor.fit(X, y, sensitive_features=sensitive_features) @@ -276,11 +250,9 @@ def test_threshold_optimization_demographic_parity_e2e(sensitive_features, expected_p0, expected_p1, X_transform, y_transform, sensitive_features_transform): - X = X_transform(_format_as_list_of_lists(sensitive_features)) - y = y_transform(labels_ex) sensitive_features_ = sensitive_features_transform(sensitive_features) - adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(), - constraints=DEMOGRAPHIC_PARITY) + X, y = make_classification(n_samples=len(sensitive_features_)) + adjusted_predictor = ThresholdOptimizer(constraints=DEMOGRAPHIC_PARITY) adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_) predictions = adjusted_predictor._pmf_predict(X, sensitive_features=sensitive_features_) @@ -310,8 +282,7 @@ def test_threshold_optimization_equalized_odds_e2e( X = X_transform(_format_as_list_of_lists(sensitive_features)) y = y_transform(labels_ex) sensitive_features_ = sensitive_features_transform(sensitive_features) - adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(), - constraints=EQUALIZED_ODDS) + adjusted_predictor = ThresholdOptimizer(constraints=EQUALIZED_ODDS) adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_) predictions = adjusted_predictor._pmf_predict(X, sensitive_features=sensitive_features_) @@ -348,8 +319,7 @@ def test_predict_output_0_or_1(sensitive_features, sensitive_feature_names, X_tr X = X_transform(_format_as_list_of_lists(sensitive_features)) y = y_transform(labels_ex) sensitive_features_ = sensitive_features_transform(sensitive_features) - adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(), - constraints=constraints) + adjusted_predictor = ThresholdOptimizer(constraints=constraints) adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_) predictions = adjusted_predictor.predict(X, sensitive_features=sensitive_features_) @@ -365,11 +335,9 @@ def test_predict_output_0_or_1(sensitive_features, sensitive_feature_names, X_tr @pytest.mark.parametrize("constraints", [DEMOGRAPHIC_PARITY, EQUALIZED_ODDS]) def test_predict_multiple_sensitive_features_columns_error( sensitive_features, sensitive_feature_names, X_transform, y_transform, constraints): - X = X_transform(_format_as_list_of_lists(sensitive_features)) - y = y_transform(labels_ex) sensitive_features_ = pd.DataFrame({"A1": sensitive_features, "A2": sensitive_features}) - adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(), - constraints=constraints) + X, y = make_classification(n_samples=len(sensitive_features_)) + adjusted_predictor = ThresholdOptimizer(constraints=constraints) adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_) with pytest.raises(ValueError, 
@@ -390,8 +358,7 @@ def test_predict_different_argument_lengths(sensitive_features, sensitive_featur X = X_transform(_format_as_list_of_lists(sensitive_features)) y = y_transform(labels_ex) sensitive_features_ = sensitive_features_transform(sensitive_features) - adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(), - constraints=constraints) + adjusted_predictor = ThresholdOptimizer(constraints=constraints) adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_) with pytest.raises(ValueError, match=DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE): From f7326a69399a282fb04d6fb10df3d3240038ba3b Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 6 Feb 2020 15:02:56 +0100 Subject: [PATCH 6/8] further fixes, passing a part of the tests Signed-off-by: adrinjalali --- test/unit/postprocessing/test_threshold_optimization.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/unit/postprocessing/test_threshold_optimization.py b/test/unit/postprocessing/test_threshold_optimization.py index b7ebbd15f..1f113879b 100644 --- a/test/unit/postprocessing/test_threshold_optimization.py +++ b/test/unit/postprocessing/test_threshold_optimization.py @@ -251,8 +251,11 @@ def test_threshold_optimization_demographic_parity_e2e(sensitive_features, X_transform, y_transform, sensitive_features_transform): sensitive_features_ = sensitive_features_transform(sensitive_features) - X, y = make_classification(n_samples=len(sensitive_features_)) - adjusted_predictor = ThresholdOptimizer(constraints=DEMOGRAPHIC_PARITY) + X = np.random.rand(len(sensitive_features_), 2) + y = y_transform(labels_ex) + + adjusted_predictor = ThresholdOptimizer(estimator=ExampleEstimator(), + constraints=DEMOGRAPHIC_PARITY) adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_) predictions = adjusted_predictor._pmf_predict(X, sensitive_features=sensitive_features_) From df89f19d1bec38c510904d03cc072db1c0e43110 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 6 Feb 2020 15:42:09 +0100 Subject: [PATCH 7/8] accept arbitrary input type for sensitive features Signed-off-by: adrinjalali --- fairlearn/postprocessing/_threshold_optimizer.py | 3 ++- test/unit/postprocessing/test_threshold_optimization.py | 2 +- test/unit/postprocessing/test_utilities.py | 9 ++++++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/fairlearn/postprocessing/_threshold_optimizer.py b/fairlearn/postprocessing/_threshold_optimizer.py index 8f6e04861..cb574d1a8 100644 --- a/fairlearn/postprocessing/_threshold_optimizer.py +++ b/fairlearn/postprocessing/_threshold_optimizer.py @@ -449,7 +449,8 @@ def _vectorized_prediction(function_dict, sensitive_features, scores): :type scores: list, numpy.ndarray, pandas.DataFrame, or pandas.Series """ # handle type conversion to ndarray for other types - sensitive_features_vector = check_array(sensitive_features, ensure_2d=False) + sensitive_features_vector = check_array(sensitive_features, ensure_2d=False, + dtype='O') scores_vector = check_array(scores, ensure_2d=False) return sum([(sensitive_features_vector == a) * function_dict[a].predict(scores_vector) diff --git a/test/unit/postprocessing/test_threshold_optimization.py b/test/unit/postprocessing/test_threshold_optimization.py index 1f113879b..7c1761d6d 100644 --- a/test/unit/postprocessing/test_threshold_optimization.py +++ b/test/unit/postprocessing/test_threshold_optimization.py @@ -254,7 +254,7 @@ def test_threshold_optimization_demographic_parity_e2e(sensitive_features, X = 
np.random.rand(len(sensitive_features_), 2) y = y_transform(labels_ex) - adjusted_predictor = ThresholdOptimizer(estimator=ExampleEstimator(), + adjusted_predictor = ThresholdOptimizer(estimator=ExamplePredictor(), constraints=DEMOGRAPHIC_PARITY) adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_) diff --git a/test/unit/postprocessing/test_utilities.py b/test/unit/postprocessing/test_utilities.py index f2b7ea0d6..b6bd0a7f9 100644 --- a/test/unit/postprocessing/test_utilities.py +++ b/test/unit/postprocessing/test_utilities.py @@ -4,6 +4,7 @@ from collections import defaultdict, namedtuple import numpy as np import pandas as pd +from sklearn.base import BaseEstimator, ClassifierMixin from fairlearn.postprocessing._threshold_operation import ThresholdOperation from fairlearn.postprocessing._constants import SCORE_KEY, LABEL_KEY, SENSITIVE_FEATURE_KEY @@ -19,9 +20,15 @@ LabelAndPrediction = namedtuple('LabelAndPrediction', 'label prediction') -class ExamplePredictor(): +class ExamplePredictor(ClassifierMixin, BaseEstimator): + def fit(self, *args, **kwargs): + return self + def predict(self, X): return scores_ex + + def predict_proba(self, X): + return self.predict(X) class ExampleNotPredictor(): From fa2c809a0eb16cee48591675be09e9adea06acda Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 6 Feb 2020 15:49:23 +0100 Subject: [PATCH 8/8] use log loss to have predict_proba Signed-off-by: adrinjalali --- fairlearn/postprocessing/_threshold_optimizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fairlearn/postprocessing/_threshold_optimizer.py b/fairlearn/postprocessing/_threshold_optimizer.py index cb574d1a8..5d64e25c1 100644 --- a/fairlearn/postprocessing/_threshold_optimizer.py +++ b/fairlearn/postprocessing/_threshold_optimizer.py @@ -293,7 +293,7 @@ class ThresholdOptimizer(ClassifierMixin, BaseEstimator): :param estimator: An untrained estimator that will be trained, and subsequently its output will be postprocessed - :type estimator: An untrained estimator, default=SGDClassifier() + :type estimator: An untrained estimator, default=SGDClassifier(loss='log') :param grid_size: The number of ticks on the grid over which we evaluate the curves. A large grid_size means that we approximate the actual curve, so @@ -314,7 +314,7 @@ class ThresholdOptimizer(ClassifierMixin, BaseEstimator): :type random_state: int, np.RandomState, default=None """ - def __init__(self, *, estimator=SGDClassifier(), + def __init__(self, *, estimator=SGDClassifier(loss='log'), constraints=DEMOGRAPHIC_PARITY, grid_size=1000, flip=True, plot=False, warm_start=False, random_state=None):
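
---

For reviewers, a minimal usage sketch of the scikit-learn-compatible API introduced by this series follows. It is illustrative only, not part of the patch: it assumes the patched fairlearn.postprocessing is importable, and the synthetic data, the string group labels, and the LogisticRegression base estimator are arbitrary choices made for the example.

    # Sketch of the post-patch ThresholdOptimizer API: estimator passed at
    # construction, fit/predict taking a sensitive_features keyword argument.
    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from fairlearn.postprocessing import ThresholdOptimizer
    from fairlearn.postprocessing._constants import DEMOGRAPHIC_PARITY

    # Synthetic binary classification data with a two-group sensitive feature.
    # Assumption: any 1D array-like of group labels is accepted, per the docstrings.
    X, y = make_classification(n_samples=200, random_state=0)
    sensitive = np.random.RandomState(0).choice(["A", "B"], size=len(X))

    # LogisticRegression exposes predict_proba, so _get_soft_predictions can use it.
    postprocessor = ThresholdOptimizer(
        estimator=LogisticRegression(),
        constraints=DEMOGRAPHIC_PARITY,  # or EQUALIZED_ODDS
        grid_size=1000,
        flip=True,
        random_state=0,
    )
    postprocessor.fit(X, y, sensitive_features=sensitive)
    y_pred = postprocessor.predict(X, sensitive_features=sensitive)

Setting warm_start=True (added in patch 4/8) would skip refitting the wrapped estimator on subsequent fit calls and only recompute the per-group thresholds.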