ENH added agg argument to equalized odds difference and ratio to support "average odds" #960

Open
wants to merge 7 commits into base: main
42 changes: 34 additions & 8 deletions fairlearn/metrics/_fairness_metrics.py
@@ -100,7 +100,8 @@


def equalized_odds_difference(
y_true, y_pred, *, sensitive_features, method="between_groups",
sample_weight=None, agg="worst_case"
) -> float:
"""Calculate the equalized odds difference.

@@ -123,27 +124,40 @@
Predicted labels :math:`h(X)` returned by the classifier.

sensitive_features :
The sensitive features over which equalized odds should be assessed

method : str
How to compute the differences.
See :func:`fairlearn.metrics.MetricFrame.difference` for details.

sample_weight : array-like
The sample weights

agg : str
The aggregation method. One of `"worst_case"` or `"mean"`.
If `"worst_case"`, the greater of the false positive rate
difference and the true positive rate difference is returned.
If `"mean"`, the mean of the two differences is returned.

Returns
-------
float
The equalized odds difference
"""
if agg not in ["worst_case", "mean"]:
    raise ValueError(f"agg must be one of 'worst_case' or 'mean', got {agg}")

eo = _get_eo_frame(y_true, y_pred, sensitive_features, sample_weight)

if agg == "worst_case":
    return max(eo.difference(method=method))
else:
    return eo.difference(method=method).mean()


def equalized_odds_ratio(
y_true, y_pred, *, sensitive_features, method="between_groups",
sample_weight=None, agg="worst_case"
) -> float:
"""Calculate the equalized odds ratio.

@@ -166,7 +180,7 @@
Predicted labels :math:`h(X)` returned by the classifier.

sensitive_features :
The sensitive features over which equalized odds should be assessed

method : str
How to compute the differences. See :func:`fairlearn.metrics.MetricFrame.ratio`
@@ -175,14 +189,26 @@
sample_weight : array-like
The sample weights

agg : str
The aggregation method. One of `"worst_case"` or `"mean"`.
If `"worst_case"`, the smaller of the false positive rate ratio
and the true positive rate ratio is returned.
If `"mean"`, the mean of the two ratios is returned.

Returns
-------
float
The equalized odds ratio
"""
if agg not in ["worst_case", "mean"]:
    raise ValueError(f"agg must be one of 'worst_case' or 'mean', got {agg}")

eo = _get_eo_frame(y_true, y_pred, sensitive_features, sample_weight)

if agg == "worst_case":
    return min(eo.ratio(method=method))
else:
    return eo.ratio(method=method).mean()


def _get_eo_frame(y_true, y_pred, sensitive_features, sample_weight) -> MetricFrame:
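For context, a minimal usage sketch of the new `agg` argument. The toy labels and group assignments below are invented for illustration, and `agg` is only available on this PR's branch, not in released fairlearn.

# Minimal usage sketch of the new `agg` argument; toy data is made up for illustration.
from fairlearn.metrics import equalized_odds_difference

y_true = [0, 1, 1, 0, 1, 0, 1, 1]
y_pred = [0, 1, 0, 0, 1, 1, 1, 0]
group = ["a", "a", "a", "a", "b", "b", "b", "b"]

# Default aggregation: the larger of the TPR and FPR differences between groups.
worst = equalized_odds_difference(y_true, y_pred, sensitive_features=group)

# New option from this PR: the mean of the two differences ("average odds difference").
avg = equalized_odds_difference(
    y_true, y_pred, sensitive_features=group, agg="mean"
)
print(worst, avg)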
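And a sketch of the hand-computed counterpart for the ratio, using MetricFrame directly; this mirrors what the updated tests assert. The toy data is again invented, and the mapping of `agg` values to min/mean is taken from this PR's changes.

# Hand-computed counterpart of equalized_odds_ratio(..., agg="worst_case"/"mean").
from fairlearn.metrics import MetricFrame, false_positive_rate, true_positive_rate

y_true = [0, 1, 1, 0, 1, 0, 1, 1]
y_pred = [0, 1, 0, 0, 1, 1, 1, 0]
group = ["a", "a", "a", "a", "b", "b", "b", "b"]

frame = MetricFrame(
    metrics={"tpr": true_positive_rate, "fpr": false_positive_rate},
    y_true=y_true,
    y_pred=y_pred,
    sensitive_features=group,
)

ratios = frame.ratio(method="between_groups")
worst_case = ratios.min()     # what agg="worst_case" returns
average_odds = ratios.mean()  # what agg="mean" returns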
48 changes: 32 additions & 16 deletions test/unit/metrics/test_fairness_metrics.py
@@ -1,6 +1,7 @@
# Copyright (c) Microsoft Corporation and Fairlearn contributors.
# Licensed under the MIT License.

from itertools import product
import pytest

from fairlearn.metrics import (
@@ -17,6 +18,9 @@
from .data_for_test import g_1, s_w, y_p, y_t

_aggregate_methods = ["between_groups", "to_overall"]
_agg_options = ["worst_case", "mean"]
# cartesian product of the two lists
_agg_combinations = list(product(_aggregate_methods, _agg_options))


@pytest.mark.parametrize("agg_method", _aggregate_methods)
@@ -79,23 +83,26 @@ def test_demographic_parity_ratio_weighted(agg_method):
assert actual == gm.ratio(method=agg_method)


@pytest.mark.parametrize("agg_method", _aggregate_methods)
def test_equalized_odds_difference(agg_method):
@pytest.mark.parametrize("agg_method, agg", _agg_combinations)
def test_equalized_odds_difference(agg_method, agg):
actual = equalized_odds_difference(
y_t, y_p, sensitive_features=g_1, method=agg_method, agg=agg
)

metrics = {"tpr": true_positive_rate, "fpr": false_positive_rate}
gm = MetricFrame(metrics=metrics, y_true=y_t, y_pred=y_p, sensitive_features=g_1)

diffs = gm.difference(method=agg_method)
if agg == "worst_case":
    assert actual == diffs.max()
else:
    assert actual == diffs.mean()

Review thread on the `else:` branch:

Contributor: Nitpick: for future maintainability it might be nicer to replace else by an explicit if agg == "mean".

Member: I'm testing the options above, so if it's anything else we can't get here. Are you suggesting I don't check at the beginning but only here at the end? I guess I preferred the early return/raise pattern over doing all the calculations just to let users know that we won't in fact allow other inputs for agg.

Contributor (@hildeweerts, Oct 17, 2023): Fair enough. I always like it when tests are very explicit so I don't have to scroll up to see if it makes sense, and it's a bit less error prone in case we ever decide to add another agg option. But it's truly a nitpick :)

Edit: I see now that I accidentally requested changes instead of approve, which makes it seem like I feel much more strongly about this than I do lol.

Member: I want to wait for @MiroDudik's thoughts as well since you both had thoughts on the initial PR and perhaps he feels strongly either way 😎


@pytest.mark.parametrize("agg_method", _aggregate_methods)
def test_equalized_odds_difference_weighted(agg_method):
@pytest.mark.parametrize("agg_method, agg", _agg_combinations)
def test_equalized_odds_difference_weighted(agg_method, agg):
actual = equalized_odds_difference(
y_t, y_p, sensitive_features=g_1, method=agg_method, sample_weight=s_w, agg=agg
)

metrics = {"tpr": true_positive_rate, "fpr": false_positive_rate}
@@ -110,24 +117,30 @@ def test_equalized_odds_difference_weighted(agg_method):
)

diffs = gm.difference(method=agg_method)
if agg == "worst_case":
    assert actual == diffs.max()
else:
    assert actual == diffs.mean()


@pytest.mark.parametrize("agg_method", _aggregate_methods)
def test_equalized_odds_ratio(agg_method):
actual = equalized_odds_ratio(y_t, y_p, method=agg_method, sensitive_features=g_1)
@pytest.mark.parametrize("agg_method, agg", _agg_combinations)
def test_equalized_odds_ratio(agg_method, agg):
actual = equalized_odds_ratio(y_t, y_p, method=agg_method, sensitive_features=g_1, agg=agg)

metrics = {"tpr": true_positive_rate, "fpr": false_positive_rate}
gm = MetricFrame(metrics=metrics, y_true=y_t, y_pred=y_p, sensitive_features=g_1)

ratios = gm.ratio(method=agg_method)
if agg == "worst_case":
    assert actual == ratios.min()
else:
    assert actual == ratios.mean()


@pytest.mark.parametrize("agg_method", _aggregate_methods)
def test_equalized_odds_ratio_weighted(agg_method):
@pytest.mark.parametrize("agg_method, agg", _agg_combinations)
def test_equalized_odds_ratio_weighted(agg_method, agg):
actual = equalized_odds_ratio(
y_t, y_p, method=agg_method, sensitive_features=g_1, sample_weight=s_w, agg=agg
)

metrics = {"tpr": true_positive_rate, "fpr": false_positive_rate}
@@ -142,4 +155,7 @@ def test_equalized_odds_ratio_weighted(agg_method):
)

ratios = gm.ratio(method=agg_method)
if agg == "worst_case":
    assert actual == ratios.min()
else:
    assert actual == ratios.mean()