_balanced_root_mean_squared_error.py
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import math

import numpy as np
import sklearn.metrics as skm

from ._input_manipulations import _convert_to_ndarray_and_squeeze

_Y_TRUE_NOT_0_1 = "Only 0 and 1 are allowed in y_true and both must be present"


def balanced_root_mean_squared_error(y_true, y_pred, sample_weight=None):
    r"""Calculate the mean of the root mean squared error (RMSE) for the positive and negative cases.

    Used for binary logistic regression, this computes the error as

    .. math::
       \frac{\text{RMSE}(Y=0) + \text{RMSE}(Y=1)}{2}

    The classes are constrained to be :math:`\in \{0, 1\}`. The :code:`y_true` values must
    always be one of these, while :code:`y_pred` can be a continuous probability
    (which could be thresholded to get a predicted class).

    Internally, this builds on the
    :py:func:`sklearn.metrics.mean_squared_error` routine.
    """
    y_ta = _convert_to_ndarray_and_squeeze(y_true)
    y_pa = _convert_to_ndarray_and_squeeze(y_pred)

    # Default to uniform sample weights unless the caller supplies some
    s_w = np.ones(len(y_ta))
    if sample_weight is not None:
        s_w = _convert_to_ndarray_and_squeeze(sample_weight)

    # Both classes must be present in y_true, and nothing else
    y_ta_values = np.unique(y_ta)
    if not np.array_equal(y_ta_values, [0, 1]):
        raise ValueError(_Y_TRUE_NOT_0_1)

    # Compute the RMSE separately for the negative (0) and positive (1) cases
    errs = np.zeros(2)
    for i in range(0, 2):
        indices = (y_ta == i)
        y_ta_s = y_ta[indices]
        y_pa_s = y_pa[indices]
        s_w_s = s_w[indices]
        errs[i] = math.sqrt(skm.mean_squared_error(y_ta_s, y_pa_s, sample_weight=s_w_s))

    return errs.mean()
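

# ---------------------------------------------------------------------------
# A minimal usage sketch (an illustrative addition, not part of the original
# module). The relative import above means this file cannot be executed
# directly, so the snippet is left as a comment; the import path and the
# numbers below are assumptions for illustration only.
#
#     import numpy as np
#     from fairlearn.metrics import balanced_root_mean_squared_error
#
#     y_true = np.array([0, 0, 1, 1])
#     y_pred = np.array([0.1, 0.3, 0.6, 0.9])
#
#     # RMSE(Y=0) = sqrt((0.1**2 + 0.3**2) / 2) ~= 0.2236
#     # RMSE(Y=1) = sqrt((0.4**2 + 0.1**2) / 2) ~= 0.2915
#     # Balanced RMSE = (0.2236 + 0.2915) / 2 ~= 0.2576
#     print(balanced_root_mean_squared_error(y_true, y_pred))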