Skip to content

Commit

Permalink
Add ignore_nan option to mad_std
Browse files Browse the repository at this point in the history
(this is a squash of 38 commits with messages below:)

make mad_std nan-compatible

mad_std too

add changelog entry

np->numpy in docstrings & check numpy version (using 1.9, because
officially it was added to numpy then according to their docs...)

use astropy.utils.compat (thanks @bsipocz)

1_9, not 1P9

bump to 1.10 to avoid np 1.9 weirdness

attempt to work around older numpy versions using ma.median

add tests for scalarness and use np.ma.median except when axis=None.
Also, change 'a' to 'data'.  'a' is a bad variable name and led to a few
mistakes

add another test of madstd with nan & axes

add a warning about NaNs in arrays for np<1.10.  Remove a test for those
versions.

update docstrings & changelog.  Add deprecation of old argument

correct the code to match the comment

change order of bool checking to do expensive one last

Test appropriate numpy versions, check warnings, and use asked array comparison

switch to catch_warnigns

for numpy 1.13, treatment of NaNs in masked arrays changed

add GE_13 to __ALL__

xfail the masked array test to avoid supporting incorrect behavior
see
#5232 (review)

udpate the special cases as per @mwcraig's helpful chart!

fix the tests: the behavior of the function when given masked arrays vs
non-masked arrays has changed: we're now forcing the correct behavior
for earlier versions

implement @juliantaylor's  suggetions

test should no longer use np.ma.allclose (though I don't know why ma
wouldn't just work....)

this commit will fail: I've fixed the underlying issue but kept the
tests flexible using the wrong data type...

tests for array type

double backticks

a second instance of bacticks

had a case wrong: one of the warn cases should *not* return a NaN for
np<1.11 (which is why we're catching a warning there)

numpy 1.10 np.ma.median([1,2,nan,4,5]) returns nan by default

no more old numpy supports.  Mixed feelings about this

fix an import

cleanup to address the numpy <=1.8 deprecation

more cleanup

fix changelog

add a note about keepdims

whitespace fix

fix imports

trailing whitespace?!!?!?!
  • Loading branch information
keflavich committed May 15, 2017
1 parent 5e6c349 commit e96efa7
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 10 deletions.
4 changes: 4 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,10 @@ New Features
- Added ``axis`` keyword to ``biweight_location`` and
``biweight_midvariance``. [#5127, #5158]

- ``median_absolute_deviation`` and ``mad_std`` have ``ignore_nan`` option
that will use ``np.ma.median`` with nans masked out or ``np.nanmedian``
instead of ``np.median`` when computing the median. [#5232]

- ``astropy.table``

- Allow renaming mixin columns. [#5469]
Expand Down
62 changes: 52 additions & 10 deletions astropy/stats/funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
import math
import numpy as np

from warnings import warn

from ..utils.compat import NUMPY_LT_1_10
from ..utils.decorators import deprecated_renamed_argument
from ..utils import isiterable
from ..extern.six.moves import range

Expand Down Expand Up @@ -718,22 +722,28 @@ def poisson_conf_interval(n, interval='root-n', sigma=1, background=0,
return conf_interval


def median_absolute_deviation(a, axis=None, func=None):
@deprecated_renamed_argument('a', 'data', '2.0')
def median_absolute_deviation(data, axis=None, func=None, ignore_nan=False):
"""
Calculate the median absolute deviation (MAD).
The MAD is defined as ``median(abs(a - median(a)))``.
Parameters
----------
a : array-like
data : array-like
Input array or object that can be converted to an array.
axis : {int, sequence of int, None}, optional
Axis along which the MADs are computed. The default (`None`) is
to compute the MAD of the flattened array.
func : callable, optional
The function used to compute the median. Defaults to `numpy.ma.median`
for masked arrays, otherwise to `numpy.median`.
ignore_nan : bool
Ignore NaN values (treat them as if they are not in the array) when
computing the median. This will use `numpy.ma.median` if ``axis`` is
specified, or `numpy.nanmedian` if ``axis==None`` and numpy's version
is >1.10 because nanmedian is slightly faster in this case.
Returns
-------
Expand Down Expand Up @@ -765,26 +775,52 @@ def median_absolute_deviation(a, axis=None, func=None):
# See https://github.com/numpy/numpy/issues/7330 why using np.ma.median
# for normal arrays should not be done (summary: np.ma.median always
# returns an masked array even if the result should be scalar). (#4658)
if isinstance(a, np.ma.MaskedArray):
if isinstance(data, np.ma.MaskedArray):
is_masked = True
func = np.ma.median
if ignore_nan:
data = np.ma.masked_invalid(data)
elif ignore_nan:
is_masked = False
func = np.nanmedian
else:
is_masked = False
func = np.median
else:
is_masked = None

a = np.asanyarray(a)
a_median = func(a, axis=axis)
if not ignore_nan and NUMPY_LT_1_10 and np.any(np.isnan(data)):
warn("Numpy versions <1.10 will return a number rather than NaN for "
"the median of arrays containing NaNs. This behavior is "
"unlikely to be what you expect.")

data = np.asanyarray(data)
# np.nanmedian has `keepdims`, which is a good option if we're not allowing
# user-passed functions here
data_median = func(data, axis=axis)

# broadcast the median array before subtraction
if axis is not None:
if isiterable(axis):
for ax in sorted(list(axis)):
a_median = np.expand_dims(a_median, axis=ax)
data_median = np.expand_dims(data_median, axis=ax)
else:
a_median = np.expand_dims(a_median, axis=axis)
data_median = np.expand_dims(data_median, axis=axis)

result = func(np.abs(data - data_median), axis=axis, overwrite_input=True)

if axis is None and np.ma.isMaskedArray(result):
# return scalar version
result = result.item()
elif np.ma.isMaskedArray(result) and is_masked == False:
# if the input array was not a masked array, we don't want to return a
# masked array
result = result.filled(fill_value=np.nan)

return func(np.abs(a - a_median), axis=axis)
return result


def mad_std(data, axis=None, func=None):
def mad_std(data, axis=None, func=None, ignore_nan=False):
r"""
Calculate a robust standard deviation using the `median absolute
deviation (MAD)
Expand All @@ -811,6 +847,11 @@ def mad_std(data, axis=None, func=None):
func : callable, optional
The function used to compute the median. Defaults to `numpy.ma.median`
for masked arrays, otherwise to `numpy.median`.
ignore_nan : bool
Ignore NaN values (treat them as if they are not in the array) when
computing the median. This will use `numpy.ma.median` if ``axis`` is
specified, or `numpy.nanmedian` if ``axis=None`` and numpy's version is
>1.10 because nanmedian is slightly faster in this case.
Returns
-------
Expand All @@ -834,7 +875,8 @@ def mad_std(data, axis=None, func=None):
"""

# NOTE: 1. / scipy.stats.norm.ppf(0.75) = 1.482602218505602
return median_absolute_deviation(data, axis=axis, func=func) * 1.482602218505602
MAD = median_absolute_deviation(data, axis=axis, func=func, ignore_nan=ignore_nan)
return MAD * 1.482602218505602


def signal_to_noise_oir_ccd(t, source_eps, sky_eps, dark_eps, rd, npix,
Expand Down
67 changes: 67 additions & 0 deletions astropy/stats/tests/test_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from numpy.random import randn, normal
from numpy.testing import assert_equal
from numpy.testing.utils import assert_allclose
from astropy.utils.compat import NUMPY_LT_1_10, NUMPY_LT_1_11
from ...tests.helper import catch_warnings

try:
import scipy # pylint: disable=W0611
Expand Down Expand Up @@ -329,6 +331,48 @@ def test_mad_std():
data = normal(5, 2, size=(100, 100))
assert_allclose(funcs.mad_std(data), 2.0, rtol=0.05)

@pytest.mark.xfail('not NUMPY_LT_1_10')
def test_mad_std_scalar_return():
with NumpyRNGContext(12345):
data = normal(5, 2, size=(10, 10))
# make a masked array with no masked points
data = np.ma.masked_where(np.isnan(data), data)
rslt = funcs.mad_std(data)
# want a scalar result, NOT a masked array
assert np.isscalar(rslt)

data[5,5] = np.nan
rslt = funcs.mad_std(data, ignore_nan=True)
assert np.isscalar(rslt)
with catch_warnings() as warns:
rslt = funcs.mad_std(data)
assert np.isscalar(rslt)
assert not np.isnan(rslt)

def test_mad_std_warns():
with NumpyRNGContext(12345):
data = normal(5, 2, size=(10, 10))
data[5,5] = np.nan

with catch_warnings() as warns:
rslt = funcs.mad_std(data, ignore_nan=False)
if NUMPY_LT_1_10:
w = warns[0]
assert str(w.message).startswith("Numpy versions <1.10 will return")
else:
assert np.isnan(rslt)

def test_mad_std_withnan():
with NumpyRNGContext(12345):
data = np.empty([102,102])
data[:] = np.nan
data[1:-1,1:-1] = normal(5, 2, size=(100, 100))
assert_allclose(funcs.mad_std(data, ignore_nan=True), 2.0, rtol=0.05)

if not NUMPY_LT_1_10:
assert np.isnan(funcs.mad_std([1, 2, 3, 4, 5, np.nan]))
assert_allclose(funcs.mad_std([1, 2, 3, 4, 5, np.nan], ignore_nan=True),
1.482602218505602)

def test_mad_std_with_axis():
data = np.array([[1, 2, 3, 4],
Expand All @@ -340,6 +384,29 @@ def test_mad_std_with_axis():
assert_allclose(funcs.mad_std(data, axis=0), result_axis0)
assert_allclose(funcs.mad_std(data, axis=1), result_axis1)

def test_mad_std_with_axis_and_nan():
data = np.array([[1, 2, 3, 4, np.nan],
[4, 3, 2, 1, np.nan]])
# results follow data symmetry
result_axis0 = np.array([2.22390333, 0.74130111, 0.74130111,
2.22390333, np.nan])
result_axis1 = np.array([1.48260222, 1.48260222])

assert_allclose(funcs.mad_std(data, axis=0, ignore_nan=True), result_axis0)
assert_allclose(funcs.mad_std(data, axis=1, ignore_nan=True), result_axis1)

def test_mad_std_with_axis_and_nan_array_type():
# mad_std should return a masked array if given one, and not otherwise
data = np.array([[1, 2, 3, 4, np.nan],
[4, 3, 2, 1, np.nan]])

result = funcs.mad_std(data, axis=0, ignore_nan=True)
assert not np.ma.isMaskedArray(result)

data = np.ma.masked_where(np.isnan(data), data)
result = funcs.mad_std(data, axis=0, ignore_nan=True)
assert np.ma.isMaskedArray(result)


def test_gaussian_fwhm_to_sigma():
fwhm = (2.0 * np.sqrt(2.0 * np.log(2.0)))
Expand Down

0 comments on commit e96efa7

Please sign in to comment.