From 8de706151e183f448e1af9115770713d18e229f1 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 15 May 2022 10:42:31 -0400 Subject: [PATCH] Fix overflow issue in decode_cf_datetime for dtypes <= np.uint32 (#6598) --- doc/whats-new.rst | 3 +++ xarray/coding/times.py | 9 ++++++--- xarray/tests/test_coding_times.py | 27 +++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 680c8219a38..c9ee52f3da0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -126,6 +126,9 @@ Bug fixes - :py:meth:`isel` with `drop=True` works as intended with scalar :py:class:`DataArray` indexers. (:issue:`6554`, :pull:`6579`) By `Michael Niklas `_. +- Fixed silent overflow issue when decoding times encoded with 32-bit and below + unsigned integer data types (:issue:`6589`, :pull:`6598`). By `Spencer Clark + `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 42a815300e5..5cdd9472277 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -218,9 +218,12 @@ def _decode_datetime_with_pandas(flat_num_dates, units, calendar): pd.to_timedelta(flat_num_dates.max(), delta) + ref_date # To avoid integer overflow when converting to nanosecond units for integer - # dtypes smaller than np.int64 cast all integer-dtype arrays to np.int64 - # (GH 2002). - if flat_num_dates.dtype.kind == "i": + # dtypes smaller than np.int64 cast all integer and unsigned integer dtype + # arrays to np.int64 (GH 2002, GH 6589). Note this is safe even in the case + # of np.uint64 values, because any np.uint64 value that would lead to + # overflow when converting to np.int64 would not be representable with a + # timedelta64 value, and therefore would raise an error in the lines above. + if flat_num_dates.dtype.kind in "iu": flat_num_dates = flat_num_dates.astype(np.int64) # Cast input ordinals to integers of nanoseconds because pd.to_timedelta diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index f10523aecbb..a5344fe4c85 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1121,3 +1121,30 @@ def test_should_cftime_be_used_target_not_npable(): ValueError, match="Calendar 'noleap' is only valid with cftime." ): _should_cftime_be_used(src, "noleap", False) + + +@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64]) +def test_decode_cf_datetime_uint(dtype): + units = "seconds since 2018-08-22T03:23:03Z" + num_dates = dtype(50) + result = decode_cf_datetime(num_dates, units) + expected = np.asarray(np.datetime64("2018-08-22T03:23:53", "ns")) + np.testing.assert_equal(result, expected) + + +@requires_cftime +def test_decode_cf_datetime_uint64_with_cftime(): + units = "days since 1700-01-01" + num_dates = np.uint64(182621) + result = decode_cf_datetime(num_dates, units) + expected = np.asarray(np.datetime64("2200-01-01", "ns")) + np.testing.assert_equal(result, expected) + + +@requires_cftime +def test_decode_cf_datetime_uint64_with_cftime_overflow_error(): + units = "microseconds since 1700-01-01" + calendar = "360_day" + num_dates = np.uint64(1_000_000 * 86_400 * 360 * 500_000) + with pytest.raises(OverflowError): + decode_cf_datetime(num_dates, units, calendar)