Skip to content

Commit

Permalink
BUG: datetime rolling min/max segfaults when closed=left (pandas-dev#…
Browse files Browse the repository at this point in the history
…21704)

User reported that `df.rolling(to_offset('3D'), closed='left').max()`
segfaults when df has a datetime index. The bug was in PR pandas-dev#19549. In
that PR, in https://github.com/pandas-dev/pandas/blame/master/pandas/_libs/window.pyx#L1268
`i` is initialized to `endi[0]`, which is 0 when `closed=left`.
So in the next line when it tries to set `output[i-1]` it goes out of bounds.
In addition, there are 2 more bugs in the `roll_min_max` code.
The second bug is that for variable size windows, the `nobs` is never updated
when elements leave the window. The third bug is at the end of the fixed
window where all output elements up to `minp` are initialized to 0 if
the input is not float.

This PR fixes all three of the aforementioned bugs, at the cost of casting the
output array to floating point even if the input is integer. This is less
than ideal if the output has no NaNs, but is still consistent with roll_sum
behavior.
  • Loading branch information
changhiskhan committed Jul 11, 2018
1 parent 5d0daa0 commit 181d671
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 21 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,9 @@ Groupby/Resample/Rolling
-
-

- Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'` and a
datetime-like index leading to incorrect results and also segfault. (:issue:`21704`)

Sparse
^^^^^^

Expand Down
63 changes: 42 additions & 21 deletions pandas/_libs/window.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1222,11 +1222,12 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp,
cdef:
numeric ai
bint is_variable, should_replace
int64_t N, i, removed, window_i
int64_t N, i, removed, window_i, close_offset
Py_ssize_t nobs = 0
deque Q[int64_t]
deque W[int64_t]
ndarray[int64_t] starti, endi
ndarray[numeric, ndim=1] output
ndarray[double_t, ndim=1] output
cdef:
int64_t* death
numeric* ring
Expand All @@ -1241,49 +1242,69 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp,
input, win,
minp, index, closed)

output = np.empty(N, dtype=input.dtype)
output = np.empty(N, dtype=float)

# The front should always be the minimum in the window
Q = deque[int64_t]()

if is_variable:

# need this to maintain the window for nobs calc
W = deque[int64_t]()
with nogil:

# This is using a modified version of the C++ code in this
# SO post: http://bit.ly/2nOoHlY
# The original impl didn't deal with variable window sizes
# So the code was optimized for that

for i from starti[0] <= i < endi[0]:
ai = init_mm(input[i], &nobs, is_max)

if is_max:
while not Q.empty() and ai >= input[Q.back()]:
Q.pop_back()
else:
while not Q.empty() and ai <= input[Q.back()]:
Q.pop_back()
Q.push_back(i)

for i from endi[0] <= i < N:
output[i-1] = calc_mm(minp, nobs, input[Q.front()])
if endi[0] > starti[0]:
# right is closed
for i from starti[0] <= i < endi[0]:
ai = init_mm(input[i], &nobs, is_max)

if is_max:
while not Q.empty() and ai >= input[Q.back()]:
Q.pop_back()
else:
while not Q.empty() and ai <= input[Q.back()]:
Q.pop_back()
Q.push_back(i)
W.push_back(i)
output[0] = calc_mm(minp, nobs, input[Q.front()])
close_offset = 0
else:
init_mm(input[endi[0]], &nobs, is_max)
Q.push_back(endi[0])
W.push_back(endi[0])
# if right is open then the first output is always NaN
output[0] = NaN
close_offset = 1

for i from endi[close_offset] <= i < N:
ai = init_mm(input[i], &nobs, is_max)

# Discard previous entries if we find new min or max
if is_max:
while not Q.empty() and ai >= input[Q.back()]:
Q.pop_back()
else:
while not Q.empty() and ai <= input[Q.back()]:
Q.pop_back()

while not Q.empty() and Q.front() <= i - (endi[i] - starti[i]):
# Remove elements leaving the window
while not Q.empty() and Q.front() <= (i - (endi[i] - starti[i])
- close_offset):
Q.pop_front()

Q.push_back(i)

output[N-1] = calc_mm(minp, nobs, input[Q.front()])
# Update the nobs
while not W.empty() and W.front() <= (i - (endi[i] - starti[i])
- close_offset):
remove_mm(input[W.front()], &nobs)
W.pop_front()

Q.push_back(i)
W.push_back(i)
output[i] = calc_mm(minp, nobs, input[Q.front()])
else:
# setup the rings of death!
ring = <numeric *>malloc(win * sizeof(numeric))
Expand Down
43 changes: 43 additions & 0 deletions pandas/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,49 @@ def test_closed(self):
with pytest.raises(ValueError):
df.rolling(window=3, closed='neither')

def test_closed_min_max_datetime(self):
# see gh-21704
ser = pd.Series(data=np.arange(10),
index=pd.date_range('2000', periods=10))

result = ser.rolling('3D', closed='right').min()
expected = pd.Series([0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7],
index=ser.index)
tm.assert_series_equal(result, expected)

result = ser.rolling('3D', closed='both').min()
expected = pd.Series([0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6],
index=ser.index)
tm.assert_series_equal(result, expected)

result = ser.rolling('3D', closed='neither').min()
expected = pd.Series([np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7],
index=ser.index)
tm.assert_series_equal(result, expected)

# shouldn't segfault
result = ser.rolling('3D', closed='left').min()
expected = pd.Series([np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6],
index=ser.index)
tm.assert_series_equal(result, expected)

# uneven
ser = ser.drop(index=ser.index[[1, 5]])
result = ser.rolling('3D', closed='left').min()
expected = pd.Series([np.nan, 0, 0, 2, 3, 4, 6, 6],
index=ser.index)
tm.assert_series_equal(result, expected)

def test_closed_min_max_minp(self):
# see gh-21704
ser = pd.Series(data=np.arange(10),
index=pd.date_range('2000', periods=10))
ser[ser.index[-3:]] = np.nan
result = ser.rolling('3D', min_periods=2, closed='left').min()
expected = pd.Series([np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan],
index=ser.index)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize('roller', ['1s', 1])
def tests_empty_df_rolling(self, roller):
# GH 15819 Verifies that datetime and integer rolling windows can be
Expand Down

0 comments on commit 181d671

Please sign in to comment.