ibis-project · jreback · Nov 1, 2019 · Oct 24, 2019 · Oct 24, 2019 · Oct 24, 2019
diff --git a/docs/source/release.rst b/docs/source/release.rst
@@ -7,6 +7,8 @@ Release Notes
    These release notes are for versions of ibis **1.0 and later**. Release
    notes for pre-1.0 versions of ibis can be found at :doc:`/release-pre-1.0`
 
+* :release:`1.2.1 <pending>`
+* :bug:`2009` Fix pandas backend to treat the ``preceding`` argument of ``trailing_window`` as an inclusive window bound rather than a window size (e.g. ``preceding=0`` now means the current row only, rather than a window of size 0)
 * :release:`1.2.0 <2019-06-24>`
 * :feature:`1836` Add new geospatial functions to OmniSciDB backend
 * :support:`1847` Skip SQLAlchemy backend tests in connect method in backends.py

diff --git a/ibis/expr/window.py b/ibis/expr/window.py
@@ -420,7 +420,8 @@ def trailing_window(preceding, group_by=None, order_by=None):
     preceding : int, float or expression of intervals, i.e.
         ibis.interval(days=1) + ibis.interval(hours=5)
         Int indicates number of trailing rows to include;
-        0 includes only the current row.
+        0 includes only the current row, 1 includes the current row and one
+        preceding row.
         Interval indicates a trailing range window.
     group_by : expressions, default None
         Either specify here or with TableExpr.group_by

diff --git a/ibis/pandas/aggcontext.py b/ibis/pandas/aggcontext.py
@@ -297,7 +297,14 @@ def compute_window_spec(dtype, obj):
 
 @compute_window_spec.register(type(None))
 def compute_window_spec_none(_, obj):
-    return obj
+    """Translate a row-based ibis window bound into a pandas window size.
+
+    Only used for row-based windows. A window spec in ibis is an inclusive
+    window bound, so a bound of 0 indicates the current row alone.
+    A window spec in pandas is a window size. Therefore, we must add 1
+    to the ibis window bound to get the expected behavior.
+    """
+    return obj + 1
 
 
 @compute_window_spec.register(dt.Interval)
@@ -306,11 +313,6 @@ def compute_window_spec_interval(_, expr):
     return pd.tseries.frequencies.to_offset(value)
 
 
-@compute_window_spec.register(dt.DataType)
-def compute_window_spec_expr(_, expr):
-    return ibis.pandas.execute(expr)
-
-
 class Window(AggregationContext):
     __slots__ = ('construct_window',)
 

diff --git a/ibis/pandas/execution/tests/test_window.py b/ibis/pandas/execution/tests/test_window.py
@@ -288,7 +288,7 @@ def test_batting_rolling(batting, batting_df, sort_kind):
     more_values = (
         batting_df[columns]
         .sort_values('yearID', kind=sort_kind)
-        .G.rolling(5, min_periods=1)
+        .G.rolling(6, min_periods=1)
         .sum()
         .astype('int64')
     )
@@ -311,7 +311,7 @@ def test_batting_rolling_partitioned(batting, batting_df, sort_kind):
         batting_df[columns]
         .set_index(order_by)
         .groupby(group_by)
-        .G.rolling(3, min_periods=1)
+        .G.rolling(4, min_periods=1)
         .sum()
         .rename('rolled')
     )

diff --git a/ibis/pandas/tests/test_udf.py b/ibis/pandas/tests/test_udf.py
@@ -264,7 +264,7 @@ def my_mean(series):
     result = expr.execute().sort_values(['key', 'a'])
     expected = df.sort_values(['key', 'a']).assign(
         rolled=lambda df: df.groupby('key')
-        .b.rolling(2, min_periods=1)
+        .b.rolling(3, min_periods=1)
         .mean()
         .reset_index(level=0, drop=True)
     )
@@ -286,7 +286,7 @@ def test_udaf_window_nan():
     result = expr.execute().sort_values(['key', 'a'])
     expected = df.sort_values(['key', 'a']).assign(
         rolled=lambda d: d.groupby('key')
-        .b.rolling(2, min_periods=1)
+        .b.rolling(3, min_periods=1)
         .mean()
         .reset_index(level=0, drop=True)
     )

diff --git a/ibis/spark/tests/test_udf.py b/ibis/spark/tests/test_udf.py
@@ -305,7 +305,7 @@ def my_mean(series):
     result = expr.execute()
     expected = df_random.sort_values(['key', 'a']).assign(
         rolled=lambda df: df.groupby('key')
-        .b.rolling(2, min_periods=1)
+        .b.rolling(3, min_periods=1)
         .mean()
         .reset_index(level=0, drop=True)
     )
@@ -323,7 +323,7 @@ def test_udaf_window_nan(con, t_nan, df_nan):
     result = expr.execute()
     expected = df_nan.sort_values(['key', 'a']).assign(
         rolled=lambda d: d.groupby('key')
-        .b.rolling(2, min_periods=1)
+        .b.rolling(3, min_periods=1)
         .mean()
         .reset_index(level=0, drop=True)
     )
@@ -338,7 +338,7 @@ def test_udaf_window_null(con, t_null, df_null):
     result = expr.execute()
     expected = df_null.sort_values(['key', 'a']).assign(
         rolled=lambda d: d.groupby('key')
-        .b.rolling(2, min_periods=1)
+        .b.rolling(3, min_periods=1)
         .mean()
         .reset_index(level=0, drop=True)
     )

diff --git a/ibis/tests/all/test_window.py b/ibis/tests/all/test_window.py
@@ -245,22 +245,34 @@ def test_bounded_following_window(backend, alltypes, df, con):
     backend.assert_series_equal(left, right)
 
 
-# TODO (ISSUE #2000): fix Csv, Pandas, and Parquet backends to have
-#                     inclusive preceding window boundary
-@pytest.mark.xfail_backends([Csv, Pandas, Parquet])
+@pytest.mark.parametrize(
+    'window_fn',
+    [
+        param(
+            lambda t: ibis.window(
+                preceding=2,
+                following=0,
+                group_by=[t.string_col],
+                order_by=[t.id],
+            ),
+            id='preceding-2-following-0',
+        ),
+        param(
+            lambda t: ibis.trailing_window(
+                preceding=2, group_by=[t.string_col], order_by=[t.id]
+            ),
+            id='trailing-2',
+        ),
+    ],
+)
 @pytest.mark.xfail_unsupported
-def test_bounded_preceding_window(backend, alltypes, df, con):
+def test_bounded_preceding_windows(backend, alltypes, df, con, window_fn):
     if not backend.supports_window_operations:
         pytest.skip(
             'Backend {} does not support window operations'.format(backend)
         )
 
-    window = ibis.window(
-        preceding=2,
-        following=0,
-        group_by=[alltypes.string_col],
-        order_by=[alltypes.id],
-    )
+    window = window_fn(alltypes)
 
     expr = alltypes.mutate(val=alltypes.double_col.sum().over(window))