From d38e16be5024f81d0356bc09a49df1b699b1f563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Thu, 30 Oct 2025 21:35:52 +0000 Subject: [PATCH 1/4] feat: add DataFrame.resample and Series.resample --- bigframes/core/blocks.py | 5 ++++ bigframes/dataframe.py | 20 ++++++++++--- bigframes/series.py | 4 +-- tests/system/small/test_dataframe.py | 45 ++++++++++++++++++++-------- tests/system/small/test_series.py | 4 +-- tests/system/small/test_unordered.py | 12 +++++--- tests/unit/test_dataframe.py | 11 +++++++ tests/unit/test_series_polars.py | 4 +-- 8 files changed, 79 insertions(+), 26 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 1900b7208a..eb04b3ccb4 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -1985,6 +1985,11 @@ def _generate_resample_label( Literal["epoch", "start", "start_day", "end", "end_day"], ] = "start_day", ) -> Block: + if not isinstance(rule, str): + raise NotImplementedError( + f"Only offset strings are currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}" + ) + # Validate and resolve the index or column to use for grouping if on is None: if len(self.index_columns) == 0: diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index f016fddd83..29333b2417 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -4256,10 +4256,12 @@ def _split( return [DataFrame(block) for block in blocks] @validations.requires_ordering() - def _resample( + def resample( self, rule: str, *, + closed: Optional[Literal["right", "left"]] = None, + label: Optional[Literal["right", "left"]] = None, on: blocks.Label = None, level: Optional[LevelsType] = None, origin: Union[ @@ -4269,7 +4271,7 @@ def _resample( Literal["epoch", "start", "start_day", "end", "end_day"], ] = "start_day", ) -> bigframes.core.groupby.DataFrameGroupBy: - """Internal function to support resample. Resample time-series data. + """Resample time-series data. 
**Examples:** @@ -4285,7 +4287,7 @@ def _resample( Resample on a DataFrame with index: >>> df = bpd.DataFrame(data).set_index("timestamp_col") - >>> df._resample(rule="7s").min() + >>> df.resample(rule="7s").min() int64_col int64_too 2021-01-01 12:59:55 0 10 2021-01-01 13:00:02 2 12 @@ -4298,7 +4300,7 @@ def _resample( Resample with column and origin set to 'start': >>> df = bpd.DataFrame(data) - >>> df._resample(rule="7s", on = "timestamp_col", origin="start").min() + >>> df.resample(rule="7s", on = "timestamp_col", origin="start").min() int64_col int64_too 2021-01-01 13:00:00 0 10 2021-01-01 13:00:07 7 17 @@ -4311,6 +4313,14 @@ def _resample( Args: rule (str): The offset string representing target conversion. + closed (Literal['right'] | Literal['left'] | None): + Which side of bin interval is closed. The default is 'left' for + all frequency offsets except for 'ME', 'YE', 'QE', 'BME', 'BA', + 'BQE', and 'W' which all have a default of 'right'. + label (Literal['right'] | Literal['left'] | None): + Which bin edge label to label bucket with. The default is 'left' + for all frequency offsets except for 'ME', 'YE', 'QE', 'BME', + 'BA', 'BQE', and 'W' which all have a default of 'right'. on (str, default None): For a DataFrame, column to use instead of index for resampling. Column must be datetime-like. @@ -4327,6 +4337,8 @@ def _resample( """ block = self._block._generate_resample_label( rule=rule, + closed=closed, + label=label, on=on, level=level, origin=origin, diff --git a/bigframes/series.py b/bigframes/series.py index ef0da32dfc..1c5dd3a930 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -2505,7 +2505,7 @@ def explode(self, *, ignore_index: Optional[bool] = False) -> Series: ) @validations.requires_ordering() - def _resample( + def resample( self, rule: str, *, @@ -2531,7 +2531,7 @@ def _resample( ... "int64_col": range(30), ... 
} >>> s = bpd.DataFrame(data).set_index("timestamp_col") - >>> s._resample(rule="7s", origin="epoch").min() + >>> s.resample(rule="7s", origin="epoch").min() int64_col 2021-01-01 12:59:56 0 2021-01-01 13:00:03 3 diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 79f8efd00f..a07d23131a 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -5923,7 +5923,7 @@ def test_dataframe_explode_xfail(col_names): ), ], ) -def test__resample_with_column( +def test_resample_with_column( scalars_df_index, scalars_pandas_df_index, on, rule, origin ): # TODO: supply a reason why this isn't compatible with pandas 1.x @@ -5943,30 +5943,51 @@ def test__resample_with_column( ) +@pytest.mark.parametrize("index_col", ["timestamp_col", "datetime_col"]) @pytest.mark.parametrize( - ("append", "level", "col", "rule"), + ("index_append", "level"), + [(True, 1), (False, None), (False, 0)], +) +@pytest.mark.parametrize( + "rule", [ - pytest.param(False, None, "timestamp_col", "100d"), - pytest.param(True, 1, "timestamp_col", "1200h"), - pytest.param(False, None, "datetime_col", "100d"), + # TODO(tswast): support timedeltas and dataoffsets + "100d", + "1200h", ], ) -def test__resample_with_index( - scalars_df_index, scalars_pandas_df_index, append, level, col, rule +@pytest.mark.parametrize("closed", ["left", "right", None]) +@pytest.mark.parametrize("label", ["left", "right", None]) +@pytest.mark.parametrize( + "origin", + ["epoch", "start", "start_day", "end", "end_day"], +) +def test_resample_with_index( + scalars_df_index, + scalars_pandas_df_index, + index_append, + level, + index_col, + rule, + closed, + origin, + label, ): # TODO: supply a reason why this isn't compatible with pandas 1.x pytest.importorskip("pandas", minversion="2.0.0") - scalars_df_index = scalars_df_index.set_index(col, append=append) - scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append) + scalars_df_index = 
scalars_df_index.set_index(index_col, append=index_append) + scalars_pandas_df_index = scalars_pandas_df_index.set_index( + index_col, append=index_append + ) bf_result = ( scalars_df_index[["int64_col", "int64_too"]] - ._resample(rule=rule, level=level) + .resample(rule=rule, level=level, closed=closed, origin=origin, label=label) .min() .to_pandas() ) pd_result = ( scalars_pandas_df_index[["int64_col", "int64_too"]] - .resample(rule=rule, level=level) + .resample(rule=rule, level=level, closed=closed, origin=origin, label=label) .min() ) assert_pandas_df_equal(bf_result, pd_result) @@ -6010,7 +6031,7 @@ def test__resample_with_index( ), ], ) -def test__resample_start_time(rule, origin, data): +def test_resample_start_time(rule, origin, data): # TODO: supply a reason why this isn't compatible with pandas 1.x pytest.importorskip("pandas", minversion="2.0.0") col = "timestamp_col" diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 5ace3f54d8..4df257423f 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -4856,14 +4856,14 @@ def test_series_explode_null(data): pytest.param(True, "timestamp_col", "timestamp_col", "1YE"), ], ) -def test__resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule): +def test_resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule): # TODO: supply a reason why this isn't compatible with pandas 1.x pytest.importorskip("pandas", minversion="2.0.0") scalars_df_index = scalars_df_index.set_index(col, append=append)["int64_col"] scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)[ "int64_col" ] - bf_result = scalars_df_index._resample(rule=rule, level=level).min().to_pandas() + bf_result = scalars_df_index.resample(rule=rule, level=level).min().to_pandas() pd_result = scalars_pandas_df_index.resample(rule=rule, level=level).min() pd.testing.assert_series_equal(bf_result, pd_result) diff --git 
a/tests/system/small/test_unordered.py b/tests/system/small/test_unordered.py index 9cfa54146a..07fdb215df 100644 --- a/tests/system/small/test_unordered.py +++ b/tests/system/small/test_unordered.py @@ -248,7 +248,7 @@ def test_unordered_mode_no_ambiguity_warning(unordered_session): ), ], ) -def test__resample_with_index(unordered_session, rule, origin, data): +def test_resample_with_index(unordered_session, rule, origin, data): # TODO: supply a reason why this isn't compatible with pandas 1.x pytest.importorskip("pandas", minversion="2.0.0") col = "timestamp_col" @@ -256,12 +256,16 @@ def test__resample_with_index(unordered_session, rule, origin, data): scalars_pandas_df_index = pd.DataFrame(data).set_index(col) scalars_pandas_df_index.index.name = None - bf_result = scalars_df_index._resample(rule=rule, origin=origin).min().to_pandas() - + bf_result = scalars_df_index.resample(rule=rule, origin=origin).min() pd_result = scalars_pandas_df_index.resample(rule=rule, origin=origin).min() + assert isinstance(bf_result.index, bpd.DatetimeIndex) + assert isinstance(pd_result.index, pd.DatetimeIndex) pd.testing.assert_frame_equal( - bf_result, pd_result, check_dtype=False, check_index_type=False + bf_result.to_pandas(), + pd_result, + check_index_type=False, + check_dtype=False, ) diff --git a/tests/unit/test_dataframe.py b/tests/unit/test_dataframe.py index 2326f2595b..2bbf112d52 100644 --- a/tests/unit/test_dataframe.py +++ b/tests/unit/test_dataframe.py @@ -42,6 +42,17 @@ def test_dataframe_repr_with_uninitialized_object(): assert "DataFrame" in got +@pytest.mark.parametrize("rule", [pd.DateOffset(weeks=1), pd.Timedelta(hours=8)]) +def test_dataframe_rule_not_implememented( + monkeypatch: pytest.MonkeyPatch, + rule, +): + dataframe = mocks.create_dataframe(monkeypatch) + + with pytest.raises(NotImplementedError, match="rule"): + dataframe.resample(rule=rule) + + def test_dataframe_setattr_with_uninitialized_object(): """Ensures DataFrame can be subclassed without 
trying to set attributes as columns.""" # Avoid calling __init__ since it might be called later in a subclass. diff --git a/tests/unit/test_series_polars.py b/tests/unit/test_series_polars.py index 55bc048bcd..6f729b0df0 100644 --- a/tests/unit/test_series_polars.py +++ b/tests/unit/test_series_polars.py @@ -5006,14 +5006,14 @@ def test_series_explode_null(data): pytest.param(True, "timestamp_col", "timestamp_col", "1YE"), ], ) -def test__resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule): +def test_resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule): # TODO: supply a reason why this isn't compatible with pandas 1.x pytest.importorskip("pandas", minversion="2.0.0") scalars_df_index = scalars_df_index.set_index(col, append=append)["int64_col"] scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)[ "int64_col" ] - bf_result = scalars_df_index._resample(rule=rule, level=level).min().to_pandas() + bf_result = scalars_df_index.resample(rule=rule, level=level).min().to_pandas() pd_result = scalars_pandas_df_index.resample(rule=rule, level=level).min() pd.testing.assert_series_equal(bf_result, pd_result) From e04393abbdc53088875d9658cc6a2da14c57f5c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Mon, 3 Nov 2025 21:47:49 +0000 Subject: [PATCH 2/4] raise for unsupported values --- bigframes/core/blocks.py | 20 +++++ bigframes/dataframe.py | 64 --------------- tests/system/small/test_dataframe.py | 11 ++- tests/unit/test_dataframe.py | 53 +++++++++++- .../bigframes_vendored/pandas/core/frame.py | 80 +++++++++++++++++++ 5 files changed, 159 insertions(+), 69 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 2ea2f0335b..b79b3f6caa 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -1997,6 +1997,26 @@ def _generate_resample_label( f"Only offset strings are currently supported for rule, but got {repr(rule)}. 
{constants.FEEDBACK_LINK}" ) + if rule in ("ME", "YE", "QE", "BME", "BA", "BQE", "W"): + raise NotImplementedError( + f"Offset strings 'ME', 'YE', 'QE', 'BME', 'BA', 'BQE', 'W' are not currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}" + ) + + if closed == "right": + raise NotImplementedError( + f"Only closed='left' is currently supported. {constants.FEEDBACK_LINK}", + ) + + if label == "right": + raise NotImplementedError( + f"Only label='left' is currently supported. {constants.FEEDBACK_LINK}", + ) + + if origin not in ("epoch", "start", "start_day"): + raise NotImplementedError( + f"Only origin='epoch', 'start', 'start_day' are currently supported, but got {repr(origin)}. {constants.FEEDBACK_LINK}" + ) + # Validate and resolve the index or column to use for grouping if on is None: if len(self.index_columns) == 0: diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 3d47788b78..7471cf587b 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -4197,70 +4197,6 @@ def resample( Literal["epoch", "start", "start_day", "end", "end_day"], ] = "start_day", ) -> bigframes.core.groupby.DataFrameGroupBy: - """Resample time-series data. - - **Examples:** - - >>> import bigframes.pandas as bpd - >>> data = { - ... "timestamp_col": pd.date_range( - ... start="2021-01-01 13:00:00", periods=30, freq="1s" - ... ), - ... "int64_col": range(30), - ... "int64_too": range(10, 40), - ... 
} - - Resample on a DataFrame with index: - - >>> df = bpd.DataFrame(data).set_index("timestamp_col") - >>> df.resample(rule="7s").min() - int64_col int64_too - 2021-01-01 12:59:55 0 10 - 2021-01-01 13:00:02 2 12 - 2021-01-01 13:00:09 9 19 - 2021-01-01 13:00:16 16 26 - 2021-01-01 13:00:23 23 33 - - [5 rows x 2 columns] - - Resample with column and origin set to 'start': - - >>> df = bpd.DataFrame(data) - >>> df.resample(rule="7s", on = "timestamp_col", origin="start").min() - int64_col int64_too - 2021-01-01 13:00:00 0 10 - 2021-01-01 13:00:07 7 17 - 2021-01-01 13:00:14 14 24 - 2021-01-01 13:00:21 21 31 - 2021-01-01 13:00:28 28 38 - - [5 rows x 2 columns] - - Args: - rule (str): - The offset string representing target conversion. - closed (Literal['right'] | Literal['left'] | None): - Which side of bin interval is closed. The default is 'left' for - all frequency offsets except for 'ME', 'YE', 'QE', 'BME', 'BA', - 'BQE', and 'W' which all have a default of 'right'. - label (Literal['right'] | Literal['left'] | None): - Which bin edge label to label bucket with. The default is 'left' - for all frequency offsets except for 'ME', 'YE', 'QE', 'BME', - 'BA', 'BQE', and 'W' which all have a default of 'right'. - on (str, default None): - For a DataFrame, column to use instead of index for resampling. Column - must be datetime-like. - level (str or int, default None): - For a MultiIndex, level (name or number) to use for resampling. - level must be datetime-like. - origin(str, default 'start_day'): - The timestamp on which to adjust the grouping. Must be one of the following: - 'epoch': origin is 1970-01-01 - 'start': origin is the first value of the timeseries - 'start_day': origin is the first day at midnight of the timeseries - Returns: - DataFrameGroupBy: DataFrameGroupBy object. 
- """ block = self._block._generate_resample_label( rule=rule, closed=closed, diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index a07d23131a..d8a750aa75 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -5951,16 +5951,19 @@ def test_resample_with_column( @pytest.mark.parametrize( "rule", [ - # TODO(tswast): support timedeltas and dataoffsets + # TODO(tswast): support timedeltas and dataoffsets. + # TODO(tswast): support bins that default to "right". "100d", "1200h", ], ) -@pytest.mark.parametrize("closed", ["left", "right", None]) -@pytest.mark.parametrize("label", ["left", "right", None]) +# TODO(tswast): support "right" +@pytest.mark.parametrize("closed", ["left", None]) +# TODO(tswast): support "right" +@pytest.mark.parametrize("label", ["left", None]) @pytest.mark.parametrize( "origin", - ["epoch", "start", "start_day", "end", "end_day"], + ["epoch", "start", "start_day"], # TODO(tswast): support end, end_day. ) def test_resample_with_index( scalars_df_index, diff --git a/tests/unit/test_dataframe.py b/tests/unit/test_dataframe.py index 2bbf112d52..015dbd030e 100644 --- a/tests/unit/test_dataframe.py +++ b/tests/unit/test_dataframe.py @@ -42,7 +42,23 @@ def test_dataframe_repr_with_uninitialized_object(): assert "DataFrame" in got -@pytest.mark.parametrize("rule", [pd.DateOffset(weeks=1), pd.Timedelta(hours=8)]) +@pytest.mark.parametrize( + "rule", + [ + pd.DateOffset(weeks=1), + pd.Timedelta(hours=8), + # According to + # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.resample.html + # these all default to "right" for closed and label, which isn't yet supported. 
+ "ME", + "YE", + "QE", + "BME", + "BA", + "BQE", + "W", + ], +) def test_dataframe_rule_not_implememented( monkeypatch: pytest.MonkeyPatch, rule, @@ -53,6 +69,41 @@ def test_dataframe_rule_not_implememented( dataframe.resample(rule=rule) +def test_dataframe_closed_not_implememented( + monkeypatch: pytest.MonkeyPatch, +): + dataframe = mocks.create_dataframe(monkeypatch) + + with pytest.raises(NotImplementedError, match="Only closed='left'"): + dataframe.resample(rule="1d", closed="right") + + +def test_dataframe_label_not_implememented( + monkeypatch: pytest.MonkeyPatch, +): + dataframe = mocks.create_dataframe(monkeypatch) + + with pytest.raises(NotImplementedError, match="Only label='left'"): + dataframe.resample(rule="1d", label="right") + + +@pytest.mark.parametrize( + "origin", + [ + "end", + "end_day", + ], +) +def test_dataframe_origin_not_implememented( + monkeypatch: pytest.MonkeyPatch, + origin, +): + dataframe = mocks.create_dataframe(monkeypatch) + + with pytest.raises(NotImplementedError, match="origin"): + dataframe.resample(rule="1d", origin=origin) + + def test_dataframe_setattr_with_uninitialized_object(): """Ensures DataFrame can be subclassed without trying to set attributes as columns.""" # Avoid calling __init__ since it might be called later in a subclass. 
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index b434b51fb3..4de7453fc5 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -11,6 +11,7 @@ """ from __future__ import annotations +import datetime from typing import Hashable, Iterable, Literal, Optional, Sequence, Union from bigframes_vendored import constants @@ -4734,6 +4735,85 @@ def merge( """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def resample( + self, + rule: str, + *, + closed: Optional[Literal["right", "left"]] = None, + label: Optional[Literal["right", "left"]] = None, + on=None, + level=None, + origin: Union[ + Union[pd.Timestamp, datetime.datetime, np.datetime64, int, float, str], + Literal["epoch", "start", "start_day", "end", "end_day"], + ] = "start_day", + ): + """Resample time-series data. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> data = { + ... "timestamp_col": pd.date_range( + ... start="2021-01-01 13:00:00", periods=30, freq="1s" + ... ), + ... "int64_col": range(30), + ... "int64_too": range(10, 40), + ... } + + Resample on a DataFrame with index: + + >>> df = bpd.DataFrame(data).set_index("timestamp_col") + >>> df.resample(rule="7s").min() + int64_col int64_too + 2021-01-01 12:59:55 0 10 + 2021-01-01 13:00:02 2 12 + 2021-01-01 13:00:09 9 19 + 2021-01-01 13:00:16 16 26 + 2021-01-01 13:00:23 23 33 + + [5 rows x 2 columns] + + Resample with column and origin set to 'start': + + >>> df = bpd.DataFrame(data) + >>> df.resample(rule="7s", on = "timestamp_col", origin="start").min() + int64_col int64_too + 2021-01-01 13:00:00 0 10 + 2021-01-01 13:00:07 7 17 + 2021-01-01 13:00:14 14 24 + 2021-01-01 13:00:21 21 31 + 2021-01-01 13:00:28 28 38 + + [5 rows x 2 columns] + + Args: + rule (str): + The offset string representing target conversion. 
+ Offsets 'ME', 'YE', 'QE', 'BME', 'BA', 'BQE', and 'W' are *not* + supported. + closed (Literal['left'] | None): + Which side of bin interval is closed. The default is 'left' for + all supported frequency offsets. + label (Literal['left'] | None): + Which bin edge label to label bucket with. The default is 'left' + for all supported frequency offsets. + on (str, default None): + For a DataFrame, column to use instead of index for resampling. Column + must be datetime-like. + level (str or int, default None): + For a MultiIndex, level (name or number) to use for resampling. + level must be datetime-like. + origin (str, default 'start_day'): + The timestamp on which to adjust the grouping. Must be one of the following: + 'epoch': origin is 1970-01-01 + 'start': origin is the first value of the timeseries + 'start_day': origin is the first day at midnight of the timeseries + Origin values 'end' and 'end_day' are *not* supported. + Returns: + DataFrameGroupBy: DataFrameGroupBy object. + """ + def round(self, decimals): """ Round a DataFrame to a variable number of decimal places. From 73dce26eace070f75144af940e7aef3b3641c9cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Mon, 3 Nov 2025 22:15:18 +0000 Subject: [PATCH 3/4] add docstrings --- bigframes/series.py | 37 ----------- .../bigframes_vendored/pandas/core/frame.py | 57 +++++++++-------- .../bigframes_vendored/pandas/core/series.py | 64 +++++++++++++++++++ 3 files changed, 93 insertions(+), 65 deletions(-) diff --git a/bigframes/series.py b/bigframes/series.py index 1c5dd3a930..c11cc48394 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -2519,43 +2519,6 @@ def resample( Literal["epoch", "start", "start_day", "end", "end_day"], ] = "start_day", ) -> bigframes.core.groupby.SeriesGroupBy: - """Internal function to support resample. Resample time-series data. - - **Examples:** - - >>> import bigframes.pandas as bpd - >>> data = { - ... 
"timestamp_col": pd.date_range( - ... start="2021-01-01 13:00:00", periods=30, freq="1s" - ... ), - ... "int64_col": range(30), - ... } - >>> s = bpd.DataFrame(data).set_index("timestamp_col") - >>> s.resample(rule="7s", origin="epoch").min() - int64_col - 2021-01-01 12:59:56 0 - 2021-01-01 13:00:03 3 - 2021-01-01 13:00:10 10 - 2021-01-01 13:00:17 17 - 2021-01-01 13:00:24 24 - - [5 rows x 1 columns] - - - Args: - rule (str): - The offset string representing target conversion. - level (str or int, default None): - For a MultiIndex, level (name or number) to use for resampling. - level must be datetime-like. - origin(str, default 'start_day'): - The timestamp on which to adjust the grouping. Must be one of the following: - 'epoch': origin is 1970-01-01 - 'start': origin is the first value of the timeseries - 'start_day': origin is the first day at midnight of the timeseries - Returns: - SeriesGroupBy: SeriesGroupBy object. - """ block = self._block._generate_resample_label( rule=rule, closed=closed, diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 4de7453fc5..1e90e2e210 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -4752,40 +4752,40 @@ def resample( **Examples:** - >>> import bigframes.pandas as bpd - >>> data = { - ... "timestamp_col": pd.date_range( - ... start="2021-01-01 13:00:00", periods=30, freq="1s" - ... ), - ... "int64_col": range(30), - ... "int64_too": range(10, 40), - ... } + >>> import bigframes.pandas as bpd + >>> data = { + ... "timestamp_col": pd.date_range( + ... start="2021-01-01 13:00:00", periods=30, freq="1s" + ... ), + ... "int64_col": range(30), + ... "int64_too": range(10, 40), + ... 
} Resample on a DataFrame with index: - >>> df = bpd.DataFrame(data).set_index("timestamp_col") - >>> df.resample(rule="7s").min() - int64_col int64_too - 2021-01-01 12:59:55 0 10 - 2021-01-01 13:00:02 2 12 - 2021-01-01 13:00:09 9 19 - 2021-01-01 13:00:16 16 26 - 2021-01-01 13:00:23 23 33 - - [5 rows x 2 columns] + >>> df = bpd.DataFrame(data).set_index("timestamp_col") + >>> df.resample(rule="7s").min() + int64_col int64_too + 2021-01-01 12:59:55 0 10 + 2021-01-01 13:00:02 2 12 + 2021-01-01 13:00:09 9 19 + 2021-01-01 13:00:16 16 26 + 2021-01-01 13:00:23 23 33 + + [5 rows x 2 columns] Resample with column and origin set to 'start': - >>> df = bpd.DataFrame(data) - >>> df.resample(rule="7s", on = "timestamp_col", origin="start").min() - int64_col int64_too - 2021-01-01 13:00:00 0 10 - 2021-01-01 13:00:07 7 17 - 2021-01-01 13:00:14 14 24 - 2021-01-01 13:00:21 21 31 - 2021-01-01 13:00:28 28 38 - - [5 rows x 2 columns] + >>> df = bpd.DataFrame(data) + >>> df.resample(rule="7s", on = "timestamp_col", origin="start").min() + int64_col int64_too + 2021-01-01 13:00:00 0 10 + 2021-01-01 13:00:07 7 17 + 2021-01-01 13:00:14 14 24 + 2021-01-01 13:00:21 21 31 + 2021-01-01 13:00:28 28 38 + + [5 rows x 2 columns] Args: rule (str): @@ -4813,6 +4813,7 @@ def resample( Returns: DataFrameGroupBy: DataFrameGroupBy object. 
""" + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def round(self, decimals): """ diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 8de1c10f93..2c0f493d81 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -3,6 +3,7 @@ """ from __future__ import annotations +import datetime from typing import ( Hashable, IO, @@ -19,6 +20,7 @@ from bigframes_vendored.pandas.core.generic import NDFrame import numpy import numpy as np +import pandas as pd from pandas._typing import Axis, FilePath, NaPosition, WriteBuffer from pandas.api import extensions as pd_ext @@ -2502,6 +2504,68 @@ def replace( """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def resample( + self, + rule: str, + *, + closed: Optional[Literal["right", "left"]] = None, + label: Optional[Literal["right", "left"]] = None, + level=None, + origin: Union[ + Union[pd.Timestamp, datetime.datetime, numpy.datetime64, int, float, str], + Literal["epoch", "start", "start_day", "end", "end_day"], + ] = "start_day", + ): + """Resample time-series data. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> data = { + ... "timestamp_col": pd.date_range( + ... start="2021-01-01 13:00:00", periods=30, freq="1s" + ... ), + ... "int64_col": range(30), + ... } + >>> s = bpd.DataFrame(data).set_index("timestamp_col") + >>> s.resample(rule="7s", origin="epoch").min() + int64_col + 2021-01-01 12:59:56 0 + 2021-01-01 13:00:03 3 + 2021-01-01 13:00:10 10 + 2021-01-01 13:00:17 17 + 2021-01-01 13:00:24 24 + + [5 rows x 1 columns] + + Args: + rule (str): + The offset string representing target conversion. + Offsets 'ME', 'YE', 'QE', 'BME', 'BA', 'BQE', and 'W' are *not* + supported. + closed (Literal['left'] | None): + Which side of bin interval is closed. The default is 'left' for + all supported frequency offsets. 
+ label (Literal['left'] | None): + Which bin edge label to label bucket with. The default is 'left' + for all supported frequency offsets. + on (str, default None): + For a DataFrame, column to use instead of index for resampling. Column + must be datetime-like. + level (str or int, default None): + For a MultiIndex, level (name or number) to use for resampling. + level must be datetime-like. + origin (str, default 'start_day'): + The timestamp on which to adjust the grouping. Must be one of the following: + 'epoch': origin is 1970-01-01 + 'start': origin is the first value of the timeseries + 'start_day': origin is the first day at midnight of the timeseries + Origin values 'end' and 'end_day' are *not* supported. + Returns: + SeriesGroupBy: SeriesGroupBy object. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def dropna(self, *, axis=0, inplace: bool = False, how=None) -> Series: """ Return a new Series with missing values removed. From 90ff23a21d62798543f5e2cfa0fe8b424211688a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Tue, 4 Nov 2025 18:30:35 +0000 Subject: [PATCH 4/4] fix dataframe tests --- tests/system/small/test_dataframe.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index d8a750aa75..475f98407b 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -5915,12 +5915,6 @@ def test_dataframe_explode_xfail(col_names): pytest.param("datetime_col", "5M", "epoch"), pytest.param("datetime_col", "3Q", "start_day"), pytest.param("datetime_col", "3YE", "start"), - pytest.param( - "int64_col", "100D", "start", marks=pytest.mark.xfail(raises=TypeError) - ), - pytest.param( - "datetime_col", "100D", "end", marks=pytest.mark.xfail(raises=ValueError) - ), ], ) def test_resample_with_column( @@ -5929,7 +5923,7 @@ def test_resample_with_column( # TODO: supply 
a reason why this isn't compatible with pandas 1.x pytest.importorskip("pandas", minversion="2.0.0") bf_result = ( - scalars_df_index._resample(rule=rule, on=on, origin=origin)[ + scalars_df_index.resample(rule=rule, on=on, origin=origin)[ ["int64_col", "int64_too"] ] .max() @@ -6042,7 +6036,7 @@ def test_resample_start_time(rule, origin, data): scalars_pandas_df_index = pd.DataFrame(data).set_index(col) scalars_pandas_df_index.index.name = None - bf_result = scalars_df_index._resample(rule=rule, origin=origin).min().to_pandas() + bf_result = scalars_df_index.resample(rule=rule, origin=origin).min().to_pandas() pd_result = scalars_pandas_df_index.resample(rule=rule, origin=origin).min()