From 8af1de8bf009b70039fb7030f3316f38c149fbce Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 7 Oct 2025 19:56:02 +0000 Subject: [PATCH 1/5] feat: Add barh, pie plot types --- bigframes/dataframe.py | 19 ++++ bigframes/operations/_matplotlib/__init__.py | 2 + bigframes/operations/_matplotlib/core.py | 30 +++++ bigframes/operations/plotting.py | 20 +++- bigframes/series.py | 19 ++++ .../system/small/operations/test_plotting.py | 46 ++++++++ .../pandas/plotting/_core.py | 103 ++++++++++++++++++ 7 files changed, 237 insertions(+), 2 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 1bde29506d..87dd860b8f 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -5279,6 +5279,25 @@ def bar( bar.__doc__ = inspect.getdoc(plotting.PlotAccessor.bar) + def barh( + self, + x: typing.Optional[typing.Hashable] = None, + y: typing.Optional[typing.Hashable] = None, + **kwargs, + ): + return self.plot.barh(x=x, y=y, **kwargs) + + barh.__doc__ = inspect.getdoc(plotting.PlotAccessor.barh) + + def pie( + self, + y: typing.Optional[typing.Hashable] = None, + **kwargs, + ): + return self.plot.pie(y=y, **kwargs) + + pie.__doc__ = inspect.getdoc(plotting.PlotAccessor.pie) + def scatter( self, x: typing.Optional[typing.Hashable] = None, diff --git a/bigframes/operations/_matplotlib/__init__.py b/bigframes/operations/_matplotlib/__init__.py index 5f99d3b50a..caacadf5fe 100644 --- a/bigframes/operations/_matplotlib/__init__.py +++ b/bigframes/operations/_matplotlib/__init__.py @@ -22,6 +22,8 @@ PLOT_CLASSES: dict[str, PLOT_TYPES] = { "area": core.AreaPlot, "bar": core.BarPlot, + "barh": core.BarhPlot, + "pie": core.PiePlot, "line": core.LinePlot, "scatter": core.ScatterPlot, "hist": hist.HistPlot, diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py index a5f53b9f64..a7dfeeec31 100644 --- a/bigframes/operations/_matplotlib/core.py +++ b/bigframes/operations/_matplotlib/core.py @@ -112,6 +112,36 @@ 
def _sampling_warning_msg(self) -> typing.Optional[str]: ) +class BarhPlot(SamplingPlot): + @property + def _kind(self) -> typing.Literal["barh"]: + return "barh" + + @property + def _sampling_warning_msg(self) -> typing.Optional[str]: + return ( + "To optimize plotting performance, your data has been downsampled to {sampling_n} " + "rows from the original {total_n} rows. This may result in some data points " + "not being displayed. For a more comprehensive view, consider pre-processing " + "your data by aggregating it or selecting the top categories." + ) + + +class PiePlot(SamplingPlot): + @property + def _kind(self) -> typing.Literal["pie"]: + return "pie" + + @property + def _sampling_warning_msg(self) -> typing.Optional[str]: + return ( + "To optimize plotting performance, your data has been downsampled to {sampling_n} " + "rows from the original {total_n} rows. This may result in some data points " + "not being displayed. For a more comprehensive view, consider pre-processing " + "your data by aggregating it or selecting the top categories." 
+ ) + + class LinePlot(SamplingPlot): @property def _kind(self) -> typing.Literal["line"]: diff --git a/bigframes/operations/plotting.py b/bigframes/operations/plotting.py index a741ed5dd9..0bbe7868a7 100644 --- a/bigframes/operations/plotting.py +++ b/bigframes/operations/plotting.py @@ -25,8 +25,8 @@ class PlotAccessor(vendordt.PlotAccessor): __doc__ = vendordt.PlotAccessor.__doc__ - _common_kinds = ("line", "area", "hist", "bar") - _dataframe_kinds = ("scatter",) + _common_kinds = ("line", "area", "hist", "bar", "barh", "pie") + _dataframe_kinds = ("scatter", "hexbin,") _all_kinds = _common_kinds + _dataframe_kinds def __call__(self, **kwargs): @@ -82,6 +82,22 @@ def bar( ): return self(kind="bar", x=x, y=y, **kwargs) + def barh( + self, + x: typing.Optional[typing.Hashable] = None, + y: typing.Optional[typing.Hashable] = None, + **kwargs, + ): + return self(kind="barh", x=x, y=y, **kwargs) + + def pie( + self, + x: typing.Optional[typing.Hashable] = None, + y: typing.Optional[typing.Hashable] = None, + **kwargs, + ): + return self(kind="pie", x=x, y=y, **kwargs) + def scatter( self, x: typing.Optional[typing.Hashable] = None, diff --git a/bigframes/series.py b/bigframes/series.py index 490298d8dd..f0df8702c5 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -2530,6 +2530,25 @@ def bar( bar.__doc__ = inspect.getdoc(plotting.PlotAccessor.bar) + def barh( + self, + x: typing.Optional[typing.Hashable] = None, + y: typing.Optional[typing.Hashable] = None, + **kwargs, + ): + return self.plot.barh(x=x, y=y, **kwargs) + + barh.__doc__ = inspect.getdoc(plotting.PlotAccessor.barh) + + def pie( + self, + y: typing.Optional[typing.Hashable] = None, + **kwargs, + ): + return self.plot.pie(y=y, **kwargs) + + pie.__doc__ = inspect.getdoc(plotting.PlotAccessor.pie) + def _slice( self, start: typing.Optional[int] = None, diff --git a/tests/system/small/operations/test_plotting.py b/tests/system/small/operations/test_plotting.py index c2f3ba423f..efe7eb8985 100644 
--- a/tests/system/small/operations/test_plotting.py +++ b/tests/system/small/operations/test_plotting.py @@ -264,6 +264,52 @@ def test_bar(scalars_dfs, col_names, alias): tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1]) +@pytest.mark.parametrize( + ("col_names", "alias"), + [ + pytest.param(["int64_col", "float64_col", "int64_too"], True, id="df_alias"), + pytest.param(["int64_col", "float64_col", "int64_too"], False, id="df"), + pytest.param(["int64_col"], True, id="series_alias"), + pytest.param(["int64_col"], False, id="series"), + ], +) +def test_barh(scalars_dfs, col_names, alias): + scalars_df, scalars_pandas_df = scalars_dfs + if alias: + ax = scalars_df[col_names].barh() + else: + ax = scalars_df[col_names].plot.barh() + pd_ax = scalars_pandas_df[col_names].plot.barh() + tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks()) + tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks()) + for line, pd_line in zip(ax.lines, pd_ax.lines): + # Compare y coordinates between the lines + tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1]) + + +@pytest.mark.parametrize( + ("col_names", "alias"), + [ + pytest.param(["int64_col", "float64_col", "int64_too"], True, id="df_alias"), + pytest.param(["int64_col", "float64_col", "int64_too"], False, id="df"), + pytest.param(["int64_col"], True, id="series_alias"), + pytest.param(["int64_col"], False, id="series"), + ], +) +def test_pie(scalars_dfs, col_names, alias): + scalars_df, scalars_pandas_df = scalars_dfs + if alias: + ax = scalars_df[col_names].abs().pie(y="int64_col") + else: + ax = scalars_df[col_names].abs().plot.pie(y="int64_col") + pd_ax = scalars_pandas_df[col_names].abs().plot.pie(y="int64_col") + tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks()) + tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks()) + for line, pd_line in zip(ax.lines, pd_ax.lines): + # Compare y coordinates between the lines + tm.assert_almost_equal(line.get_data()[1], 
pd_line.get_data()[1]) + + @pytest.mark.parametrize( ("col_names", "alias"), [ diff --git a/third_party/bigframes_vendored/pandas/plotting/_core.py b/third_party/bigframes_vendored/pandas/plotting/_core.py index 4ed5c8eb0b..b0c28ddfe9 100644 --- a/third_party/bigframes_vendored/pandas/plotting/_core.py +++ b/third_party/bigframes_vendored/pandas/plotting/_core.py @@ -275,6 +275,109 @@ def bar( """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def barh( + self, + x: typing.Optional[typing.Hashable] = None, + y: typing.Optional[typing.Hashable] = None, + **kwargs, + ): + """ + Draw a horizontal bar plot. + + This function calls `pandas.plot` to generate a plot with a random sample + of items. For consistent results, the random sampling is reproducible. + Use the `sampling_random_state` parameter to modify the sampling seed. + + **Examples:** + + Basic plot. + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> df = bpd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]}) + >>> ax = df.plot.barh(x='lab', y='val', rot=0) + + Plot a whole dataframe to a barh plot. Each column is assigned a distinct color, + and each row is nested in a group along the vertical axis. + + >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88] + >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28] + >>> index = ['snail', 'pig', 'elephant', + ... 'rabbit', 'giraffe', 'coyote', 'horse'] + >>> df = bpd.DataFrame({'speed': speed, 'lifespan': lifespan}, index=index) + >>> ax = df.plot.barh(rot=0) + + Plot stacked barh charts for the DataFrame. + + >>> ax = df.plot.barh(stacked=True) + + If you don’t like the default colours, you can specify how you’d like each column + to be colored. + + >>> axes = df.plot.barh( + ... rot=0, subplots=True, color={"speed": "red", "lifespan": "green"} + ... ) + + Args: + x (label or position, optional): + Allows plotting of one column versus another. If not specified, the index + of the DataFrame is used.
+ y (label or position, optional): + Allows plotting of one column versus another. If not specified, all numerical + columns are used. + **kwargs: + Additional keyword arguments are documented in + :meth:`DataFrame.plot`. + + Returns: + matplotlib.axes.Axes or numpy.ndarray: + Horizontal bar plot, or array of horizontal bar plots if subplots is True. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def pie( + self, + y: typing.Optional[typing.Hashable] = None, + **kwargs, + ): + """ + Generate a pie plot. + + A pie plot is a proportional representation of the numerical data in a + column. This function wraps :meth:`matplotlib.pyplot.pie` for the + specified column. If no column reference is passed and + ``subplots=True`` a pie plot is drawn for each numerical column + independently. + + **Examples:** + + In the example below we have a DataFrame with the information about + planet's mass and radius. We pass the 'mass' column to the + pie function to get a pie plot. + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'mass': [0.330, 4.87 , 5.97], + ... 'radius': [2439.7, 6051.8, 6378.1]}, + ... index=['Mercury', 'Venus', 'Earth']) + >>> plot = df.plot.pie(y='mass', figsize=(5, 5)) + + >>> plot = df.plot.pie(subplots=True, figsize=(11, 6)) + + Args: + y (int or label, optional): + Label or position of the column to plot. + If not provided, ``subplots=True`` argument must be passed. + **kwargs: + Keyword arguments to pass on to :meth:`DataFrame.plot`. + + Returns: + matplotlib.axes.Axes or np.ndarray: + A NumPy array is returned when `subplots` is True.
+ """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def scatter( self, x: typing.Optional[typing.Hashable] = None, From 15266787dd6b740591f093cb56b7a7ec037419e2 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 7 Oct 2025 20:45:06 +0000 Subject: [PATCH 2/5] remote erroneous x arg from pie meth --- bigframes/operations/plotting.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bigframes/operations/plotting.py b/bigframes/operations/plotting.py index 0bbe7868a7..df0c138f0f 100644 --- a/bigframes/operations/plotting.py +++ b/bigframes/operations/plotting.py @@ -92,11 +92,10 @@ def barh( def pie( self, - x: typing.Optional[typing.Hashable] = None, y: typing.Optional[typing.Hashable] = None, **kwargs, ): - return self(kind="pie", x=x, y=y, **kwargs) + return self(kind="pie", y=y, **kwargs) def scatter( self, From 2d2fcc024db935859a3baf2d0a5663c4a255e007 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Wed, 8 Oct 2025 23:45:48 +0000 Subject: [PATCH 3/5] refactor _sampling_warning_msg --- bigframes/operations/_matplotlib/core.py | 42 ++++++++---------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py index a7dfeeec31..06fb5235d7 100644 --- a/bigframes/operations/_matplotlib/core.py +++ b/bigframes/operations/_matplotlib/core.py @@ -55,7 +55,12 @@ def _kind(self): @property def _sampling_warning_msg(self) -> typing.Optional[str]: - return None + return ( + "To optimize plotting performance, your data has been downsampled to {sampling_n} " + "rows from the original {total_n} rows. This may result in some data points " + "not being displayed. For a more comprehensive view, consider pre-processing " + "your data by aggregating it or selecting the top categories." 
+ ) def __init__(self, data, **kwargs) -> None: self.kwargs = kwargs @@ -92,6 +97,10 @@ def _compute_plot_data(self): class AreaPlot(SamplingPlot): + @property + def _sampling_warning_msg(self) -> typing.Optional[str]: + return None + @property def _kind(self) -> typing.Literal["area"]: return "area" @@ -102,45 +111,18 @@ class BarPlot(SamplingPlot): def _kind(self) -> typing.Literal["bar"]: return "bar" - @property - def _sampling_warning_msg(self) -> typing.Optional[str]: - return ( - "To optimize plotting performance, your data has been downsampled to {sampling_n} " - "rows from the original {total_n} rows. This may result in some data points " - "not being displayed. For a more comprehensive view, consider pre-processing " - "your data by aggregating it or selecting the top categories." - ) - class BarhPlot(SamplingPlot): @property def _kind(self) -> typing.Literal["barh"]: return "barh" - @property - def _sampling_warning_msg(self) -> typing.Optional[str]: - return ( - "To optimize plotting performance, your data has been downsampled to {sampling_n} " - "rows from the original {total_n} rows. This may result in some data points " - "not being displayed. For a more comprehensive view, consider pre-processing " - "your data by aggregating it or selecting the top categories." - ) - class PiePlot(SamplingPlot): @property def _kind(self) -> typing.Literal["pie"]: return "pie" - @property - def _sampling_warning_msg(self) -> typing.Optional[str]: - return ( - "To optimize plotting performance, your data has been downsampled to {sampling_n} " - "rows from the original {total_n} rows. This may result in some data points " - "not being displayed. For a more comprehensive view, consider pre-processing " - "your data by aggregating it or selecting the top categories." 
- ) - class LinePlot(SamplingPlot): @property @@ -153,6 +135,10 @@ class ScatterPlot(SamplingPlot): def _kind(self) -> typing.Literal["scatter"]: return "scatter" + @property + def _sampling_warning_msg(self) -> typing.Optional[str]: + return None + def __init__(self, data, **kwargs) -> None: super().__init__(data, **kwargs) From dc089f22058a88f4df430cc33b308355756e629c Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Thu, 9 Oct 2025 18:35:18 +0000 Subject: [PATCH 4/5] remove df, series direct accessors --- bigframes/dataframe.py | 19 ------------------- bigframes/series.py | 19 ------------------- 2 files changed, 38 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 6bd4495b40..bc2bbb963b 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -5279,25 +5279,6 @@ def bar( bar.__doc__ = inspect.getdoc(plotting.PlotAccessor.bar) - def barh( - self, - x: typing.Optional[typing.Hashable] = None, - y: typing.Optional[typing.Hashable] = None, - **kwargs, - ): - return self.plot.barh(x=x, y=y, **kwargs) - - barh.__doc__ = inspect.getdoc(plotting.PlotAccessor.barh) - - def pie( - self, - y: typing.Optional[typing.Hashable] = None, - **kwargs, - ): - return self.plot.pie(y=y, **kwargs) - - pie.__doc__ = inspect.getdoc(plotting.PlotAccessor.pie) - def scatter( self, x: typing.Optional[typing.Hashable] = None, diff --git a/bigframes/series.py b/bigframes/series.py index f0df8702c5..490298d8dd 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -2530,25 +2530,6 @@ def bar( bar.__doc__ = inspect.getdoc(plotting.PlotAccessor.bar) - def barh( - self, - x: typing.Optional[typing.Hashable] = None, - y: typing.Optional[typing.Hashable] = None, - **kwargs, - ): - return self.plot.barh(x=x, y=y, **kwargs) - - barh.__doc__ = inspect.getdoc(plotting.PlotAccessor.barh) - - def pie( - self, - y: typing.Optional[typing.Hashable] = None, - **kwargs, - ): - return self.plot.pie(y=y, **kwargs) - - pie.__doc__ = 
inspect.getdoc(plotting.PlotAccessor.pie) - def _slice( self, start: typing.Optional[int] = None, From 2f0559e6fd5ca6f59efcc4bff3059822699f75f0 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Thu, 9 Oct 2025 19:43:58 +0000 Subject: [PATCH 5/5] remove tests --- .../system/small/operations/test_plotting.py | 30 +++++++------------ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/tests/system/small/operations/test_plotting.py b/tests/system/small/operations/test_plotting.py index efe7eb8985..2585ac8e81 100644 --- a/tests/system/small/operations/test_plotting.py +++ b/tests/system/small/operations/test_plotting.py @@ -265,20 +265,15 @@ def test_bar(scalars_dfs, col_names, alias): @pytest.mark.parametrize( - ("col_names", "alias"), + ("col_names",), [ - pytest.param(["int64_col", "float64_col", "int64_too"], True, id="df_alias"), - pytest.param(["int64_col", "float64_col", "int64_too"], False, id="df"), - pytest.param(["int64_col"], True, id="series_alias"), - pytest.param(["int64_col"], False, id="series"), + pytest.param(["int64_col", "float64_col", "int64_too"], id="df"), + pytest.param(["int64_col"], id="series"), ], ) -def test_barh(scalars_dfs, col_names, alias): +def test_barh(scalars_dfs, col_names): scalars_df, scalars_pandas_df = scalars_dfs - if alias: - ax = scalars_df[col_names].barh() - else: - ax = scalars_df[col_names].plot.barh() + ax = scalars_df[col_names].plot.barh() pd_ax = scalars_pandas_df[col_names].plot.barh() tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks()) tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks()) @@ -288,20 +283,15 @@ def test_barh(scalars_dfs, col_names, alias): @pytest.mark.parametrize( - ("col_names", "alias"), + ("col_names",), [ - pytest.param(["int64_col", "float64_col", "int64_too"], True, id="df_alias"), - pytest.param(["int64_col", "float64_col", "int64_too"], False, id="df"), - pytest.param(["int64_col"], True, id="series_alias"), - pytest.param(["int64_col"], False, id="series"), + 
pytest.param(["int64_col", "float64_col", "int64_too"], id="df"), + pytest.param(["int64_col"], id="series"), ], ) -def test_pie(scalars_dfs, col_names, alias): +def test_pie(scalars_dfs, col_names): scalars_df, scalars_pandas_df = scalars_dfs - if alias: - ax = scalars_df[col_names].abs().pie(y="int64_col") - else: - ax = scalars_df[col_names].abs().plot.pie(y="int64_col") + ax = scalars_df[col_names].abs().plot.pie(y="int64_col") pd_ax = scalars_pandas_df[col_names].abs().plot.pie(y="int64_col") tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks()) tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())