diff --git a/bigframes/operations/_matplotlib/__init__.py b/bigframes/operations/_matplotlib/__init__.py index 5f99d3b50a..caacadf5fe 100644 --- a/bigframes/operations/_matplotlib/__init__.py +++ b/bigframes/operations/_matplotlib/__init__.py @@ -22,6 +22,8 @@ PLOT_CLASSES: dict[str, PLOT_TYPES] = { "area": core.AreaPlot, "bar": core.BarPlot, + "barh": core.BarhPlot, + "pie": core.PiePlot, "line": core.LinePlot, "scatter": core.ScatterPlot, "hist": hist.HistPlot, diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py index a5f53b9f64..06fb5235d7 100644 --- a/bigframes/operations/_matplotlib/core.py +++ b/bigframes/operations/_matplotlib/core.py @@ -55,7 +55,12 @@ def _kind(self): @property def _sampling_warning_msg(self) -> typing.Optional[str]: - return None + return ( + "To optimize plotting performance, your data has been downsampled to {sampling_n} " + "rows from the original {total_n} rows. This may result in some data points " + "not being displayed. For a more comprehensive view, consider pre-processing " + "your data by aggregating it or selecting the top categories." + ) def __init__(self, data, **kwargs) -> None: self.kwargs = kwargs @@ -92,6 +97,10 @@ def _compute_plot_data(self): class AreaPlot(SamplingPlot): + @property + def _sampling_warning_msg(self) -> typing.Optional[str]: + return None + @property def _kind(self) -> typing.Literal["area"]: return "area" @@ -102,14 +111,17 @@ class BarPlot(SamplingPlot): def _kind(self) -> typing.Literal["bar"]: return "bar" + +class BarhPlot(SamplingPlot): @property - def _sampling_warning_msg(self) -> typing.Optional[str]: - return ( - "To optimize plotting performance, your data has been downsampled to {sampling_n} " - "rows from the original {total_n} rows. This may result in some data points " - "not being displayed. For a more comprehensive view, consider pre-processing " - "your data by aggregating it or selecting the top categories." 
- ) + def _kind(self) -> typing.Literal["barh"]: + return "barh" + + +class PiePlot(SamplingPlot): + @property + def _kind(self) -> typing.Literal["pie"]: + return "pie" class LinePlot(SamplingPlot): @@ -123,6 +135,10 @@ class ScatterPlot(SamplingPlot): def _kind(self) -> typing.Literal["scatter"]: return "scatter" + @property + def _sampling_warning_msg(self) -> typing.Optional[str]: + return None + def __init__(self, data, **kwargs) -> None: super().__init__(data, **kwargs) diff --git a/bigframes/operations/plotting.py b/bigframes/operations/plotting.py index a741ed5dd9..df0c138f0f 100644 --- a/bigframes/operations/plotting.py +++ b/bigframes/operations/plotting.py @@ -25,8 +25,8 @@ class PlotAccessor(vendordt.PlotAccessor): __doc__ = vendordt.PlotAccessor.__doc__ - _common_kinds = ("line", "area", "hist", "bar") + _common_kinds = ("line", "area", "hist", "bar", "barh", "pie") _dataframe_kinds = ("scatter",) _all_kinds = _common_kinds + _dataframe_kinds def __call__(self, **kwargs): @@ -82,6 +82,21 @@ def bar( ): return self(kind="bar", x=x, y=y, **kwargs) + def barh( + self, + x: typing.Optional[typing.Hashable] = None, + y: typing.Optional[typing.Hashable] = None, + **kwargs, + ): + return self(kind="barh", x=x, y=y, **kwargs) + + def pie( + self, + y: typing.Optional[typing.Hashable] = None, + **kwargs, + ): + return self(kind="pie", y=y, **kwargs) + def scatter( self, x: typing.Optional[typing.Hashable] = None, diff --git a/tests/system/small/operations/test_plotting.py b/tests/system/small/operations/test_plotting.py index c2f3ba423f..2585ac8e81 100644 --- a/tests/system/small/operations/test_plotting.py +++ b/tests/system/small/operations/test_plotting.py @@ -264,6 +264,42 @@ def test_bar(scalars_dfs, col_names, alias): tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1]) +@pytest.mark.parametrize( + ("col_names",), + [ + pytest.param(["int64_col", "float64_col", "int64_too"], id="df"), 
pytest.param(["int64_col"], id="series"), + ], +) +def test_barh(scalars_dfs, col_names): + scalars_df, scalars_pandas_df = scalars_dfs + ax = scalars_df[col_names].plot.barh() + pd_ax = scalars_pandas_df[col_names].plot.barh() + tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks()) + tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks()) + for line, pd_line in zip(ax.lines, pd_ax.lines): + # Compare y coordinates between the lines + tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1]) + + +@pytest.mark.parametrize( + ("col_names",), + [ + pytest.param(["int64_col", "float64_col", "int64_too"], id="df"), + pytest.param(["int64_col"], id="series"), + ], +) +def test_pie(scalars_dfs, col_names): + scalars_df, scalars_pandas_df = scalars_dfs + ax = scalars_df[col_names].abs().plot.pie(y="int64_col") + pd_ax = scalars_pandas_df[col_names].abs().plot.pie(y="int64_col") + tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks()) + tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks()) + for line, pd_line in zip(ax.lines, pd_ax.lines): + # Compare y coordinates between the lines + tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1]) + + @pytest.mark.parametrize( ("col_names", "alias"), [ diff --git a/third_party/bigframes_vendored/pandas/plotting/_core.py b/third_party/bigframes_vendored/pandas/plotting/_core.py index 4ed5c8eb0b..b0c28ddfe9 100644 --- a/third_party/bigframes_vendored/pandas/plotting/_core.py +++ b/third_party/bigframes_vendored/pandas/plotting/_core.py @@ -275,6 +275,109 @@ def bar( """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def barh( + self, + x: typing.Optional[typing.Hashable] = None, + y: typing.Optional[typing.Hashable] = None, + **kwargs, + ): + """ + Draw a horizontal bar plot. + + This function calls `pandas.plot` to generate a plot with a random sample + of items. For consistent results, the random sampling is reproducible. 
+ Use the `sampling_random_state` parameter to modify the sampling seed. + + **Examples:** + + Basic plot. + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> df = bpd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]}) + >>> ax = df.plot.barh(x='lab', y='val', rot=0) + + Plot a whole DataFrame as a horizontal bar plot. Each column is assigned a distinct color, + and each row is nested in a group along the vertical axis. + + >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88] + >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28] + >>> index = ['snail', 'pig', 'elephant', + ... 'rabbit', 'giraffe', 'coyote', 'horse'] + >>> df = bpd.DataFrame({'speed': speed, 'lifespan': lifespan}, index=index) + >>> ax = df.plot.barh(rot=0) + + Plot stacked barh charts for the DataFrame. + + >>> ax = df.plot.barh(stacked=True) + + If you don’t like the default colours, you can specify how you’d like each column + to be colored. + + >>> axes = df.plot.barh( + ... rot=0, subplots=True, color={"speed": "red", "lifespan": "green"} + ... ) + + Args: + x (label or position, optional): + Allows plotting of one column versus another. If not specified, the index + of the DataFrame is used. + y (label or position, optional): + Allows plotting of one column versus another. If not specified, all numerical + columns are used. + **kwargs: + Additional keyword arguments are documented in + :meth:`DataFrame.plot`. + + Returns: + matplotlib.axes.Axes or numpy.ndarray: + Horizontal bar plot, or array of horizontal bar plots if subplots is True. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def pie( + self, + y: typing.Optional[typing.Hashable] = None, + **kwargs, + ): + """ + Generate a pie plot. + + A pie plot is a proportional representation of the numerical data in a + column. This function wraps :meth:`matplotlib.pyplot.pie` for the + specified column. 
If no column reference is passed and + ``subplots=True`` a pie plot is drawn for each numerical column + independently. + + **Examples:** + + In the example below we have a DataFrame with the information about + planet's mass and radius. We pass the 'mass' column to the + pie function to get a pie plot. + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'mass': [0.330, 4.87 , 5.97], + ... 'radius': [2439.7, 6051.8, 6378.1]}, + ... index=['Mercury', 'Venus', 'Earth']) + >>> plot = df.plot.pie(y='mass', figsize=(5, 5)) + + >>> plot = df.plot.pie(subplots=True, figsize=(11, 6)) + + Args: + y (int or label, optional): + Label or position of the column to plot. + If not provided, ``subplots=True`` argument must be passed. + **kwargs: + Keyword arguments to pass on to :meth:`DataFrame.plot`. + + Returns: + matplotlib.axes.Axes or np.ndarray: + A NumPy array is returned when `subplots` is True. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def scatter( self, x: typing.Optional[typing.Hashable] = None,