Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bigframes/operations/_matplotlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
PLOT_CLASSES: dict[str, PLOT_TYPES] = {
"area": core.AreaPlot,
"bar": core.BarPlot,
"barh": core.BarhPlot,
"pie": core.PiePlot,
"line": core.LinePlot,
"scatter": core.ScatterPlot,
"hist": hist.HistPlot,
Expand Down
32 changes: 24 additions & 8 deletions bigframes/operations/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,12 @@ def _kind(self):

@property
def _sampling_warning_msg(self) -> typing.Optional[str]:
    """Return the warning template shown when plot data has been downsampled.

    The template carries two named placeholders, ``{sampling_n}`` and
    ``{total_n}``; it is returned unformatted.
    NOTE(review): presumably the caller fills the placeholders with
    ``str.format`` -- the call site is not visible here, confirm.

    Returns:
        The message template, or ``None`` in subclasses that override this
        property to opt out of the warning.
    """
    # A stale ``return None`` used to sit ahead of this statement and made
    # the message unreachable; the template is now actually returned.
    return (
        "To optimize plotting performance, your data has been downsampled to {sampling_n} "
        "rows from the original {total_n} rows. This may result in some data points "
        "not being displayed. For a more comprehensive view, consider pre-processing "
        "your data by aggregating it or selecting the top categories."
    )

def __init__(self, data, **kwargs) -> None:
self.kwargs = kwargs
Expand Down Expand Up @@ -92,6 +97,10 @@ def _compute_plot_data(self):


class AreaPlot(SamplingPlot):
    """Sampling-based plot for the ``"area"`` kind.

    Overrides ``_sampling_warning_msg`` to return ``None``.
    """

    @property
    def _kind(self) -> typing.Literal["area"]:
        """Plot kind identifier for this class."""
        return "area"

    @property
    def _sampling_warning_msg(self) -> typing.Optional[str]:
        """No downsampling warning message is provided for area plots."""
        return None
Expand All @@ -102,14 +111,17 @@ class BarPlot(SamplingPlot):
def _kind(self) -> typing.Literal["bar"]:
return "bar"


class BarhPlot(SamplingPlot):
    """Sampling-based plot for the ``"barh"`` (horizontal bar) kind."""

    @property
    def _kind(self) -> typing.Literal["barh"]:
        """Plot kind identifier for this class.

        Declared as a property, like ``_kind`` on the sibling plot classes,
        so that ``self._kind`` evaluates to the string rather than to a
        bound method.
        """
        return "barh"

    @property
    def _sampling_warning_msg(self) -> typing.Optional[str]:
        """Return the warning template shown when the data was downsampled.

        The template carries the named placeholders ``{sampling_n}`` and
        ``{total_n}`` and is returned unformatted.
        """
        return (
            "To optimize plotting performance, your data has been downsampled to {sampling_n} "
            "rows from the original {total_n} rows. This may result in some data points "
            "not being displayed. For a more comprehensive view, consider pre-processing "
            "your data by aggregating it or selecting the top categories."
        )


class PiePlot(SamplingPlot):
    """Sampling-based plot for the ``"pie"`` kind."""

    @property
    def _kind(self) -> typing.Literal["pie"]:
        """Plot kind identifier for this class."""
        return "pie"


class LinePlot(SamplingPlot):
Expand All @@ -123,6 +135,10 @@ class ScatterPlot(SamplingPlot):
def _kind(self) -> typing.Literal["scatter"]:
return "scatter"

@property
def _sampling_warning_msg(self) -> typing.Optional[str]:
    """No downsampling warning message is provided for this plot kind."""
    return None

def __init__(self, data, **kwargs) -> None:
super().__init__(data, **kwargs)

Expand Down
19 changes: 17 additions & 2 deletions bigframes/operations/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
class PlotAccessor(vendordt.PlotAccessor):
__doc__ = vendordt.PlotAccessor.__doc__

# Plot kinds accepted for both Series and DataFrame inputs.
_common_kinds = ("line", "area", "hist", "bar", "barh", "pie")
# Plot kinds that only make sense for DataFrame inputs.  The entry used to be
# spelled "hexbin," (comma inside the string literal), which could never match
# the kind name "hexbin".
# NOTE(review): no hexbin plot class is visible in PLOT_CLASSES from here --
# confirm whether "hexbin" should be listed at all.
_dataframe_kinds = ("scatter", "hexbin")
_all_kinds = _common_kinds + _dataframe_kinds

def __call__(self, **kwargs):
Expand Down Expand Up @@ -82,6 +82,21 @@ def bar(
):
return self(kind="bar", x=x, y=y, **kwargs)

def barh(
    self,
    x: typing.Optional[typing.Hashable] = None,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    """Dispatch to ``self(...)`` with ``kind="barh"``.

    Args:
        x: Forwarded unchanged as the ``x`` keyword.
        y: Forwarded unchanged as the ``y`` keyword.
        **kwargs: Extra keyword arguments forwarded unchanged.

    Returns:
        Whatever calling ``self`` returns.
    """
    fixed_args = dict(kind="barh", x=x, y=y)
    return self(**fixed_args, **kwargs)

def pie(
    self,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    """Dispatch to ``self(...)`` with ``kind="pie"``.

    Args:
        y: Forwarded unchanged as the ``y`` keyword.
        **kwargs: Extra keyword arguments forwarded unchanged.

    Returns:
        Whatever calling ``self`` returns.
    """
    fixed_args = dict(kind="pie", y=y)
    return self(**fixed_args, **kwargs)

def scatter(
self,
x: typing.Optional[typing.Hashable] = None,
Expand Down
36 changes: 36 additions & 0 deletions tests/system/small/operations/test_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,42 @@ def test_bar(scalars_dfs, col_names, alias):
tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1])


@pytest.mark.parametrize(
    ("col_names",),
    [
        pytest.param(["int64_col", "float64_col", "int64_too"], id="df"),
        pytest.param(["int64_col"], id="series"),
    ],
)
def test_barh(scalars_dfs, col_names):
    """Check that ``plot.barh()`` matches pandas on axis ticks and line data."""
    bf_df, pandas_df = scalars_dfs
    ax = bf_df[col_names].plot.barh()
    pd_ax = pandas_df[col_names].plot.barh()

    # Tick positions on both axes must agree with the pandas reference plot.
    tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks())
    tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())

    # NOTE(review): bar charts are usually drawn as patches, so ``ax.lines``
    # may be empty and this loop may compare nothing -- confirm.
    for bf_line, expected_line in zip(ax.lines, pd_ax.lines):
        # Only the y coordinates of each line are compared.
        _, bf_y = bf_line.get_data()
        _, expected_y = expected_line.get_data()
        tm.assert_almost_equal(bf_y, expected_y)


@pytest.mark.parametrize(
    ("col_names",),
    [
        pytest.param(["int64_col", "float64_col", "int64_too"], id="df"),
        pytest.param(["int64_col"], id="series"),
    ],
)
def test_pie(scalars_dfs, col_names):
    """Check that ``plot.pie(y="int64_col")`` matches pandas on ticks and line data."""
    bf_df, pandas_df = scalars_dfs
    # Both frames go through ``abs()`` before plotting.
    ax = bf_df[col_names].abs().plot.pie(y="int64_col")
    pd_ax = pandas_df[col_names].abs().plot.pie(y="int64_col")

    # Tick positions on both axes must agree with the pandas reference plot.
    tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks())
    tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())

    # NOTE(review): pie charts are usually drawn as patches, so ``ax.lines``
    # may be empty and this loop may compare nothing -- confirm.
    for bf_line, expected_line in zip(ax.lines, pd_ax.lines):
        # Only the y coordinates of each line are compared.
        _, bf_y = bf_line.get_data()
        _, expected_y = expected_line.get_data()
        tm.assert_almost_equal(bf_y, expected_y)


@pytest.mark.parametrize(
("col_names", "alias"),
[
Expand Down
103 changes: 103 additions & 0 deletions third_party/bigframes_vendored/pandas/plotting/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,109 @@ def bar(
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def barh(
    self,
    x: typing.Optional[typing.Hashable] = None,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    """
    Draw a horizontal bar plot.

    This function calls `pandas.plot` to generate a plot with a random sample
    of items. For consistent results, the random sampling is reproducible.
    Use the `sampling_random_state` parameter to modify the sampling seed.

    **Examples:**

    Basic plot.

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None
        >>> df = bpd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
        >>> ax = df.plot.barh(x='lab', y='val', rot=0)

    Plot a whole dataframe to a barh plot. Each column is assigned a distinct color,
    and each row is nested in a group along the vertical axis.

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = bpd.DataFrame({'speed': speed, 'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(rot=0)

    Plot stacked barh charts for the DataFrame.

        >>> ax = df.plot.barh(stacked=True)

    If you don’t like the default colours, you can specify how you’d like each column
    to be colored.

        >>> axes = df.plot.barh(
        ...     rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
        ... )

    Args:
        x (label or position, optional):
            Allows plotting of one column versus another. If not specified, the index
            of the DataFrame is used.
        y (label or position, optional):
            Allows plotting of one column versus another. If not specified, all numerical
            columns are used.
        **kwargs:
            Additional keyword arguments are documented in
            :meth:`DataFrame.plot`.

    Returns:
        matplotlib.axes.Axes or numpy.ndarray:
            Horizontal bar plot, or array of horizontal bar plots if subplots is True.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def pie(
    self,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    """
    Generate a pie plot.

    A pie plot is a proportional representation of the numerical data in a
    column. This function wraps :meth:`matplotlib.pyplot.pie` for the
    specified column. If no column reference is passed and
    ``subplots=True``, a pie plot is drawn for each numerical column
    independently.

    **Examples:**

    In the example below we have a DataFrame with information about each
    planet's mass and radius. We pass the 'mass' column to the
    pie function to get a pie plot.

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None

        >>> df = bpd.DataFrame({'mass': [0.330, 4.87 , 5.97],
        ...                     'radius': [2439.7, 6051.8, 6378.1]},
        ...                    index=['Mercury', 'Venus', 'Earth'])
        >>> plot = df.plot.pie(y='mass', figsize=(5, 5))

    Omit ``y`` and pass ``subplots=True`` to draw one pie plot per numerical
    column.

        >>> plot = df.plot.pie(subplots=True, figsize=(11, 6))

    Args:
        y (int or label, optional):
            Label or position of the column to plot.
            If not provided, ``subplots=True`` argument must be passed.
        **kwargs:
            Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns:
        matplotlib.axes.Axes or np.ndarray:
            A NumPy array is returned when `subplots` is True.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def scatter(
self,
x: typing.Optional[typing.Hashable] = None,
Expand Down