Skip to content

Commit

Permalink
Add simple polars support (#1129)
Browse files Browse the repository at this point in the history
Co-authored-by: maximlt <mliquet@anaconda.com>
Co-authored-by: Maxime Liquet <35924738+maximlt@users.noreply.github.com>
  • Loading branch information
3 people committed Oct 2, 2023
1 parent 8062d52 commit 4e9436d
Show file tree
Hide file tree
Showing 6 changed files with 158 additions and 8 deletions.
14 changes: 14 additions & 0 deletions doc/index.md
Expand Up @@ -101,6 +101,20 @@ alt: Works with GeoPandas
align: center
---
:::
:::{tab-item} Polars
```python
import polars
import hvplot.polars
df_polars = polars.from_pandas(df)
df_polars.hvplot.scatter(x='bill_length_mm', y='bill_depth_mm', by='species')
```
```{image} ./_static/home/dask.gif
---
alt: Works with Polars
align: center
---
:::
:::{tab-item} Intake
```python
Expand Down
1 change: 1 addition & 0 deletions examples/user_guide/Introduction.ipynb
Expand Up @@ -8,6 +8,7 @@
"\n",
"* [Pandas](https://pandas.pydata.org): DataFrame, Series (columnar/tabular data)\n",
"* [Rapids cuDF](https://docs.rapids.ai/api/cudf/stable/): GPU DataFrame, Series (columnar/tabular data)\n",
"* [Polars](https://www.pola.rs/): Polars is a fast DataFrame library/in-memory query engine (columnar/tabular data)\n",
"* [Dask](https://www.dask.org): DataFrame, Series (distributed/out of core arrays and columnar data)\n",
"* [XArray](https://xarray.pydata.org): Dataset, DataArray (labelled multidimensional arrays)\n",
"* [Streamz](https://streamz.readthedocs.io): DataFrame(s), Series(s) (streaming columnar data)\n",
Expand Down
64 changes: 64 additions & 0 deletions hvplot/polars.py
@@ -0,0 +1,64 @@
"""Adds the `.hvplot` method to pl.DataFrame, pl.LazyFrame and pl.Series"""
import itertools

from hvplot import hvPlotTabular, post_patch
from hvplot.converter import HoloViewsConverter
from hvplot.util import is_list_like


class hvPlotTabularPolars(hvPlotTabular):
    """``.hvplot`` accessor for Polars ``DataFrame``, ``LazyFrame`` and ``Series``.

    The wrapped Polars object is converted to pandas before it is handed to
    ``HoloViewsConverter``. For frames, only the columns the plot call refers
    to are selected (and, for a ``LazyFrame``, collected) and converted.
    """

    def _get_converter(self, x=None, y=None, kind=None, **kwds):
        """Build the ``HoloViewsConverter`` for the wrapped Polars object.

        Parameters
        ----------
        x, y : str or list-like of str, optional
            Column name(s) to plot. A falsy value falls back to the entry of
            the same name in the accessor metadata / ``kwds``.
        kind : str, optional
            Plot kind, with the same fallback as ``x`` and ``y``.
        **kwds
            Additional plot options, merged on top of ``self._metadata``.

        Returns
        -------
        HoloViewsConverter
            Converter built on the pandas version of the data.

        Raises
        ------
        ValueError
            If the wrapped object is not a Polars DataFrame, Series or
            LazyFrame.
        """
        import polars as pl

        params = dict(self._metadata, **kwds)
        # Always remove these keys so they can never be forwarded to the
        # converter a second time through **params.
        default_x = params.pop("x", None)
        default_y = params.pop("y", None)
        default_kind = params.pop("kind", None)
        x = x or default_x
        y = y or default_y
        kind = kind or default_kind

        source = self._data
        if isinstance(source, pl.Series):
            data = source.to_pandas()
        elif isinstance(source, (pl.DataFrame, pl.LazyFrame)):
            columns = self._columns_to_convert(source.columns, x, y, params)
            subset = source.select(columns)
            if isinstance(subset, pl.LazyFrame):
                # A LazyFrame has to be materialised before it can be converted.
                subset = subset.collect()
            data = subset.to_pandas()
        else:
            raise ValueError(
                "Only Polars DataFrame, Series, and LazyFrame are supported"
            )

        return HoloViewsConverter(data, x, y, kind=kind, **params)

    @staticmethod
    def _columns_to_convert(frame_columns, x, y, params):
        """Return the column names the plot needs, in the frame's own order.

        With ``hover_cols="all"`` every column is kept. Otherwise a column is
        kept when its name appears as a string option value (or inside a list
        option value) or in ``x``/``y``; if no option names a column, the
        first column of the frame is used as the starting point.
        """
        frame_columns = list(frame_columns)
        if params.get("hover_cols") == "all":
            return frame_columns

        # Column names are strings, so non-string list items can be ignored.
        # Skipping them also avoids hashing arbitrary option values.
        mentioned = set()
        for value in params.values():
            if isinstance(value, str):
                mentioned.add(value)
            elif isinstance(value, list):
                mentioned.update(item for item in value if isinstance(item, str))

        wanted = mentioned.intersection(frame_columns) or {frame_columns[0]}
        for names in (x, y):
            wanted.update(names if is_list_like(names) else (names,))
        wanted.discard(None)
        # NOTE(review): when y is None only the columns found above are
        # selected; confirm the converter does not need further columns to
        # pick its default y values.

        # A set has no stable order; follow the frame's column order so the
        # pandas frame is laid out the same way on every run.
        ordered = [name for name in frame_columns if name in wanted]
        # Names that are not in the frame are kept so that ``select`` reports
        # them instead of them being dropped silently.
        ordered.extend(sorted(wanted.difference(frame_columns), key=str))
        return ordered


def patch(name="hvplot", extension="bokeh", logo=False):
    """Register the plotting accessor on Polars DataFrame, Series and LazyFrame.

    Parameters
    ----------
    name : str
        Namespace (attribute name) under which the accessor is registered.
    extension : str
        Forwarded to ``post_patch``.
    logo : bool
        Forwarded to ``post_patch``.

    Raises
    ------
    ImportError
        If Polars cannot be imported; the original error is attached as the
        cause.
    """
    try:
        import polars as pl
    except ImportError as err:
        # Only a failed import is translated; anything else (for example
        # KeyboardInterrupt) is allowed to propagate unchanged.
        raise ImportError(
            "Could not patch plotting API onto Polars. Polars could not be imported."
        ) from err

    pl.api.register_dataframe_namespace(name)(hvPlotTabularPolars)
    pl.api.register_series_namespace(name)(hvPlotTabularPolars)
    pl.api.register_lazyframe_namespace(name)(hvPlotTabularPolars)

    post_patch(extension, logo)


# Run the registration as a side effect of importing this module.
patch()
61 changes: 53 additions & 8 deletions hvplot/tests/plotting/testcore.py
@@ -1,21 +1,66 @@
import numpy as np
import pandas as pd
import hvplot.pandas # noqa

import pytest

@pytest.mark.parametrize("y", (
from hvplot import hvPlotTabular

# Polars is optional for the test-suite: when it (or the hvplot accessor for
# it) cannot be imported, a stand-in ``pl`` keeps the module-level
# parametrize decorators importable and the Polars tests are skipped.
try:
    import polars as pl
    import hvplot.polars  # noqa
except ImportError:
    skip_polar = True

    class pl:
        DataFrame = LazyFrame = Series = None
else:
    skip_polar = False


# Every public attribute of the tabular accessor.
TYPES = {t for t in dir(hvPlotTabular) if not t.startswith("_")}
# Kinds left out of the DataFrame tests.
FRAME_TYPES = TYPES - {"bivariate", "heatmap", "hexbin", "labels", "vectorfield"}
# Kinds additionally left out of the Series tests.
SERIES_TYPES = FRAME_TYPES - {"points", "polygons", "ohlc", "paths"}
# Sets have no stable iteration order; sort the kinds so the generated tests
# are collected in the same order in every run and in every worker process.
frame_kinds = pytest.mark.parametrize("kind", sorted(FRAME_TYPES))
series_kinds = pytest.mark.parametrize("kind", sorted(SERIES_TYPES))

y_combinations = pytest.mark.parametrize("y", (
["A", "B", "C", "D"],
("A", "B", "C", "D"),
{"A", "B", "C", "D"},
np.array(["A", "B", "C", "D"]),
pd.Index(["A", "B", "C", "D"]),
pd.Series(["A", "B", "C", "D"]),
))
def test_diffent_input_types(y):
),
ids=lambda x: type(x).__name__
)


@frame_kinds
@y_combinations
def test_dataframe_pandas(kind, y):
    """Smoke test: each frame kind accepts each list-like form of ``y``."""
    # Built explicitly instead of through the private ``pd._testing`` helper,
    # which is not part of the public pandas API.
    df = pd.DataFrame(np.random.randn(30, 4), columns=list("ABCD"))
    df.hvplot(y=y, kind=kind)


@series_kinds
def test_series_pandas(kind):
    """Smoke test: each series kind can be plotted from a pandas Series."""
    values = np.random.rand(10)
    pd.Series(values, name="A").hvplot(kind=kind)


@pytest.mark.skipif(skip_polar, reason="polars not installed")
@pytest.mark.parametrize("cast", (pl.DataFrame, pl.LazyFrame))
@frame_kinds
@y_combinations
def test_dataframe_polars(kind, y, cast):
    """Smoke test: each frame kind and ``y`` form works on Polars frames."""
    # Built explicitly instead of through the private ``pd._testing`` helper,
    # which is not part of the public pandas API.
    pandas_df = pd.DataFrame(np.random.randn(30, 4), columns=list("ABCD"))
    df = cast(pandas_df)
    assert isinstance(df, cast)
    df.hvplot(y=y, kind=kind)


for t in types - ignore_types:
df.hvplot(y=y, kind=t)
@pytest.mark.skipif(skip_polar, reason="polars not installed")
@series_kinds
def test_series_polars(kind):
    """Smoke test: each series kind can be plotted from a Polars Series."""
    series = pl.Series(name="A", values=np.random.rand(10))
    assert isinstance(series, pl.Series)
    series.hvplot(kind=kind)
25 changes: 25 additions & 0 deletions hvplot/tests/testpatch.py
Expand Up @@ -100,3 +100,28 @@ def test_streamz_seriess_patched(self):
from streamz.dataframe import Random
random_df = Random()
self.assertIsInstance(random_df.groupby('x').sum().y.hvplot, hvPlotTabular)


class TestPatchPolars(TestCase):
    """Importing ``hvplot.polars`` adds ``.hvplot`` to the Polars containers."""

    def setUp(self):
        """Skip when Polars is missing, otherwise apply the patch."""
        try:
            import polars as pl  # noqa
        except ImportError:
            # Only a missing dependency is a reason to skip; any other
            # failure should surface as an error.
            raise SkipTest('Polars not available')
        import hvplot.polars  # noqa

    def test_polars_series_patched(self):
        import polars as pl
        pseries = pl.Series([0, 1, 2])
        self.assertIsInstance(pseries.hvplot, hvPlotTabular)

    def test_polars_dataframe_patched(self):
        import polars as pl
        pdf = pl.DataFrame({'x': [1, 3, 5], 'y': [2, 4, 6]})
        self.assertIsInstance(pdf.hvplot, hvPlotTabular)

    def test_polars_lazyframe_patched(self):
        import polars as pl
        pldf = pl.LazyFrame({'x': [1, 3, 5], 'y': [2, 4, 6]})
        self.assertIsInstance(pldf.hvplot, hvPlotTabular)
1 change: 1 addition & 0 deletions setup.py
Expand Up @@ -60,6 +60,7 @@ def get_setup_version(reponame):
'pooch',
'scipy',
'ipywidgets',
'polars',
]

# Dependencies required to run the notebooks
Expand Down

0 comments on commit 4e9436d

Please sign in to comment.