diff --git a/doc/index.md b/doc/index.md
index 5860ba937..e5f810e81 100644
--- a/doc/index.md
+++ b/doc/index.md
@@ -101,6 +101,20 @@
 alt: Works with GeoPandas
 align: center
 ---
 :::
+:::{tab-item} Polars
+```python
+import polars
+import hvplot.polars
+
+df_polars = polars.from_pandas(df)
+df_polars.hvplot.scatter(x='bill_length_mm', y='bill_depth_mm', by='species')
+```
+```{image} ./_static/home/dask.gif
+---
+alt: Works with Polars
+align: center
+---
+:::
 :::{tab-item} Intake
 ```python
diff --git a/examples/user_guide/Introduction.ipynb b/examples/user_guide/Introduction.ipynb
index 825e4dc61..9a7a01e2b 100644
--- a/examples/user_guide/Introduction.ipynb
+++ b/examples/user_guide/Introduction.ipynb
@@ -8,6 +8,7 @@
     "\n",
     "* [Pandas](https://pandas.pydata.org): DataFrame, Series (columnar/tabular data)\n",
     "* [Rapids cuDF](https://docs.rapids.ai/api/cudf/stable/): GPU DataFrame, Series (columnar/tabular data)\n",
+    "* [Polars](https://www.pola.rs/): Polars is a fast DataFrame library/in-memory query engine (columnar/tabular data)\n",
     "* [Dask](https://www.dask.org): DataFrame, Series (distributed/out of core arrays and columnar data)\n",
     "* [XArray](https://xarray.pydata.org): Dataset, DataArray (labelled multidimensional arrays)\n",
     "* [Streamz](https://streamz.readthedocs.io): DataFrame(s), Series(s) (streaming columnar data)\n",
diff --git a/hvplot/polars.py b/hvplot/polars.py
new file mode 100644
index 000000000..8ec9d4ff7
--- /dev/null
+++ b/hvplot/polars.py
@@ -0,0 +1,104 @@
+"""Adds the `.hvplot` method to pl.DataFrame, pl.LazyFrame and pl.Series"""
+import itertools
+
+from hvplot import hvPlotTabular, post_patch
+from hvplot.converter import HoloViewsConverter
+from hvplot.util import is_list_like
+
+
+class hvPlotTabularPolars(hvPlotTabular):
+    """`.hvplot` accessor for Polars DataFrame, LazyFrame and Series objects."""
+
+    def _get_converter(self, x=None, y=None, kind=None, **kwds):
+        """Convert the Polars data to pandas and build a HoloViewsConverter.
+
+        For a DataFrame or LazyFrame only the columns named by ``x``, ``y``
+        or by a string / list valued plot option are converted, or every
+        column when ``hover_cols="all"``. A Series is converted as a whole.
+
+        Raises
+        ------
+        ValueError
+            If the wrapped data is not a Polars DataFrame, Series or
+            LazyFrame.
+        """
+        import polars as pl
+
+        params = dict(self._metadata, **kwds)
+        # Always drop x/y/kind from params: they are handed to the converter
+        # explicitly below, so a leftover entry would be passed twice.
+        default_x = params.pop("x", None)
+        default_y = params.pop("y", None)
+        default_kind = params.pop("kind", None)
+        x = x or default_x
+        y = y or default_y
+        kind = kind or default_kind
+
+        # Find columns which should be converted for LazyFrame and DataFrame
+        if isinstance(self._data, (pl.LazyFrame, pl.DataFrame)):
+            # Read the column names once and reuse them below.
+            frame_columns = list(self._data.columns)
+            if params.get("hover_cols") == "all":
+                columns = frame_columns
+            else:
+                possible_columns = [
+                    [v] if isinstance(v, str) else v
+                    for v in params.values()
+                    if isinstance(v, (str, list))
+                ]
+                wanted = (
+                    set(frame_columns) & set(itertools.chain(*possible_columns))
+                ) or {frame_columns[0]}
+                xs = x if is_list_like(x) else (x,)
+                ys = y if is_list_like(y) else (y,)
+                wanted |= {*xs, *ys}
+                wanted.discard(None)
+                # Select in the frame's own column order rather than set
+                # iteration order. Names that are not in the frame are still
+                # passed to select(), as before.
+                columns = [c for c in frame_columns if c in wanted]
+                columns.extend(wanted.difference(frame_columns))
+
+        if isinstance(self._data, pl.DataFrame):
+            data = self._data.select(columns).to_pandas()
+        elif isinstance(self._data, pl.Series):
+            data = self._data.to_pandas()
+        elif isinstance(self._data, pl.LazyFrame):
+            data = self._data.select(columns).collect().to_pandas()
+        else:
+            raise ValueError(
+                "Only Polars DataFrame, Series, and LazyFrame are supported"
+            )
+
+        return HoloViewsConverter(data, x, y, kind=kind, **params)
+
+
+def patch(name="hvplot", extension="bokeh", logo=False):
+    """Register the `.hvplot` accessor on Polars DataFrame, Series and LazyFrame.
+
+    Parameters
+    ----------
+    name : str
+        Namespace name under which the accessor is registered.
+    extension, logo
+        Forwarded to ``post_patch``.
+
+    Raises
+    ------
+    ImportError
+        If Polars cannot be imported.
+    """
+    try:
+        import polars as pl
+    except ImportError as exc:
+        raise ImportError(
+            "Could not patch plotting API onto Polars. Polars could not be imported."
+        ) from exc
+    pl.api.register_dataframe_namespace(name)(hvPlotTabularPolars)
+    pl.api.register_series_namespace(name)(hvPlotTabularPolars)
+    pl.api.register_lazyframe_namespace(name)(hvPlotTabularPolars)
+
+    post_patch(extension, logo)
+
+
+patch()
diff --git a/hvplot/tests/plotting/testcore.py b/hvplot/tests/plotting/testcore.py
index a2ed1ea3a..a65afd600 100644
--- a/hvplot/tests/plotting/testcore.py
+++ b/hvplot/tests/plotting/testcore.py
@@ -1,21 +1,66 @@
 import numpy as np
 import pandas as pd
 import hvplot.pandas  # noqa
-
 import pytest
 
-@pytest.mark.parametrize("y", (
+from hvplot import hvPlotTabular
+
+try:
+    import polars as pl
+    import hvplot.polars  # noqa
+    skip_polar = False
+except ImportError:
+    class pl:
+        DataFrame = None
+        LazyFrame = None
+        Series = None
+    skip_polar = True
+
+
+TYPES = {t for t in dir(hvPlotTabular) if not t.startswith("_")}
+FRAME_TYPES = TYPES - {"bivariate", "heatmap", "hexbin", "labels", "vectorfield"}
+SERIES_TYPES = FRAME_TYPES - {"points", "polygons", "ohlc", "paths"}
+frame_kinds = pytest.mark.parametrize("kind", FRAME_TYPES)
+series_kinds = pytest.mark.parametrize("kind", SERIES_TYPES)
+
+y_combinations = pytest.mark.parametrize("y", (
     ["A", "B", "C", "D"],
     ("A", "B", "C", "D"),
     {"A", "B", "C", "D"},
     np.array(["A", "B", "C", "D"]),
     pd.Index(["A", "B", "C", "D"]),
     pd.Series(["A", "B", "C", "D"]),
-    ))
-def test_diffent_input_types(y):
+    ),
+    ids=lambda x: type(x).__name__
+)
+
+
+@frame_kinds
+@y_combinations
+def test_dataframe_pandas(kind, y):
     df = pd._testing.makeDataFrame()
-    types = {t for t in dir(df.hvplot) if not t.startswith("_")}
-    ignore_types = {'bivariate', 'heatmap', 'hexbin', 'labels', 'vectorfield'}
+    df.hvplot(y=y, kind=kind)
+
+
+@series_kinds
+def test_series_pandas(kind):
+    ser = pd.Series(np.random.rand(10), name="A")
+    ser.hvplot(kind=kind)
+
+
+@pytest.mark.skipif(skip_polar, reason="polars not installed")
+@pytest.mark.parametrize("cast", (pl.DataFrame, pl.LazyFrame))
+@frame_kinds
+@y_combinations
+def test_dataframe_polars(kind, y, cast):
+    df = cast(pd._testing.makeDataFrame())
+    assert isinstance(df, cast)
+    df.hvplot(y=y, kind=kind)
+
 
-    for t in types - ignore_types:
-        df.hvplot(y=y, kind=t)
+@pytest.mark.skipif(skip_polar, reason="polars not installed")
+@series_kinds
+def test_series_polars(kind):
+    ser = pl.Series(values=np.random.rand(10), name="A")
+    assert isinstance(ser, pl.Series)
+    ser.hvplot(kind=kind)
diff --git a/hvplot/tests/testpatch.py b/hvplot/tests/testpatch.py
index 485e27e96..8e4912813 100644
--- a/hvplot/tests/testpatch.py
+++ b/hvplot/tests/testpatch.py
@@ -100,3 +100,28 @@ def test_streamz_seriess_patched(self):
         from streamz.dataframe import Random
         random_df = Random()
         self.assertIsInstance(random_df.groupby('x').sum().y.hvplot, hvPlotTabular)
+
+
+class TestPatchPolars(TestCase):
+
+    def setUp(self):
+        try:
+            import polars as pl  # noqa
+        except ImportError:
+            raise SkipTest('Polars not available')
+        import hvplot.polars  # noqa
+
+    def test_polars_series_patched(self):
+        import polars as pl
+        pseries = pl.Series([0, 1, 2])
+        self.assertIsInstance(pseries.hvplot, hvPlotTabular)
+
+    def test_polars_dataframe_patched(self):
+        import polars as pl
+        pdf = pl.DataFrame({'x': [1, 3, 5], 'y': [2, 4, 6]})
+        self.assertIsInstance(pdf.hvplot, hvPlotTabular)
+
+    def test_polars_lazyframe_patched(self):
+        import polars as pl
+        pldf = pl.LazyFrame({'x': [1, 3, 5], 'y': [2, 4, 6]})
+        self.assertIsInstance(pldf.hvplot, hvPlotTabular)
diff --git a/setup.py b/setup.py
index 61b3c7195..dd57318ec 100644
--- a/setup.py
+++ b/setup.py
@@ -60,6 +60,7 @@ def get_setup_version(reponame):
     'pooch',
     'scipy',
     'ipywidgets',
+    'polars',
 ]
 
 # Dependencies required to run the notebooks