Skip to content

Commit

Permalink
refactor(formats): plumb through data_mapper and schema in both pandas and pyarrow formats
Browse files Browse the repository at this point in the history
  • Loading branch information
jcrist committed Sep 6, 2024
1 parent 5c2eadc commit cbeb967
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 18 deletions.
48 changes: 35 additions & 13 deletions ibis/expr/types/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

import ibis.expr.schema as sch
import ibis.expr.types as ir
from ibis.formats.pandas import PandasData
from ibis.formats.pyarrow import PyArrowData


Expand Down Expand Up @@ -1261,20 +1262,31 @@ def to_pandas(self, **kwargs) -> pd.Series:
@public
class Scalar(Value):
def __pyarrow_result__(
self, table: pa.Table, data_mapper: type[PyArrowData] | None = None
self,
table: pa.Table,
*,
schema: sch.Schema | None = None,
data_mapper: type[PyArrowData] | None = None,
) -> pa.Scalar:
if data_mapper is None:
from ibis.formats.pyarrow import PyArrowData as data_mapper

return data_mapper.convert_scalar(table[0][0], self.type())
return data_mapper.convert_scalar(
table[0][0], self.type() if schema is None else schema.types[0]
)

def __pandas_result__(
self, df: pd.DataFrame, *, schema: sch.Schema | None = None
self,
df: pd.DataFrame,
*,
schema: sch.Schema | None = None,
data_mapper: type[PandasData] | None = None,
) -> Any:
from ibis.formats.pandas import PandasData
if data_mapper is None:
from ibis.formats.pandas import PandasData as data_mapper

return PandasData.convert_scalar(
df, self.type() if schema is None else schema[df.columns[0]]
return data_mapper.convert_scalar(
df, self.type() if schema is None else schema.types[0]
)

def __polars_result__(self, df: pl.DataFrame) -> Any:
Expand Down Expand Up @@ -1440,17 +1452,28 @@ def preview(
)

def __pyarrow_result__(
self, table: pa.Table, data_mapper: type[PyArrowData] | None = None
self,
table: pa.Table,
*,
schema: sch.Schema | None = None,
data_mapper: type[PyArrowData] | None = None,
) -> pa.Array | pa.ChunkedArray:
if data_mapper is None:
from ibis.formats.pyarrow import PyArrowData as data_mapper

return data_mapper.convert_column(table[0], self.type())
return data_mapper.convert_column(
table[0], self.type() if schema is None else schema.types[0]
)

def __pandas_result__(
self, df: pd.DataFrame, *, schema: sch.Schema | None = None
self,
df: pd.DataFrame,
*,
schema: sch.Schema | None = None,
data_mapper: type[PandasData] | None = None,
) -> pd.Series:
from ibis.formats.pandas import PandasData
if data_mapper is None:
from ibis.formats.pandas import PandasData as data_mapper

assert (
len(df.columns) == 1
Expand All @@ -1463,9 +1486,8 @@ def __pandas_result__(
# df.loc[:, column_name] returns the special GeoSeries object.
#
# this bug is fixed in later versions of geopandas
(column,) = df.columns
return PandasData.convert_column(
df.loc[:, column], self.type() if schema is None else schema[column]
return data_mapper.convert_column(
df.loc[:, df.columns[0]], self.type() if schema is None else schema.types[0]
)

def __polars_result__(self, df: pl.DataFrame) -> pl.Series:
Expand Down
24 changes: 19 additions & 5 deletions ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from ibis.expr.types import Table
from ibis.expr.types.groupby import GroupedTable
from ibis.expr.types.temporal_windows import WindowedTable
from ibis.formats.pandas import PandasData
from ibis.formats.pyarrow import PyArrowData
from ibis.selectors import IfAnyAll

Expand Down Expand Up @@ -198,19 +199,32 @@ def __arrow_c_stream__(self, requested_schema: object | None = None) -> object:
return self.to_pyarrow().__arrow_c_stream__(requested_schema)

def __pyarrow_result__(
self, table: pa.Table, data_mapper: type[PyArrowData] | None = None
self,
table: pa.Table,
*,
schema: sch.Schema | None = None,
data_mapper: type[PyArrowData] | None = None,
) -> pa.Table:
if data_mapper is None:
from ibis.formats.pyarrow import PyArrowData as data_mapper

return data_mapper.convert_table(table, self.schema())
return data_mapper.convert_table(
table, self.schema() if schema is None else schema
)

def __pandas_result__(
self, df: pd.DataFrame, schema: sch.Schema | None = None
self,
df: pd.DataFrame,
*,
schema: sch.Schema | None = None,
data_mapper: type[PandasData] | None = None,
) -> pd.DataFrame:
from ibis.formats.pandas import PandasData
if data_mapper is None:
from ibis.formats.pandas import PandasData as data_mapper

return PandasData.convert_table(df, self.schema() if schema is None else schema)
return data_mapper.convert_table(
df, self.schema() if schema is None else schema
)

def __polars_result__(self, df: pl.DataFrame) -> Any:
from ibis.formats.polars import PolarsData
Expand Down

0 comments on commit cbeb967

Please sign in to comment.