10 changes: 4 additions & 6 deletions ibis/formats/tests/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,13 @@

import ibis
import ibis.expr.datatypes as dt
from ibis.formats.pandas import PandasConverter
from ibis.formats.pandas import DaskData

dd = pytest.importorskip("dask.dataframe")


from dask.dataframe.utils import tm # noqa: E402

from ibis.formats.pandas import schema_from_dask_dataframe # noqa: E402


@pytest.mark.parametrize(
('col_data', 'schema_type'),
Expand Down Expand Up @@ -50,7 +48,7 @@
)
# Builds a one-column, single-partition dask DataFrame from `col_data`, infers
# an ibis schema from it, and asserts the result equals a schema mapping 'col'
# to `schema_type`. The parametrize values feeding this test are elided in
# this view.
# NOTE(review): the two consecutive `inferred = ...` lines look like the
# removed/added pair of a diff whose +/- markers were lost — confirm which
# one is live.
def test_schema_infer_dataframe(col_data, schema_type):
df = dd.from_pandas(pd.DataFrame({'col': col_data}), npartitions=1)
inferred = schema_from_dask_dataframe(df)
inferred = DaskData.infer_table(df)
expected = ibis.schema([('col', schema_type)])
assert inferred == expected

Expand Down Expand Up @@ -196,13 +194,13 @@ def test_schema_infer_exhaustive_dataframe():
('year', dt.int64),
]

assert schema_from_dask_dataframe(df) == ibis.schema(expected)
assert DaskData.infer_table(df) == ibis.schema(expected)


# Converts a two-partition dask DataFrame holding an hourly 'time' column
# against a schema declaring that column as timestamp("EST"), then compares
# the computed result with the same frame localized via `tz_localize("EST")`.
# NOTE(review): the two consecutive `result = ...` lines look like the
# removed/added pair of a diff whose +/- markers were lost — confirm which
# one is live.
def test_convert_dataframe_with_timezone():
data = {'time': pd.date_range('2018-01-01', '2018-01-02', freq='H')}
df = dd.from_pandas(pd.DataFrame(data), npartitions=2)
# Expected frame: same data with the time column localized to EST.
expected = df.assign(time=df.time.dt.tz_localize("EST"))
desired_schema = ibis.schema([('time', 'timestamp("EST")')])
# A copy is passed to the converter, so `df` itself is not handed over.
result = PandasConverter.convert_frame(df.copy(), desired_schema)
result = DaskData.convert_table(df.copy(), desired_schema)
# Both sides are dask frames; compute() materializes them for comparison.
tm.assert_frame_equal(result.compute(), expected.compute())
29 changes: 12 additions & 17 deletions ibis/formats/tests/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,7 @@

import ibis.expr.datatypes as dt
import ibis.tests.strategies as ibst
from ibis.formats.numpy import (
dtype_from_numpy,
dtype_to_numpy,
schema_from_numpy,
schema_to_numpy,
)
from ibis.formats.numpy import NumpySchema, NumpyType

roundtripable_types = st.deferred(
lambda: (
Expand All @@ -37,11 +32,11 @@ def numpy_schema(draw, item_strategy=roundtripable_types, max_size=10):
def assert_dtype_roundtrip(
numpy_type, ibis_type=None, restored_type=None, nullable=True
):
dtype = dtype_from_numpy(numpy_type, nullable=nullable)
dtype = NumpyType.to_ibis(numpy_type, nullable=nullable)
if ibis_type is not None:
assert dtype == ibis_type

nptyp = dtype_to_numpy(dtype)
nptyp = NumpyType.from_ibis(dtype)
if restored_type is None:
restored_type = numpy_type
assert nptyp == restored_type
Expand Down Expand Up @@ -77,35 +72,35 @@ def test_non_roundtripable_bytes_type(numpy_type):
ibst.null_dtype | ibst.variadic_dtypes | ibst.decimal_dtype() | ibst.struct_dtypes()
)
# Asserts that the drawn `ibis_type` converts to numpy's "object" dtype.
# The visible tail of the decorator combines null, variadic, decimal and
# struct dtype strategies; the head of the decorator is elided in this view.
# NOTE(review): the two assertions look like the removed/added pair of a diff
# whose +/- markers were lost — confirm which one is live.
def test_variadic_to_numpy(ibis_type):
assert dtype_to_numpy(ibis_type) == np.dtype("object")
assert NumpyType.from_ibis(ibis_type) == np.dtype("object")


# Property test over date and timestamp ibis dtypes: each is expected to
# convert to numpy's datetime64[ns] dtype.
# NOTE(review): the two assertions look like the removed/added pair of a diff
# whose +/- markers were lost — confirm which one is live.
@h.given(ibst.date_dtype | ibst.timestamp_dtype)
def test_date_to_numpy(ibis_type):
assert dtype_to_numpy(ibis_type) == np.dtype("datetime64[ns]")
assert NumpyType.from_ibis(ibis_type) == np.dtype("datetime64[ns]")


# Property test over ibis time dtypes: each is expected to convert to numpy's
# timedelta64[ns] dtype.
# NOTE(review): the two assertions look like the removed/added pair of a diff
# whose +/- markers were lost — confirm which one is live.
@h.given(ibst.time_dtype)
def test_time_to_numpy(ibis_type):
assert dtype_to_numpy(ibis_type) == np.dtype("timedelta64[ns]")
assert NumpyType.from_ibis(ibis_type) == np.dtype("timedelta64[ns]")


# Property test: converting a generated ibis schema to its numpy form keeps
# the number of fields, and every (name, numpy_type) pair matches the
# per-type conversion of the ibis type stored under the same name.
# NOTE(review): each old-style call below is directly followed by a
# class-based counterpart — this looks like removed/added diff pairs whose
# +/- markers were lost; confirm which lines are live.
@h.given(ibst.schema())
def test_schema_to_numpy(ibis_schema):
numpy_schema = schema_to_numpy(ibis_schema)
numpy_schema = NumpySchema.from_ibis(ibis_schema)
assert len(numpy_schema) == len(ibis_schema)

# The numpy schema is iterated as (name, dtype) pairs.
for name, numpy_type in numpy_schema:
assert numpy_type == dtype_to_numpy(ibis_schema[name])
assert numpy_type == NumpyType.from_ibis(ibis_schema[name])


# Property test: converting a generated numpy schema to an ibis schema keeps
# the number of fields, and converting each resulting ibis type back to numpy
# yields the original numpy type for that name.
# NOTE(review): each old-style call below is directly followed by a
# class-based counterpart — this looks like removed/added diff pairs whose
# +/- markers were lost; confirm which lines are live.
@h.given(numpy_schema())
def test_schema_from_numpy(numpy_schema):
ibis_schema = schema_from_numpy(numpy_schema)
ibis_schema = NumpySchema.to_ibis(numpy_schema)
assert len(numpy_schema) == len(ibis_schema)

# The numpy schema is iterated as (name, dtype) pairs.
for name, numpy_type in numpy_schema:
assert dtype_to_numpy(ibis_schema[name]) == numpy_type
assert NumpyType.from_ibis(ibis_schema[name]) == numpy_type


@pytest.mark.parametrize(
Expand All @@ -129,11 +124,11 @@ def test_schema_from_numpy(numpy_schema):
],
)
# Wraps `numpy_dtype` in np.dtype(...), converts it to an ibis type, and
# asserts the result equals `ibis_dtype`. The parametrize cases are elided in
# this view.
# NOTE(review): the two assertions look like the removed/added pair of a diff
# whose +/- markers were lost — confirm which one is live.
def test_dtype_from_numpy(numpy_dtype, ibis_dtype):
assert dtype_from_numpy(np.dtype(numpy_dtype)) == ibis_dtype
assert NumpyType.to_ibis(np.dtype(numpy_dtype)) == ibis_dtype


# Asserts that the unit-less numpy timedelta64 dtype converts to dt.interval.
# The test is skipped when pyarrow cannot be imported (importorskip) or when
# its version is below 9; the skip message gives the reason for the latter.
# NOTE(review): the two assertions look like the removed/added pair of a diff
# whose +/- markers were lost — confirm which one is live.
def test_dtype_from_numpy_dtype_timedelta():
if vparse(pytest.importorskip("pyarrow").__version__) < vparse("9"):
pytest.skip("pyarrow < 9 globally mutates the timedelta64 numpy dtype")

assert dtype_from_numpy(np.dtype(np.timedelta64)) == dt.interval
assert NumpyType.to_ibis(np.dtype(np.timedelta64)) == dt.interval
31 changes: 12 additions & 19 deletions ibis/formats/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,7 @@
import ibis
import ibis.expr.datatypes as dt
import ibis.expr.schema as sch
from ibis.formats.pandas import (
PandasConverter,
dtype_from_pandas,
dtype_to_pandas,
schema_from_pandas,
schema_from_pandas_dataframe,
schema_to_pandas,
)
from ibis.formats.pandas import PandasData, PandasSchema, PandasType


@pytest.mark.parametrize(
Expand Down Expand Up @@ -48,7 +41,7 @@
],
)
# Asserts that converting `ibis_type` to a pandas dtype yields `pandas_type`.
# The parametrize cases are elided in this view.
# NOTE(review): the two assertions look like the removed/added pair of a diff
# whose +/- markers were lost — confirm which one is live.
def test_dtype_to_pandas(pandas_type, ibis_type):
assert dtype_to_pandas(ibis_type) == pandas_type
assert PandasType.from_ibis(ibis_type) == pandas_type


@pytest.mark.parametrize(
Expand Down Expand Up @@ -77,17 +70,17 @@ def test_dtype_to_pandas(pandas_type, ibis_type):
)
# Creates an empty Series whose dtype string is "<pandas_type>[pyarrow]" and
# asserts that its dtype converts to `ibis_type`. The parametrize cases are
# elided in this view.
# NOTE(review): the two assertions look like the removed/added pair of a diff
# whose +/- markers were lost — confirm which one is live.
def test_dtype_from_pandas_arrow_dtype(pandas_type, ibis_type):
series = pd.Series([], dtype=f"{pandas_type}[pyarrow]")
assert dtype_from_pandas(series.dtype) == ibis_type
assert PandasType.to_ibis(series.dtype) == ibis_type


# Asserts that the dtype of an empty "string[pyarrow]" Series converts to
# dt.String().
# NOTE(review): the two assertions look like the removed/added pair of a diff
# whose +/- markers were lost — confirm which one is live.
def test_dtype_from_pandas_arrow_string_dtype():
series = pd.Series([], dtype="string[pyarrow]")
assert dtype_from_pandas(series.dtype) == dt.String()
assert PandasType.to_ibis(series.dtype) == dt.String()


# Asserts that an ArrowDtype wrapping a pyarrow list-of-string type converts
# to an ibis array of strings.
# NOTE(review): the two assertions look like the removed/added pair of a diff
# whose +/- markers were lost — confirm which one is live.
def test_dtype_from_pandas_arrow_list_dtype():
series = pd.Series([], dtype=pd.ArrowDtype(pa.list_(pa.string())))
assert dtype_from_pandas(series.dtype) == dt.Array(dt.string)
assert PandasType.to_ibis(series.dtype) == dt.Array(dt.string)


@pytest.mark.parametrize(
Expand All @@ -113,7 +106,7 @@ def test_dtype_from_pandas_arrow_list_dtype():
ids=str,
)
# Asserts that `pandas_type` converts to `ibis_type`. The parametrize cases
# are elided in this view; going by the test name they are presumably pandas
# nullable extension dtypes — verify against the full file.
# NOTE(review): the two assertions look like the removed/added pair of a diff
# whose +/- markers were lost — confirm which one is live.
def test_dtype_from_nullable_extension_dtypes(pandas_type, ibis_type):
assert dtype_from_pandas(pandas_type) == ibis_type
assert PandasType.to_ibis(pandas_type) == ibis_type


def test_schema_to_pandas():
Expand All @@ -125,7 +118,7 @@ def test_schema_to_pandas():
'd': dt.float64,
}
)
pandas_schema = schema_to_pandas(ibis_schema)
pandas_schema = PandasSchema.from_ibis(ibis_schema)

assert pandas_schema == [
('a', np.dtype('int64')),
Expand All @@ -143,7 +136,7 @@ def test_schema_from_pandas():
('d', np.dtype('float64')),
]

ibis_schema = schema_from_pandas(pandas_schema)
ibis_schema = PandasSchema.to_ibis(pandas_schema)
assert ibis_schema == sch.Schema(
{
'a': dt.int64,
Expand Down Expand Up @@ -291,7 +284,7 @@ def test_schema_from_dataframe():
]
)

assert schema_from_pandas_dataframe(df) == expected
assert PandasData.infer_table(df) == expected
assert sch.infer(df) == expected


Expand Down Expand Up @@ -325,7 +318,7 @@ def test_schema_from_dataframe_with_array_column():
]
)

assert schema_from_pandas_dataframe(df) == expected
assert PandasData.infer_table(df) == expected
assert sch.infer(df) == expected


Expand Down Expand Up @@ -421,7 +414,7 @@ def test_schema_from_dataframe_with_array_column():
# Builds a one-column pandas DataFrame from `col_data`, infers an ibis schema
# from it, and asserts the result equals a Schema mapping 'col' to
# `schema_type`. The parametrize values feeding this test are elided in this
# view.
# NOTE(review): the two consecutive `inferred = ...` lines look like the
# removed/added pair of a diff whose +/- markers were lost — confirm which
# one is live.
def test_schema_from_various_dataframes(col_data, schema_type):
df = pd.DataFrame({'col': col_data})

inferred = schema_from_pandas_dataframe(df)
inferred = PandasData.infer_table(df)
expected = sch.Schema({'col': schema_type})
assert inferred == expected

Expand All @@ -432,5 +425,5 @@ def test_convert_dataframe_with_timezone():
time=lambda df: df.time.dt.tz_localize("EST")
)
desired_schema = ibis.schema(dict(time='timestamp("EST")'))
result = PandasConverter.convert_frame(df.copy(), desired_schema)
result = PandasData.convert_table(df.copy(), desired_schema)
tm.assert_frame_equal(expected, result)
31 changes: 13 additions & 18 deletions ibis/formats/tests/test_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,15 @@

import ibis.expr.datatypes as dt
from ibis.common.exceptions import IntegrityError
from ibis.formats.pyarrow import (
dtype_from_pyarrow,
dtype_to_pyarrow,
schema_from_pyarrow,
schema_to_pyarrow,
)
from ibis.formats.pyarrow import PyArrowSchema, PyArrowType


def assert_dtype_roundtrip(arrow_type, ibis_type=None, restored_type=None):
dtype = dtype_from_pyarrow(arrow_type, nullable=False)
dtype = PyArrowType.to_ibis(arrow_type, nullable=False)
if ibis_type is not None:
assert dtype == ibis_type

patyp = dtype_to_pyarrow(dtype)
patyp = PyArrowType.from_ibis(dtype)
if restored_type is None:
restored_type = arrow_type
assert patyp == restored_type
Expand Down Expand Up @@ -90,7 +85,7 @@ def test_timestamp_no_scale(timezone, nullable):

# Expects converting pyarrow's month_day_nano_interval type to raise a
# ValueError whose message matches "Arrow interval type is not supported".
# NOTE(review): the two calls inside the `with` block look like the
# removed/added pair of a diff whose +/- markers were lost. If both were
# really present, the second would never run once the first raises — confirm
# which one is live.
def test_month_day_nano_type_unsupported():
with pytest.raises(ValueError, match="Arrow interval type is not supported"):
dtype_from_pyarrow(pa.month_day_nano_interval())
PyArrowType.to_ibis(pa.month_day_nano_interval())


@pytest.mark.parametrize('value_nullable', [True, False])
Expand All @@ -99,8 +94,8 @@ def test_dtype_from_nullable_map_type(value_nullable):
pyarrow_type = pa.map_(
pa.int64(), pa.field('value', pa.int64(), nullable=value_nullable)
)
ibis_type = dtype_from_pyarrow(pyarrow_type)
restored_type = dtype_to_pyarrow(ibis_type)
ibis_type = PyArrowType.to_ibis(pyarrow_type)
restored_type = PyArrowType.from_ibis(ibis_type)

assert ibis_type == dt.Map(
dt.Int64(nullable=False), dt.Int64(nullable=value_nullable)
Expand All @@ -115,8 +110,8 @@ def test_dtype_from_nullable_map_type(value_nullable):
@pytest.mark.parametrize('list_nullable', [True, False])
def test_dtype_from_nullable_list_type(value_nullable, list_nullable):
pyarrow_type = pa.list_(pa.field('value', pa.int64(), nullable=value_nullable))
ibis_type = dtype_from_pyarrow(pyarrow_type, nullable=list_nullable)
restored_type = dtype_to_pyarrow(ibis_type)
ibis_type = PyArrowType.to_ibis(pyarrow_type, nullable=list_nullable)
restored_type = PyArrowType.from_ibis(ibis_type)

assert ibis_type == dt.Array(
dt.Int64(nullable=value_nullable), nullable=list_nullable
Expand All @@ -136,7 +131,7 @@ def test_dtype_from_nullable_list_type(value_nullable, list_nullable):
],
)
# Asserts that converting `ibis_type` to pyarrow yields `arrow_type`. The
# parametrize cases are elided in this view.
# NOTE(review): the two assertions look like the removed/added pair of a diff
# whose +/- markers were lost — confirm which one is live.
def test_ibis_exclusive_types(ibis_type, arrow_type):
assert dtype_to_pyarrow(ibis_type) == arrow_type
assert PyArrowType.from_ibis(ibis_type) == arrow_type


def test_schema_from_pyarrow_checks_duplicate_column_names():
Expand All @@ -147,18 +142,18 @@ def test_schema_from_pyarrow_checks_duplicate_column_names():
]
)
with pytest.raises(IntegrityError, match='Duplicate column name'):
schema_from_pyarrow(arrow_schema)
PyArrowSchema.to_ibis(arrow_schema)


# Property test: a generated pyarrow schema converted to an ibis schema and
# back must equal the original pyarrow schema.
# NOTE(review): the two `ibis_schema`/`restored` assignment pairs look like
# the removed/added halves of a diff whose +/- markers were lost — confirm
# which pair is live.
@h.given(past.schemas(roundtripable_types))
def test_schema_roundtrip(pyarrow_schema):
# Discard generated schemas that repeat a column name.
unique_column_names = set(pyarrow_schema.names)
h.assume(len(unique_column_names) == len(pyarrow_schema.names))

ibis_schema = schema_from_pyarrow(pyarrow_schema)
restored = schema_to_pyarrow(ibis_schema)
ibis_schema = PyArrowSchema.to_ibis(pyarrow_schema)
restored = PyArrowSchema.from_ibis(ibis_schema)
assert pyarrow_schema.equals(restored)


# Asserts that ibis's `dt.unknown` type converts to pyarrow's string type.
# NOTE(review): the two assertions look like the removed/added pair of a diff
# whose +/- markers were lost — confirm which one is live.
def test_unknown_dtype_gets_converted_to_string():
assert dtype_to_pyarrow(dt.unknown) == pa.string()
assert PyArrowType.from_ibis(dt.unknown) == pa.string()
4 changes: 2 additions & 2 deletions ibis/tests/expr/mocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import ibis.expr.types as ir
from ibis.backends.base.sql import BaseSQLBackend
from ibis.backends.base.sql.alchemy import AlchemyCompiler
from ibis.backends.base.sql.alchemy.datatypes import dtype_to_sqlalchemy
from ibis.backends.base.sql.alchemy.datatypes import AlchemyType
from ibis.expr.schema import Schema

MOCK_TABLES = {
Expand Down Expand Up @@ -438,7 +438,7 @@ def table_from_schema(name, meta, schema, *, database: str | None = None):
columns = []

for colname, dtype in zip(schema.names, schema.types):
satype = dtype_to_sqlalchemy(dtype)
satype = AlchemyType.from_ibis(dtype)
column = sa.Column(colname, satype, nullable=dtype.nullable)
columns.append(column)

Expand Down
4 changes: 2 additions & 2 deletions ibis/tests/sql/test_sqlalchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import ibis
import ibis.expr.datatypes as dt
from ibis.backends.base.sql.alchemy import AlchemyCompiler, BaseAlchemyBackend
from ibis.backends.base.sql.alchemy.datatypes import ArrayType, dtype_to_sqlalchemy
from ibis.backends.base.sql.alchemy.datatypes import AlchemyType, ArrayType
from ibis.tests.expr.mocks import MockAlchemyBackend
from ibis.tests.util import assert_decompile_roundtrip, assert_equal

Expand Down Expand Up @@ -553,7 +553,7 @@ def test_tpc_h11(snapshot):


def test_to_sqla_type_array_of_non_primitive():
result = dtype_to_sqlalchemy(dt.Array(dt.Struct(dict(a="int"))))
result = AlchemyType.from_ibis(dt.Array(dt.Struct(dict(a="int"))))
[(result_name, result_type)] = result.value_type.fields.items()
expected_name = "a"
assert result_name == expected_name
Expand Down