From 5b02581c2216ab237bc82e3683cdd9506ed3838c Mon Sep 17 00:00:00 2001
From: ncclementi
Date: Tue, 2 Apr 2024 13:55:43 -0400
Subject: [PATCH] chore: deprecate register api

chore: add comment in tests to be modify when we have read_in_memory
chore: comment to rewrite or delete register test when removing api
---
 ibis/backends/datafusion/__init__.py            |  6 ++-
 ibis/backends/datafusion/tests/conftest.py      | 11 +++--
 .../backends/datafusion/tests/test_connect.py   |  8 +++-
 .../datafusion/tests/test_register.py           | 18 ++++---
 ibis/backends/duckdb/__init__.py                |  5 ++
 ibis/backends/duckdb/tests/test_client.py       |  4 +-
 ibis/backends/duckdb/tests/test_register.py     |  4 +-
 ibis/backends/polars/__init__.py                |  6 ++-
 ibis/backends/polars/tests/conftest.py          | 11 +++--
 ibis/backends/pyspark/__init__.py               |  5 ++
 ibis/backends/tests/test_register.py            | 47 +++++++++++++------
 11 files changed, 89 insertions(+), 36 deletions(-)

diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py
index 03bf78cf2fa3d..6b7a5ac4fae60 100644
--- a/ibis/backends/datafusion/__init__.py
+++ b/ibis/backends/datafusion/__init__.py
@@ -26,7 +26,7 @@
 from ibis.backends.sql.compiler import C
 from ibis.expr.operations.udf import InputType
 from ibis.formats.pyarrow import PyArrowType
-from ibis.util import gen_name, normalize_filename
+from ibis.util import deprecated, gen_name, normalize_filename

 try:
     from datafusion import ExecutionContext as SessionContext
@@ -294,6 +294,10 @@ def get_schema(
         table = database.table(table_name)
         return sch.schema(table.schema)

+    @deprecated(
+        as_of="9.0",
+        instead="use the explicit `read_*` method for the filetype you are trying to read, e.g., read_parquet, read_csv, etc.",
+    )
     def register(
         self,
         source: str | Path | pa.Table | pa.RecordBatch | pa.Dataset | pd.DataFrame,
diff --git a/ibis/backends/datafusion/tests/conftest.py b/ibis/backends/datafusion/tests/conftest.py
index f41b4d92edd9d..b4ab69bbfbac5 100644
--- a/ibis/backends/datafusion/tests/conftest.py
+++ b/ibis/backends/datafusion/tests/conftest.py
@@ -24,10 +24,13 @@ def _load_data(self, **_: Any) -> None:
         con = self.connection
         for table_name in TEST_TABLES:
             path = self.data_dir / "parquet" / f"{table_name}.parquet"
-            con.register(path, table_name=table_name)
-        con.register(array_types, table_name="array_types")
-        con.register(win, table_name="win")
-        con.register(topk, table_name="topk")
+            with pytest.warns(FutureWarning, match="v9.0"):
+                con.register(path, table_name=table_name)
+        # TODO: remove warnings and replace register when implementing 8858
+        with pytest.warns(FutureWarning, match="v9.0"):
+            con.register(array_types, table_name="array_types")
+            con.register(win, table_name="win")
+            con.register(topk, table_name="topk")

     @staticmethod
     def connect(*, tmpdir, worker_id, **kw):
diff --git a/ibis/backends/datafusion/tests/test_connect.py b/ibis/backends/datafusion/tests/test_connect.py
index 6b3773f8370f3..80cdb7964ae7e 100644
--- a/ibis/backends/datafusion/tests/test_connect.py
+++ b/ibis/backends/datafusion/tests/test_connect.py
@@ -25,13 +25,17 @@ def test_none_config():


 def test_str_config(name_to_path):
     config = {name: str(path) for name, path in name_to_path.items()}
-    conn = ibis.datafusion.connect(config)
+    # if path.endswith((".parquet", ".csv", ".csv.gz")) connect triggers register
+    with pytest.warns(FutureWarning, match="v9.0"):
+        conn = ibis.datafusion.connect(config)
     assert sorted(conn.list_tables()) == sorted(name_to_path)


 def test_path_config(name_to_path):
     config = name_to_path
-    conn = ibis.datafusion.connect(config)
+    # if path.endswith((".parquet", ".csv", ".csv.gz")) connect triggers register
+    with pytest.warns(FutureWarning, match="v9.0"):
+        conn = ibis.datafusion.connect(config)
     assert sorted(conn.list_tables()) == sorted(name_to_path)

diff --git a/ibis/backends/datafusion/tests/test_register.py b/ibis/backends/datafusion/tests/test_register.py
index 0fb34d6d58e0d..62f629b2cbb38 100644
--- a/ibis/backends/datafusion/tests/test_register.py
+++ b/ibis/backends/datafusion/tests/test_register.py
@@ -25,24 +25,28 @@ def test_read_parquet(conn, data_dir):

 def test_register_table(conn):
     tab = pa.table({"x": [1, 2, 3]})
-    conn.register(tab, "my_table")
+    with pytest.warns(FutureWarning, match="v9.0"):
+        conn.register(tab, "my_table")
     assert conn.table("my_table").x.sum().execute() == 6


 def test_register_pandas(conn):
     df = pd.DataFrame({"x": [1, 2, 3]})
-    conn.register(df, "my_table")
-    assert conn.table("my_table").x.sum().execute() == 6
+    with pytest.warns(FutureWarning, match="v9.0"):
+        conn.register(df, "my_table")
+    assert conn.table("my_table").x.sum().execute() == 6


 def test_register_batches(conn):
     batch = pa.record_batch([pa.array([1, 2, 3])], names=["x"])
-    conn.register(batch, "my_table")
-    assert conn.table("my_table").x.sum().execute() == 6
+    with pytest.warns(FutureWarning, match="v9.0"):
+        conn.register(batch, "my_table")
+    assert conn.table("my_table").x.sum().execute() == 6


 def test_register_dataset(conn):
     tab = pa.table({"x": [1, 2, 3]})
     dataset = ds.InMemoryDataset(tab)
-    conn.register(dataset, "my_table")
-    assert conn.table("my_table").x.sum().execute() == 6
+    with pytest.warns(FutureWarning, match="v9.0"):
+        conn.register(dataset, "my_table")
+    assert conn.table("my_table").x.sum().execute() == 6
diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py
index d691d483d2c26..447b22c24518f 100644
--- a/ibis/backends/duckdb/__init__.py
+++ b/ibis/backends/duckdb/__init__.py
@@ -29,6 +29,7 @@
 from ibis.backends.sql import SQLBackend
 from ibis.backends.sql.compiler import STAR, C, F
 from ibis.expr.operations.udf import InputType
+from ibis.util import deprecated

 if TYPE_CHECKING:
     from collections.abc import Iterable, Mapping, MutableMapping, Sequence
@@ -515,6 +516,10 @@ def drop_database(
         with self._safe_raw_sql(sge.Drop(this=name, kind="SCHEMA", replace=force)):
             pass

+    @deprecated(
+        as_of="9.0",
+        instead="use the explicit `read_*` method for the filetype you are trying to read, e.g., read_parquet, read_csv, etc.",
+    )
     def register(
         self,
         source: str | Path | Any,
diff --git a/ibis/backends/duckdb/tests/test_client.py b/ibis/backends/duckdb/tests/test_client.py
index e5519edc8bb19..73ec247318db1 100644
--- a/ibis/backends/duckdb/tests/test_client.py
+++ b/ibis/backends/duckdb/tests/test_client.py
@@ -268,7 +268,9 @@ def test_connect_duckdb(url, tmp_path):
 )
 def test_connect_local_file(out_method, extension, test_employee_data_1, tmp_path):
     getattr(test_employee_data_1, out_method)(tmp_path / f"out.{extension}")
-    con = ibis.connect(tmp_path / f"out.{extension}")
+    with pytest.warns(FutureWarning, match="v9.0"):
+        # ibis.connect uses con.register
+        con = ibis.connect(tmp_path / f"out.{extension}")
     t = next(iter(con.tables.values()))
     assert not t.head().execute().empty

diff --git a/ibis/backends/duckdb/tests/test_register.py b/ibis/backends/duckdb/tests/test_register.py
index 3f4356a1b0187..e26d1ac325ef5 100644
--- a/ibis/backends/duckdb/tests/test_register.py
+++ b/ibis/backends/duckdb/tests/test_register.py
@@ -257,10 +257,10 @@ def test_read_sqlite_no_table_name(con, tmp_path):
 )
 def test_register_sqlite(con, tmp_path):
     path = tmp_path / "test.db"
-
     sqlite_con = sqlite3.connect(str(path))
     sqlite_con.execute("CREATE TABLE t AS SELECT 1 a UNION SELECT 2 UNION SELECT 3")
-    ft = con.register(f"sqlite://{path}", "t")
+    with pytest.warns(FutureWarning, match="v9.0"):
+        ft = con.register(f"sqlite://{path}", "t")
     assert ft.count().execute()

diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py
index a09c8c0ff0fb6..3523a7454b185 100644
--- a/ibis/backends/polars/__init__.py
+++ b/ibis/backends/polars/__init__.py
@@ -22,7 +22,7 @@
 from ibis.backends.sql.dialects import Polars
 from ibis.expr.rewrites import rewrite_stringslice
 from ibis.formats.polars import PolarsSchema
-from ibis.util import gen_name, normalize_filename
+from ibis.util import deprecated, gen_name, normalize_filename

 if TYPE_CHECKING:
     from collections.abc import Iterable
@@ -74,6 +74,10 @@ def table(self, name: str, _schema: sch.Schema | None = None) -> ir.Table:
         schema = PolarsSchema.to_ibis(self._tables[name].schema)
         return ops.DatabaseTable(name, schema, self).to_expr()

+    @deprecated(
+        as_of="9.0",
+        instead="use the explicit `read_*` method for the filetype you are trying to read, e.g., read_parquet, read_csv, etc.",
+    )
     def register(
         self,
         source: str | Path | Any,
diff --git a/ibis/backends/polars/tests/conftest.py b/ibis/backends/polars/tests/conftest.py
index d8428b24390a9..a8d727bf34e2b 100644
--- a/ibis/backends/polars/tests/conftest.py
+++ b/ibis/backends/polars/tests/conftest.py
@@ -23,10 +23,13 @@ def _load_data(self, **_: Any) -> None:
         con = self.connection
         for table_name in TEST_TABLES:
             path = self.data_dir / "parquet" / f"{table_name}.parquet"
-            con.register(path, table_name=table_name)
-        con.register(array_types, table_name="array_types")
-        con.register(struct_types, table_name="struct")
-        con.register(win, table_name="win")
+            with pytest.warns(FutureWarning, match="v9.0"):
+                con.register(path, table_name=table_name)
+        # TODO: remove warnings and replace register when implementing 8858
+        with pytest.warns(FutureWarning, match="v9.0"):
+            con.register(array_types, table_name="array_types")
+            con.register(struct_types, table_name="struct")
+            con.register(win, table_name="win")

         # TODO: remove when pyarrow inputs are supported
         con._add_table("topk", pl.from_arrow(topk).lazy())
diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py
index ca346b6583f6b..cc8a7edb4ef8a 100644
--- a/ibis/backends/pyspark/__init__.py
+++ b/ibis/backends/pyspark/__init__.py
@@ -25,6 +25,7 @@
 from ibis.backends.sql import SQLBackend
 from ibis.expr.operations.udf import InputType
 from ibis.legacy.udf.vectorized import _coerce_to_series
+from ibis.util import deprecated

 if TYPE_CHECKING:
     from collections.abc import Mapping, Sequence
@@ -649,6 +650,10 @@ def read_json(
         spark_df.createOrReplaceTempView(table_name)
         return self.table(table_name)

+    @deprecated(
+        as_of="9.0",
+        instead="use the explicit `read_*` method for the filetype you are trying to read, e.g., read_parquet, read_csv, etc.",
+    )
     def register(
         self,
         source: str | Path | Any,
diff --git a/ibis/backends/tests/test_register.py b/ibis/backends/tests/test_register.py
index dec45a4571e6b..e62638fedf7c8 100644
--- a/ibis/backends/tests/test_register.py
+++ b/ibis/backends/tests/test_register.py
@@ -40,6 +40,7 @@ def gzip_csv(data_dir, tmp_path):
     return str(f.absolute())


+# TODO: rewrite or delete test when register api is removed
 @pytest.mark.parametrize(
     ("fname", "in_table_name", "out_table_name"),
     [
@@ -99,13 +100,15 @@ def gzip_csv(data_dir, tmp_path):
 )
 def test_register_csv(con, data_dir, fname, in_table_name, out_table_name):
     with pushd(data_dir / "csv"):
-        table = con.register(fname, table_name=in_table_name)
+        with pytest.warns(FutureWarning, match="v9.0"):
+            table = con.register(fname, table_name=in_table_name)

     assert any(out_table_name in t for t in con.list_tables())
     if con.name != "datafusion":
         table.count().execute()


+# TODO: rewrite or delete test when register api is removed
 @pytest.mark.notimpl(["datafusion"])
 @pytest.mark.notyet(
     [
@@ -126,11 +129,13 @@ def test_register_csv(con, data_dir, fname, in_table_name, out_table_name):
 )
 def test_register_csv_gz(con, data_dir, gzip_csv):
     with pushd(data_dir):
-        table = con.register(gzip_csv)
+        with pytest.warns(FutureWarning, match="v9.0"):
+            table = con.register(gzip_csv)

     assert table.count().execute()


+# TODO: rewrite or delete test when register api is removed
 @pytest.mark.notyet(
     [
         "bigquery",
@@ -154,7 +159,8 @@ def test_register_with_dotted_name(con, data_dir, tmp_path):
     f.parent.mkdir()
     data = data_dir.joinpath("csv", "diamonds.csv").read_bytes()
     f.write_bytes(data)
-    table = con.register(str(f.absolute()))
+    with pytest.warns(FutureWarning, match="v9.0"):
+        table = con.register(str(f.absolute()))

     if con.name != "datafusion":
         table.count().execute()
@@ -173,6 +179,7 @@ def read_table(path: Path) -> Iterator[tuple[str, pa.Table]]:
     return pac.read_csv(data_dir / f"{table_name}.csv", convert_options=convert_options)


+# TODO: rewrite or delete test when register api is removed
 @pytest.mark.parametrize(
     ("fname", "in_table_name", "out_table_name"),
     [
@@ -216,14 +223,16 @@ def test_register_parquet(
     pq.write_table(table, tmp_path / fname.name)

     with pushd(tmp_path):
-        table = con.register(f"parquet://{fname.name}", table_name=in_table_name)
+        with pytest.warns(FutureWarning, match="v9.0"):
+            table = con.register(f"parquet://{fname.name}", table_name=in_table_name)

-    assert any(out_table_name in t for t in con.list_tables())
+    assert any(out_table_name in t for t in con.list_tables())

     if con.name != "datafusion":
         table.count().execute()


+# TODO: rewrite or delete test when register api is removed
 @pytest.mark.notyet(
     [
         "bigquery",
@@ -255,15 +264,20 @@ def test_register_iterator_parquet(
     pq.write_table(table, tmp_path / "functional_alltypes.parquet")

     with pushd(tmp_path):
-        table = con.register(
-            ["parquet://functional_alltypes.parquet", "functional_alltypes.parquet"],
-            table_name=None,
-        )
+        with pytest.warns(FutureWarning, match="v9.0"):
+            table = con.register(
+                [
+                    "parquet://functional_alltypes.parquet",
+                    "functional_alltypes.parquet",
+                ],
+                table_name=None,
+            )

     assert any("ibis_read_parquet" in t for t in con.list_tables())
     assert table.count().execute()


+# TODO: modify test to use read_in_memory when implemented xref: 8858
 @pytest.mark.notimpl(["datafusion"])
 @pytest.mark.notyet(
     [
@@ -287,14 +301,17 @@ def test_register_pandas(con):
     pd = pytest.importorskip("pandas")
     df = pd.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]})

-    t = con.register(df)
+    with pytest.warns(FutureWarning, match="v9.0"):
+        t = con.register(df)
     assert t.x.sum().execute() == 6

-    t = con.register(df, "my_table")
+    with pytest.warns(FutureWarning, match="v9.0"):
+        t = con.register(df, "my_table")
     assert t.op().name == "my_table"
     assert t.x.sum().execute() == 6


+# TODO: modify test to use read_in_memory when implemented xref: 8858
 @pytest.mark.notimpl(["datafusion", "polars"])
 @pytest.mark.notyet(
     [
@@ -318,7 +335,8 @@ def test_register_pyarrow_tables(con):
     pa = pytest.importorskip("pyarrow")
     pa_t = pa.Table.from_pydict({"x": [1, 2, 3], "y": ["a", "b", "c"]})

-    t = con.register(pa_t)
+    with pytest.warns(FutureWarning, match="v9.0"):
+        t = con.register(pa_t)

     assert t.x.sum().execute() == 6

@@ -351,8 +369,9 @@ def test_csv_reregister_schema(con, tmp_path):
         ]
     )

-    # For a full file scan, expect correct schema based on final row
-    foo_table = con.register(foo, table_name="same")
+    with pytest.warns(FutureWarning, match="v9.0"):
+        # For a full file scan, expect correct schema based on final row
+        foo_table = con.register(foo, table_name="same")
     result_schema = foo_table.schema()

     assert result_schema.names == ("cola", "colb", "colc")
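
Note on the migration itself (not part of the diff above): the `@deprecated` decorator only changes the warning surfaced to users; each call site moves from `register` to the matching explicit reader. A minimal sketch of that replacement, assuming a DuckDB connection and an illustrative local CSV path and table name:

    import ibis

    con = ibis.duckdb.connect()  # in-memory DuckDB database

    # Deprecated as of 9.0: register() dispatches on the file extension and now
    # emits a FutureWarning pointing at the explicit readers.
    # t = con.register("data/diamonds.csv", table_name="diamonds")

    # Preferred: call the reader that matches the file type.
    t = con.read_csv("data/diamonds.csv", table_name="diamonds")
    print(t.count().execute())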