Skip to content

Commit

Permalink
chore: deprecate register api
Browse files Browse the repository at this point in the history
chore: add comment in tests to be modified when we have read_in_memory

chore: comment to rewrite or delete register test when removing api
  • Loading branch information
ncclementi committed Apr 10, 2024
1 parent ab0eb57 commit 5b02581
Show file tree
Hide file tree
Showing 11 changed files with 89 additions and 36 deletions.
6 changes: 5 additions & 1 deletion ibis/backends/datafusion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from ibis.backends.sql.compiler import C
from ibis.expr.operations.udf import InputType
from ibis.formats.pyarrow import PyArrowType
from ibis.util import gen_name, normalize_filename
from ibis.util import deprecated, gen_name, normalize_filename

try:
from datafusion import ExecutionContext as SessionContext
Expand Down Expand Up @@ -294,6 +294,10 @@ def get_schema(
table = database.table(table_name)
return sch.schema(table.schema)

@deprecated(
as_of="9.0",
instead="use the explicit `read_*` method for the filetype you are trying to read, e.g., read_parquet, read_csv, etc.",
)
def register(
self,
source: str | Path | pa.Table | pa.RecordBatch | pa.Dataset | pd.DataFrame,
Expand Down
11 changes: 7 additions & 4 deletions ibis/backends/datafusion/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,13 @@ def _load_data(self, **_: Any) -> None:
con = self.connection
for table_name in TEST_TABLES:
path = self.data_dir / "parquet" / f"{table_name}.parquet"
con.register(path, table_name=table_name)
con.register(array_types, table_name="array_types")
con.register(win, table_name="win")
con.register(topk, table_name="topk")
with pytest.warns(FutureWarning, match="v9.0"):
con.register(path, table_name=table_name)
# TODO: remove warnings and replace register when implementing 8858
with pytest.warns(FutureWarning, match="v9.0"):
con.register(array_types, table_name="array_types")
con.register(win, table_name="win")
con.register(topk, table_name="topk")

@staticmethod
def connect(*, tmpdir, worker_id, **kw):
Expand Down
8 changes: 6 additions & 2 deletions ibis/backends/datafusion/tests/test_connect.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,17 @@ def test_none_config():

def test_str_config(name_to_path):
config = {name: str(path) for name, path in name_to_path.items()}
conn = ibis.datafusion.connect(config)
# if path.endswith((".parquet", ".csv", ".csv.gz")) connect triggers register
with pytest.warns(FutureWarning, match="v9.0"):
conn = ibis.datafusion.connect(config)
assert sorted(conn.list_tables()) == sorted(name_to_path)


def test_path_config(name_to_path):
config = name_to_path
conn = ibis.datafusion.connect(config)
# if path.endswith((".parquet", ".csv", ".csv.gz")) connect triggers register
with pytest.warns(FutureWarning, match="v9.0"):
conn = ibis.datafusion.connect(config)
assert sorted(conn.list_tables()) == sorted(name_to_path)


Expand Down
18 changes: 11 additions & 7 deletions ibis/backends/datafusion/tests/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,28 @@ def test_read_parquet(conn, data_dir):

def test_register_table(conn):
tab = pa.table({"x": [1, 2, 3]})
conn.register(tab, "my_table")
with pytest.warns(FutureWarning, match="v9.0"):
conn.register(tab, "my_table")
assert conn.table("my_table").x.sum().execute() == 6


def test_register_pandas(conn):
df = pd.DataFrame({"x": [1, 2, 3]})
conn.register(df, "my_table")
assert conn.table("my_table").x.sum().execute() == 6
with pytest.warns(FutureWarning, match="v9.0"):
conn.register(df, "my_table")
assert conn.table("my_table").x.sum().execute() == 6


def test_register_batches(conn):
batch = pa.record_batch([pa.array([1, 2, 3])], names=["x"])
conn.register(batch, "my_table")
assert conn.table("my_table").x.sum().execute() == 6
with pytest.warns(FutureWarning, match="v9.0"):
conn.register(batch, "my_table")
assert conn.table("my_table").x.sum().execute() == 6


def test_register_dataset(conn):
tab = pa.table({"x": [1, 2, 3]})
dataset = ds.InMemoryDataset(tab)
conn.register(dataset, "my_table")
assert conn.table("my_table").x.sum().execute() == 6
with pytest.warns(FutureWarning, match="v9.0"):
conn.register(dataset, "my_table")
assert conn.table("my_table").x.sum().execute() == 6
5 changes: 5 additions & 0 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compiler import STAR, C, F
from ibis.expr.operations.udf import InputType
from ibis.util import deprecated

if TYPE_CHECKING:
from collections.abc import Iterable, Mapping, MutableMapping, Sequence
Expand Down Expand Up @@ -515,6 +516,10 @@ def drop_database(
with self._safe_raw_sql(sge.Drop(this=name, kind="SCHEMA", replace=force)):
pass

@deprecated(
as_of="9.0",
instead="use the explicit `read_*` method for the filetype you are trying to read, e.g., read_parquet, read_csv, etc.",
)
def register(
self,
source: str | Path | Any,
Expand Down
4 changes: 3 additions & 1 deletion ibis/backends/duckdb/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,9 @@ def test_connect_duckdb(url, tmp_path):
)
def test_connect_local_file(out_method, extension, test_employee_data_1, tmp_path):
getattr(test_employee_data_1, out_method)(tmp_path / f"out.{extension}")
con = ibis.connect(tmp_path / f"out.{extension}")
with pytest.warns(FutureWarning, match="v9.0"):
# ibis.connect uses con.register
con = ibis.connect(tmp_path / f"out.{extension}")
t = next(iter(con.tables.values()))
assert not t.head().execute().empty

Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/duckdb/tests/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,10 +257,10 @@ def test_read_sqlite_no_table_name(con, tmp_path):
)
def test_register_sqlite(con, tmp_path):
path = tmp_path / "test.db"

sqlite_con = sqlite3.connect(str(path))
sqlite_con.execute("CREATE TABLE t AS SELECT 1 a UNION SELECT 2 UNION SELECT 3")
ft = con.register(f"sqlite://{path}", "t")
with pytest.warns(FutureWarning, match="v9.0"):
ft = con.register(f"sqlite://{path}", "t")
assert ft.count().execute()


Expand Down
6 changes: 5 additions & 1 deletion ibis/backends/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from ibis.backends.sql.dialects import Polars
from ibis.expr.rewrites import rewrite_stringslice
from ibis.formats.polars import PolarsSchema
from ibis.util import gen_name, normalize_filename
from ibis.util import deprecated, gen_name, normalize_filename

if TYPE_CHECKING:
from collections.abc import Iterable
Expand Down Expand Up @@ -74,6 +74,10 @@ def table(self, name: str, _schema: sch.Schema | None = None) -> ir.Table:
schema = PolarsSchema.to_ibis(self._tables[name].schema)
return ops.DatabaseTable(name, schema, self).to_expr()

@deprecated(
as_of="9.0",
instead="use the explicit `read_*` method for the filetype you are trying to read, e.g., read_parquet, read_csv, etc.",
)
def register(
self,
source: str | Path | Any,
Expand Down
11 changes: 7 additions & 4 deletions ibis/backends/polars/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@ def _load_data(self, **_: Any) -> None:
con = self.connection
for table_name in TEST_TABLES:
path = self.data_dir / "parquet" / f"{table_name}.parquet"
con.register(path, table_name=table_name)
con.register(array_types, table_name="array_types")
con.register(struct_types, table_name="struct")
con.register(win, table_name="win")
with pytest.warns(FutureWarning, match="v9.0"):
con.register(path, table_name=table_name)
# TODO: remove warnings and replace register when implementing 8858
with pytest.warns(FutureWarning, match="v9.0"):
con.register(array_types, table_name="array_types")
con.register(struct_types, table_name="struct")
con.register(win, table_name="win")

# TODO: remove when pyarrow inputs are supported
con._add_table("topk", pl.from_arrow(topk).lazy())
Expand Down
5 changes: 5 additions & 0 deletions ibis/backends/pyspark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from ibis.backends.sql import SQLBackend
from ibis.expr.operations.udf import InputType
from ibis.legacy.udf.vectorized import _coerce_to_series
from ibis.util import deprecated

if TYPE_CHECKING:
from collections.abc import Mapping, Sequence
Expand Down Expand Up @@ -649,6 +650,10 @@ def read_json(
spark_df.createOrReplaceTempView(table_name)
return self.table(table_name)

@deprecated(
as_of="9.0",
instead="use the explicit `read_*` method for the filetype you are trying to read, e.g., read_parquet, read_csv, etc.",
)
def register(
self,
source: str | Path | Any,
Expand Down
47 changes: 33 additions & 14 deletions ibis/backends/tests/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def gzip_csv(data_dir, tmp_path):
return str(f.absolute())


# TODO: rewrite or delete test when register api is removed
@pytest.mark.parametrize(
("fname", "in_table_name", "out_table_name"),
[
Expand Down Expand Up @@ -99,13 +100,15 @@ def gzip_csv(data_dir, tmp_path):
)
def test_register_csv(con, data_dir, fname, in_table_name, out_table_name):
with pushd(data_dir / "csv"):
table = con.register(fname, table_name=in_table_name)
with pytest.warns(FutureWarning, match="v9.0"):
table = con.register(fname, table_name=in_table_name)

assert any(out_table_name in t for t in con.list_tables())
if con.name != "datafusion":
table.count().execute()


# TODO: rewrite or delete test when register api is removed
@pytest.mark.notimpl(["datafusion"])
@pytest.mark.notyet(
[
Expand All @@ -126,11 +129,13 @@ def test_register_csv(con, data_dir, fname, in_table_name, out_table_name):
)
def test_register_csv_gz(con, data_dir, gzip_csv):
with pushd(data_dir):
table = con.register(gzip_csv)
with pytest.warns(FutureWarning, match="v9.0"):
table = con.register(gzip_csv)

assert table.count().execute()


# TODO: rewrite or delete test when register api is removed
@pytest.mark.notyet(
[
"bigquery",
Expand All @@ -154,7 +159,8 @@ def test_register_with_dotted_name(con, data_dir, tmp_path):
f.parent.mkdir()
data = data_dir.joinpath("csv", "diamonds.csv").read_bytes()
f.write_bytes(data)
table = con.register(str(f.absolute()))
with pytest.warns(FutureWarning, match="v9.0"):
table = con.register(str(f.absolute()))

if con.name != "datafusion":
table.count().execute()
Expand All @@ -173,6 +179,7 @@ def read_table(path: Path) -> Iterator[tuple[str, pa.Table]]:
return pac.read_csv(data_dir / f"{table_name}.csv", convert_options=convert_options)


# TODO: rewrite or delete test when register api is removed
@pytest.mark.parametrize(
("fname", "in_table_name", "out_table_name"),
[
Expand Down Expand Up @@ -216,14 +223,16 @@ def test_register_parquet(
pq.write_table(table, tmp_path / fname.name)

with pushd(tmp_path):
table = con.register(f"parquet://{fname.name}", table_name=in_table_name)
with pytest.warns(FutureWarning, match="v9.0"):
table = con.register(f"parquet://{fname.name}", table_name=in_table_name)

assert any(out_table_name in t for t in con.list_tables())
assert any(out_table_name in t for t in con.list_tables())

if con.name != "datafusion":
table.count().execute()


# TODO: rewrite or delete test when register api is removed
@pytest.mark.notyet(
[
"bigquery",
Expand Down Expand Up @@ -255,15 +264,20 @@ def test_register_iterator_parquet(
pq.write_table(table, tmp_path / "functional_alltypes.parquet")

with pushd(tmp_path):
table = con.register(
["parquet://functional_alltypes.parquet", "functional_alltypes.parquet"],
table_name=None,
)
with pytest.warns(FutureWarning, match="v9.0"):
table = con.register(
[
"parquet://functional_alltypes.parquet",
"functional_alltypes.parquet",
],
table_name=None,
)

assert any("ibis_read_parquet" in t for t in con.list_tables())
assert table.count().execute()


# TODO: modify test to use read_in_memory when implemented xref: 8858
@pytest.mark.notimpl(["datafusion"])
@pytest.mark.notyet(
[
Expand All @@ -287,14 +301,17 @@ def test_register_pandas(con):
pd = pytest.importorskip("pandas")
df = pd.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]})

t = con.register(df)
with pytest.warns(FutureWarning, match="v9.0"):
t = con.register(df)
assert t.x.sum().execute() == 6

t = con.register(df, "my_table")
with pytest.warns(FutureWarning, match="v9.0"):
t = con.register(df, "my_table")
assert t.op().name == "my_table"
assert t.x.sum().execute() == 6


# TODO: modify test to use read_in_memory when implemented xref: 8858
@pytest.mark.notimpl(["datafusion", "polars"])
@pytest.mark.notyet(
[
Expand All @@ -318,7 +335,8 @@ def test_register_pyarrow_tables(con):
pa = pytest.importorskip("pyarrow")
pa_t = pa.Table.from_pydict({"x": [1, 2, 3], "y": ["a", "b", "c"]})

t = con.register(pa_t)
with pytest.warns(FutureWarning, match="v9.0"):
t = con.register(pa_t)
assert t.x.sum().execute() == 6


Expand Down Expand Up @@ -351,8 +369,9 @@ def test_csv_reregister_schema(con, tmp_path):
]
)

# For a full file scan, expect correct schema based on final row
foo_table = con.register(foo, table_name="same")
with pytest.warns(FutureWarning, match="v9.0"):
# For a full file scan, expect correct schema based on final row
foo_table = con.register(foo, table_name="same")
result_schema = foo_table.schema()

assert result_schema.names == ("cola", "colb", "colc")
Expand Down

0 comments on commit 5b02581

Please sign in to comment.