refactor: drop limit kwarg from to_parquet/to_csv

jcrist authored and gforsyth committed Feb 7, 2023
1 parent c264477 commit a54460c

Showing 3 changed files with 12 additions and 36 deletions.
ibis/backends/base/__init__.py (12 changes: 2 additions & 10 deletions)
@@ -376,7 +376,6 @@ def to_parquet(
         path: str | Path,
         *,
         params: Mapping[ir.Scalar, Any] | None = None,
-        limit: int | str | None = None,
         **kwargs: Any,
     ) -> None:
         """Write the results of executing the given expression to a parquet file.

@@ -392,9 +391,6 @@ def to_parquet(
             The data source. A string or Path to the parquet file.
         params
             Mapping of scalar parameter expressions to value.
-        limit
-            An integer to effect a specific row limit. A value of `None` means
-            "no limit". The default is in `ibis/config.py`.
         **kwargs
             Additional keyword arguments passed to pyarrow.parquet.ParquetWriter

@@ -403,7 +399,7 @@ def to_parquet(
         self._import_pyarrow()
         import pyarrow.parquet as pq

-        batch_reader = expr.to_pyarrow_batches(params=params, limit=limit)
+        batch_reader = expr.to_pyarrow_batches(params=params)

         with pq.ParquetWriter(path, batch_reader.schema) as writer:
             for batch in batch_reader:
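
For illustration, the loop above streams record batches straight to disk instead of materializing the full result. A minimal standalone sketch of the same pattern, with an in-memory pyarrow RecordBatchReader standing in for expr.to_pyarrow_batches() (the table contents here are made up):

import pyarrow as pa
import pyarrow.parquet as pq

# Stand-in for expr.to_pyarrow_batches(params=params); any RecordBatchReader works.
table = pa.table({"playerID": ["aardsda01", "zitoba01"], "yearID": [2004, 2005]})
reader = pa.RecordBatchReader.from_batches(
    table.schema, table.to_batches(max_chunksize=1)
)

# Write each batch as it arrives, so the full result never sits in memory at once.
with pq.ParquetWriter("out.parquet", reader.schema) as writer:
    for batch in reader:
        writer.write_batch(batch)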
@@ -416,7 +412,6 @@ def to_csv(
         path: str | Path,
         *,
         params: Mapping[ir.Scalar, Any] | None = None,
-        limit: int | str | None = None,
         **kwargs: Any,
     ) -> None:
         """Write the results of executing the given expression to a CSV file.

@@ -432,9 +427,6 @@ def to_csv(
             The data source. A string or Path to the CSV file.
         params
             Mapping of scalar parameter expressions to value.
-        limit
-            An integer to effect a specific row limit. A value of `None` means
-            "no limit". The default is in `ibis/config.py`.
         **kwargs
             Additional keyword arguments passed to pyarrow.csv.CSVWriter

@@ -443,7 +435,7 @@ def to_csv(
         self._import_pyarrow()
         import pyarrow.csv as pcsv

-        batch_reader = expr.to_pyarrow_batches(params=params, limit=limit)
+        batch_reader = expr.to_pyarrow_batches(params=params)

         with pcsv.CSVWriter(path, batch_reader.schema) as writer:
             for batch in batch_reader:
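With the kwarg gone from both writers, a caller who previously passed limit= can express the same thing on the expression before writing. A hedged usage sketch; the backend and table name here are assumptions, not part of this diff:

import ibis

con = ibis.duckdb.connect()      # assumed backend; any pyarrow-capable backend works
t = con.table("awards_players")  # hypothetical table name

# Before this change: t.to_parquet("out.parquet", limit=10)
# After: limit the expression, then write whatever it returns.
t.limit(10).to_parquet("out.parquet")
t.limit(10).to_csv("out.csv")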
ibis/backends/tests/test_export.py (22 changes: 8 additions & 14 deletions)

@@ -153,31 +153,25 @@ def test_no_pyarrow_message(awards_players, monkeypatch):
         awards_players.to_pyarrow()


-@pytest.mark.parametrize("limit", limit_no_limit)
-def test_table_to_parquet(tmp_path, backend, limit, awards_players):
+@pytest.mark.notimpl(["dask", "impala", "pyspark"])
+def test_table_to_parquet(tmp_path, backend, awards_players):
     outparquet = tmp_path / "out.parquet"
-    awards_players.to_parquet(outparquet, limit=limit)
+    awards_players.to_parquet(outparquet)

     df = pd.read_parquet(outparquet)

-    backend.assert_frame_equal(awards_players.execute(limit=limit), df)
-
-    if limit is not None:
-        assert len(df) == limit
+    backend.assert_frame_equal(awards_players.execute(), df)


-@pytest.mark.parametrize("limit", limit_no_limit)
-def test_table_to_csv(tmp_path, backend, limit, awards_players):
+@pytest.mark.notimpl(["dask", "impala", "pyspark"])
+def test_table_to_csv(tmp_path, backend, awards_players):
     outcsv = tmp_path / "out.csv"

     # avoid pandas NaNonense
     awards_players = awards_players.select("playerID", "awardID", "yearID", "lgID")

-    awards_players.to_csv(outcsv, limit=limit)
+    awards_players.to_csv(outcsv)

     df = pd.read_csv(outcsv, dtype=awards_players.schema().to_pandas())

-    backend.assert_frame_equal(awards_players.execute(limit=limit), df)
-
-    if limit is not None:
-        assert len(df) == limit
+    backend.assert_frame_equal(awards_players.execute(), df)
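
The "avoid pandas NaNonense" comment and the explicit dtype= passed to pd.read_csv guard against a known pandas behavior: integer columns containing missing values are promoted to float64 on CSV read, which would break assert_frame_equal against the backend result. A small self-contained sketch of the effect (the column names are illustrative):

import io

import pandas as pd

csv = "playerID,yearID\naardsda01,2004\nzitoba01,\n"

df = pd.read_csv(io.StringIO(csv))
print(df["yearID"].dtype)  # float64: 2004 silently became 2004.0

df = pd.read_csv(io.StringIO(csv), dtype={"yearID": "Int64"})
print(df["yearID"].dtype)  # Int64: the integers survive next to the missing value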
ibis/expr/types/core.py (14 changes: 2 additions & 12 deletions)

@@ -406,7 +406,6 @@ def to_parquet(
         path: str | Path,
         *,
         params: Mapping[ir.Scalar, Any] | None = None,
-        limit: int | str | None = None,
         **kwargs: Any,
     ) -> None:
         """Write the results of executing the given expression to a parquet file
@@ -420,25 +419,19 @@ def to_parquet(
             The data source. A string or Path to the parquet file.
         params
             Mapping of scalar parameter expressions to value.
-        limit
-            An integer to effect a specific row limit. A value of `None` means
-            "no limit". The default is in `ibis/config.py`.
         **kwargs
             Additional keyword arguments passed to pyarrow.parquet.ParquetWriter
             https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html
         """
-        self._find_backend(use_default=True).to_parquet(
-            self, path, limit=limit, **kwargs
-        )
+        self._find_backend(use_default=True).to_parquet(self, path, **kwargs)

     @experimental
     def to_csv(
         self,
         path: str | Path,
         *,
         params: Mapping[ir.Scalar, Any] | None = None,
-        limit: int | str | None = None,
         **kwargs: Any,
     ) -> None:
         """Write the results of executing the given expression to a CSV file
@@ -452,15 +445,12 @@ def to_csv(
             The data source. A string or Path to the CSV file.
         params
             Mapping of scalar parameter expressions to value.
-        limit
-            An integer to effect a specific row limit. A value of `None` means
-            "no limit". The default is in `ibis/config.py`.
         **kwargs
             Additional keyword arguments passed to pyarrow.csv.CSVWriter
             https://arrow.apache.org/docs/python/generated/pyarrow.csv.CSVWriter.html
         """
-        self._find_backend(use_default=True).to_csv(self, path, limit=limit, **kwargs)
+        self._find_backend(use_default=True).to_csv(self, path, **kwargs)

     def unbind(self) -> ir.Table:
         """Return an expression built on `UnboundTable` instead of backend-specific objects."""
