Skip to content

Commit

Permalink
fix another typing thing
Browse files (browse the repository at this point in the history)
  • Loading branch information
fjetter committed Feb 13, 2024
1 parent bce9d82 commit f0d4956
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 10 deletions.
20 changes: 10 additions & 10 deletions dask_deltatable/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,17 +69,17 @@ def _read_delta_partition(
pyarrow_to_pandas["types_mapper"] = _get_type_mapper(
pyarrow_to_pandas.get("types_mapper")
)
return (
pa_ds.dataset(
source=filename,
schema=schema,
filesystem=fs,
format="parquet",
partitioning="hive",
)
.to_table(filter=filter_expression, columns=columns)
.to_pandas(**pyarrow_to_pandas)
pyarrow_to_pandas["ignore_metadata"] = pyarrow_to_pandas.get(
"ignore_metadata", False
)
table = pa_ds.dataset(
source=filename,
schema=schema,
filesystem=fs,
format="parquet",
partitioning="hive",
).to_table(filter=filter_expression, columns=columns)
return table.to_pandas(**pyarrow_to_pandas)


def _read_from_filesystem(
Expand Down
4 changes: 4 additions & 0 deletions tests/test_acceptance.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ def test_reader_all_primitive_types():
expected_ddf = dd.read_parquet(
f"{DATA_DIR}/all_primitive_types/expected/latest/table_content/*parquet"
)
# Dask and Delta go through different Parquet parsers, which read the
# timestamp column differently. This is likely a bug in Arrow, but the Delta
# result is "more correct", so cast the expected column to microseconds.
expected_ddf["timestamp"] = expected_ddf["timestamp"].astype("datetime64[us]")
assert_eq(actual_ddf, expected_ddf)


Expand Down

0 comments on commit f0d4956

Please sign in to comment.