Skip to content

Commit

Permalink
Extra field when cloning ParquetFile (#866)
Browse files Browse the repository at this point in the history
  • Loading branch information
martindurant committed May 9, 2023
1 parent 067e529 commit f747fe6
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 1 deletion.
3 changes: 2 additions & 1 deletion fastparquet/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,8 +318,9 @@ def __getitem__(self, item):
new_pf.__setstate__(
{"fn": self.fn, "open": self.open, "fmd": fmd,
"pandas_nulls": self.pandas_nulls, "_base_dtype": self._base_dtype,
"tz": self.tz}
"tz": self.tz, "_columns_dtype": self._columns_dtype}
)
new_pf._set_attrs()
return new_pf

def __len__(self):
Expand Down
12 changes: 12 additions & 0 deletions fastparquet/test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1513,3 +1513,15 @@ def test_cat_not_cat(tempdir):
pf = ParquetFile(fn)
with pytest.raises(TypeError):
pf.to_pandas(categories=["val"])


def test_select_or_iter():
    """Check that selecting a row group and iterating row groups agree.

    Opens ``baz.parquet`` from ``TEST_DATA``, reads row group 0 through
    indexing (``pf[0]``) and reads all row groups through
    ``iter_row_groups()``.  The file is expected to hold exactly one row
    group whose ``id`` column is the integers 0..31, and both access paths
    must return that same column.
    """
    # NOTE(review): presumably guards the indexed-copy path of ParquetFile
    # against missing state on the cloned object -- confirm against the
    # accompanying change to ``__getitem__``.
    path = os.path.join(TEST_DATA, "baz.parquet")
    parquet_file = ParquetFile(path)

    # Indexed selection first, then full iteration (same order as before).
    selected_frame = parquet_file[0].to_pandas()
    iterated_frames = list(parquet_file.iter_row_groups())
    assert len(iterated_frames) == 1

    selected_ids = selected_frame["id"].tolist()
    iterated_ids = iterated_frames[0]["id"].tolist()
    assert selected_ids == iterated_ids == list(range(32))

Binary file not shown.

0 comments on commit f747fe6

Please sign in to comment.