diff --git a/fastparquet/api.py b/fastparquet/api.py index 190080ac..dea0c712 100644 --- a/fastparquet/api.py +++ b/fastparquet/api.py @@ -311,11 +311,14 @@ def __getitem__(self, item): new_rgs = self.row_groups[item] if not isinstance(new_rgs, list): new_rgs = [new_rgs] - new_pf = copy.deepcopy(self) - new_pf.fmd.row_groups = new_rgs - new_pf._set_attrs() - # would otherwise be "simple" when selecting one rg - new_pf.file_scheme = self.file_scheme + new_pf = object.__new__(ParquetFile) + fmd = copy.copy(self.fmd) + fmd.row_groups = new_rgs + new_pf.__setstate__( + {"fn": self.fn, "open": self.open, "fmd": fmd, + "pandas_nulls": self.pandas_nulls, "_base_dtype": self._base_dtype, + "tz": self.tz} + ) return new_pf def __len__(self): diff --git a/fastparquet/test/test_dataframe.py b/fastparquet/test/test_dataframe.py index c34602db..5f9ddea2 100644 --- a/fastparquet/test/test_dataframe.py +++ b/fastparquet/test/test_dataframe.py @@ -2,6 +2,7 @@ from unittest import mock import pandas as pd +import pytest from numpy import empty as np_empty from pandas.testing import assert_frame_equal @@ -33,6 +34,7 @@ def test_empty(): assert len(views) == 5 +@pytest.mark.xfail(reason="df._data is going away") def test_empty_tz_utc(): with warnings.catch_warnings(): warnings.simplefilter("error")