Skip to content

Commit

Permalink
Handle zero-column files. Closes #361. (#363)
Browse files Browse the repository at this point in the history
  • Loading branch information
adamhooper authored and martindurant committed Aug 21, 2018
1 parent 5f06d4e commit e5ced3c
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 3 deletions.
8 changes: 5 additions & 3 deletions fastparquet/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,10 @@ def _set_attrs(self):
self.group_files.setdefault(i, set()).add(chunk.file_path)
self.schema = schema.SchemaHelper(self._schema)
self.selfmade = self.created_by.split(' ', 1)[0] == "fastparquet-python"
self.file_scheme = get_file_scheme([rg.columns[0].file_path
for rg in self.row_groups])
files = [rg.columns[0].file_path
for rg in self.row_groups
if rg.columns]
self.file_scheme = get_file_scheme(files)
self._read_partitions()
self._dtypes()

Expand Down Expand Up @@ -215,7 +217,7 @@ def _read_partitions(self):
for key, v in cats.items()])

def row_group_filename(self, rg):
if rg.columns[0].file_path:
if rg.columns and rg.columns[0].file_path:
base = self.fn.replace('_metadata', '').rstrip('/')
if base:
return join_path(base, rg.columns[0].file_path)
Expand Down
15 changes: 15 additions & 0 deletions fastparquet/test/test_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,3 +335,18 @@ def test_multi_index_category(tempdir):
assert dg.index.levels[0].dtype == '<M8[ns]'
assert dg.index.levels[1].name == 'b'
assert dg.equals(df)

def test_no_columns(tempdir):
    """Read a parquet file that has rows but zero columns.

    Regression test for https://github.com/dask/fastparquet/issues/361.

    The fixture was generated by creating a non-empty DataFrame and then
    selecting no columns, which yields _some_ rows and _no_ columns:

        df = pd.DataFrame({"A": [1, 2]})[[]]
        fastparquet.write("test-data/no_columns.parquet", df)
    """
    fixture_path = os.path.join(TEST_DATA, "no_columns.parquet")
    parquet_file = fastparquet.ParquetFile(fixture_path)

    # The file metadata alone must already report the rows and no columns.
    assert parquet_file.count == 2
    assert parquet_file.columns == []

    # Loading the data must keep the row count even without any column.
    loaded = parquet_file.to_pandas()
    assert len(loaded) == 2

    # Same construction as the one used to write the fixture.
    reference = pd.DataFrame({"A": [1, 2]})[[]]
    pd.testing.assert_frame_equal(loaded, reference)
Binary file added test-data/no_columns.parquet
Binary file not shown.

0 comments on commit e5ced3c

Please sign in to comment.