Skip to content

Commit

Permalink
Merge pull request #218 from martindurant/more_paths
Browse files Browse the repository at this point in the history
More paths
  • Loading branch information
martindurant committed Sep 23, 2017
2 parents ff9497b + d1da8d6 commit 714e320
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 4 deletions.
12 changes: 9 additions & 3 deletions fastparquet/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ def __init__(self, fn, verify=False, open_with=default_open,
if isinstance(fn, (tuple, list)):
basepath, fmd = metadata_from_many(fn, verify_schema=verify,
open_with=open_with, root=root)
self.fn = sep.join([basepath, '_metadata']) # effective file
if basepath:
self.fn = sep.join([basepath, '_metadata']) # effective file
else:
self.fn = '_metadata'
self.fmd = fmd
self._set_attrs()
else:
Expand Down Expand Up @@ -178,8 +181,11 @@ def _read_partitions(self):

def row_group_filename(self, rg):
    """Return the path of the file that holds the data for row group ``rg``.

    Parameters
    ----------
    rg : row-group object
        Only ``rg.columns[0].file_path`` is consulted.

    Returns
    -------
    str
        ``self.fn`` when the first column chunk carries no ``file_path``.
        Otherwise ``file_path`` joined (with ``self.sep``) onto the
        directory part of ``self.fn``, or ``file_path`` alone when that
        directory part is empty.
    """
    file_path = rg.columns[0].file_path
    if not file_path:
        # No separate file recorded for this row group: use self.fn itself.
        return self.fn

    marker = '_metadata'
    base = self.fn
    # Drop only a *trailing* "_metadata".  A blanket str.replace would also
    # delete the marker from inside directory names (e.g.
    # "my_metadata_dir/_metadata") and yield a wrong base path.
    if base.endswith(marker):
        base = base[:-len(marker)]
    base = base.rstrip(self.sep)

    if base:
        return self.sep.join([base, file_path])
    # self.fn was a bare "_metadata": file_path is used as-is, avoiding a
    # spurious leading separator.
    return file_path

Expand Down
12 changes: 12 additions & 0 deletions fastparquet/test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,3 +371,15 @@ def test_bad_file_paths(tempdir):
out = pf.to_pandas()
assert out.a.tolist() == ['x', 'y', 'z'] * 2
assert 'dir0' not in out

path1 = os.path.join(tempdir, 'data')
fn1 = os.path.join(path1, 'out.parq')
os.makedirs(path1)
write(fn1, df)
path2 = os.path.join(tempdir, 'data2')
fn2 = os.path.join(path2, 'out.parq')
os.makedirs(path2)
write(fn2, df)
pf = ParquetFile([fn1, fn2])
out = pf.to_pandas()
assert out.a.tolist() == ['x', 'y', 'z'] * 2
2 changes: 1 addition & 1 deletion fastparquet/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def analyse_paths(file_list, sep=os.sep, root=False):
l = len(basepath)

else:
basepath = root.split(sep)
basepath = root.rstrip(sep).split(sep)
l = len(basepath)
assert all(p[:l] == basepath for p in path_parts_list
), "All paths must begin with the given root"
Expand Down

0 comments on commit 714e320

Please sign in to comment.