Skip to content

Commit

Permalink
Merge pull request #190 from martindurant/numerical_partition_keys
Browse files Browse the repository at this point in the history
Account for partition dir names with numbers
  • Loading branch information
martindurant committed Jul 30, 2017
2 parents 2180117 + 705c4dd commit 63bbac6
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 2 deletions.
9 changes: 9 additions & 0 deletions fastparquet/test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,15 @@ def test_single_upper_directory(tempdir):
assert (out.y == 'aa').all()


def test_numerical_partition_name(tempdir):
    """Round-trip a hive-partitioned dataset whose partition column is ``y1``.

    The partition column name contains a digit; after writing with
    ``partition_on=['y1']`` and reading back, the ``x`` values selected by
    each ``y1`` value must match what was written.
    """
    frame = pd.DataFrame({'x': [1, 5, 2, 5], 'y1': ['aa', 'aa', 'bb', 'aa']})
    write(tempdir, frame, file_scheme='hive', partition_on=['y1'])

    result = ParquetFile(tempdir).to_pandas()

    # (partition value, expected x values for the rows in that partition)
    expectations = (
        ('aa', [1, 5, 5]),
        ('bb', [2]),
    )
    for partition_value, expected_x in expectations:
        selected = result[result.y1 == partition_value]
        assert selected.x.tolist() == expected_x


def test_filter_without_paths(tempdir):
fn = os.path.join(tempdir, 'test.parq')
df = pd.DataFrame({
Expand Down
4 changes: 2 additions & 2 deletions fastparquet/util.py
Original file line number Diff line number Diff line change
def ex_from_sep(sep):
    """Generate regex for category folder matching.

    Builds (and caches) a compiled pattern matching one ``name=value``
    path component, where ``name`` consists of ASCII letters, digits and
    underscores and ``value`` is a run of characters that does not contain
    the path separator.

    Parameters
    ----------
    sep : str
        Path separator that terminates the ``value`` part.

    Returns
    -------
    Compiled regular expression with two groups: ``(name, value)``.
    """
    # ``seps`` is the per-separator cache of compiled patterns; it is
    # defined outside this block.
    if sep not in seps:
        # re.escape makes the separator safe inside the character class,
        # replacing the hand-written list of regex special characters.
        seps[sep] = re.compile(
            r"([a-zA-Z_0-9]+)=([^{}]+)".format(re.escape(sep)))
    return seps[sep]

Expand Down

0 comments on commit 63bbac6

Please sign in to comment.