Skip to content

Commit

Permalink
Fix stats filter
Browse files Browse the repository at this point in the history
Odd things happen if you pass a one-element ndarray...
  • Loading branch information
Martin Durant committed Jun 1, 2017
1 parent 24753fa commit 30742fb
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 2 deletions.
8 changes: 6 additions & 2 deletions fastparquet/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,14 +659,18 @@ def filter_val(op, val, vmin=None, vmax=None):
-------
True or False
"""
if vmin is not None:
if vmax is not None:
if isinstance(vmax, np.ndarray):
vmax = vmax[0]
if op in ['==', '>='] and val > vmax:
return True
if op == '>' and val >= vmax:
return True
if op == 'in' and min(val) > vmax:
return True
if vmax is not None:
if vmin is not None:
if isinstance(vmin, np.ndarray):
vmin = vmin[0]
if op in ['==', '<='] and val < vmin:
return True
if op == '<' and val <= vmin:
Expand Down
10 changes: 10 additions & 0 deletions fastparquet/test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,3 +206,13 @@ def test_filter_special(tempdir):
out = pf.to_pandas(filters=[('symbol', '==', 'NOW')])
assert out.x.tolist() == [1, 5, 6]
assert out.symbol.tolist() == ['NOW', 'NOW', 'NOW']


def test_filter_stats(tempdir):
    """Check that a ``>=`` filter on an integer column yields the expected rows.

    Writes a seven-row, single-column frame as a hive-scheme dataset with
    ``row_group_offsets=[0, 4]``, reads it back with the filter
    ``('x', '>=', 5)`` and asserts that exactly ``[5, 6, 7]`` comes out.
    """
    frame = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6, 7]})

    # NOTE(review): offsets [0, 4] presumably split the data into row groups
    # holding 1..4 and 5..7, so the statistics-based filter can drop the
    # first group wholesale -- confirm against fastparquet's ``write`` docs.
    write(tempdir, frame, file_scheme='hive', row_group_offsets=[0, 4])

    filtered = ParquetFile(tempdir).to_pandas(filters=[('x', '>=', 5)])
    assert filtered.x.tolist() == [5, 6, 7]

0 comments on commit 30742fb

Please sign in to comment.