Skip to content

Commit

Permalink
Merge pull request #165 from martindurant/fix_in_filter
Browse files Browse the repository at this point in the history
"in" was checking range not exact membership of set
  • Loading branch information
martindurant committed Jun 7, 2017
2 parents 13b7978 + d4471a0 commit abb3bd6
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 0 deletions.
3 changes: 3 additions & 0 deletions fastparquet/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,9 @@ def filter_val(op, val, vmin=None, vmax=None):
-------
True or False
"""
if (op == 'in' and vmax is not None and vmin is not None and
vmax == vmin and vmax not in val):
return True
if vmax is not None:
if isinstance(vmax, np.ndarray):
vmax = vmax[0]
Expand Down
10 changes: 10 additions & 0 deletions fastparquet/test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,16 @@ def test_filter_special(tempdir):
assert out.symbol.tolist() == ['NOW', 'NOW', 'NOW']


def test_in_filter(tempdir):
    """Check that an ``in`` filter on a partition column keeps only listed values.

    The frame is written as a hive-partitioned dataset on ``symbols`` and
    read back with ``('symbols', 'in', ['a', 'c'])``.  The symbol ``'b'``
    sorts between the two requested values, so the result must not contain
    it (nor ``'d'``): only exact members of the list may come back.
    """
    wanted = ['a', 'c']
    frame = pd.DataFrame(data={
        'symbols': ['a', 'a', 'b', 'c', 'c', 'd'],
        'values': [1, 2, 3, 4, 5, 6],
    })
    write(tempdir, frame, file_scheme='hive', partition_on=['symbols'])

    # Read back through the filter and compare the distinct symbols returned.
    result = ParquetFile(tempdir).to_pandas(
        filters=[('symbols', 'in', wanted)])
    assert set(result.symbols) == {'a', 'c'}


def test_filter_stats(tempdir):
df = pd.DataFrame({
'x': [1, 2, 3, 4, 5, 6, 7],
Expand Down

0 comments on commit abb3bd6

Please sign in to comment.