Skip to content

Commit

Permalink
feat(pandas): add approx_median
Browse files (browse the repository at this point in the history)
  • Loading branch information
mesejo authored and cpcloud committed May 2, 2023
1 parent ef3cc22 commit 6714b9f
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 1 deletion.
17 changes: 17 additions & 0 deletions ibis/backends/pandas/execution/generic.py
Expand Up @@ -916,6 +916,23 @@ def mode(x):
return aggcontext.agg(data, mode)


@execute_node.register(ops.ApproxMedian, pd.Series, (pd.Series, type(None)))
def execute_approx_median_series(_, data, mask, aggcontext=None, **kwargs):
    """Execute ``ops.ApproxMedian`` on a Series via ``Series.median``.

    Parameters
    ----------
    _
        The operation node being executed; unused here.
    data
        The Series to reduce.
    mask
        Optional Series used to index ``data`` before reducing, or ``None``
        to reduce all of ``data``.
    aggcontext
        Aggregation context whose ``agg`` method applies the reduction.
    **kwargs
        Extra dispatch arguments; ignored.

    Returns
    -------
    Whatever ``aggcontext.agg`` returns for the median reduction.
    """
    if mask is not None:
        selected = data[mask]
    else:
        selected = data
    return aggcontext.agg(selected, lambda x: x.median())


@execute_node.register(ops.ApproxMedian, SeriesGroupBy, (SeriesGroupBy, type(None)))
def execute_approx_median_series_groupby(_, data, mask, aggcontext=None, **kwargs):
    """Execute ``ops.ApproxMedian`` on grouped data via ``pd.Series.median``.

    Parameters
    ----------
    _
        The operation node being executed; unused here.
    data
        The ``SeriesGroupBy`` to reduce.
    mask
        Optional ``SeriesGroupBy`` whose underlying ``.obj`` is handed to
        ``_filtered_reduction``, or ``None`` for an unfiltered median.
    aggcontext
        Aggregation context whose ``agg`` method applies the reduction.
    **kwargs
        Extra dispatch arguments; ignored.

    Returns
    -------
    Whatever ``aggcontext.agg`` returns for the chosen reducer.
    """
    if mask is None:
        reducer = pd.Series.median
    else:
        # NOTE(review): presumably _filtered_reduction applies the mask to
        # each group before calling the wrapped reducer -- confirm against
        # its definition elsewhere in this module.
        reducer = functools.partial(
            _filtered_reduction, mask.obj, pd.Series.median
        )

    return aggcontext.agg(data, reducer)


@execute_node.register((ops.Not, ops.Negate), (bool, np.bool_))
def execute_not_bool(_, data, **kwargs):
    """Return the logical negation of a scalar boolean.

    Registered for both ``ops.Not`` and ``ops.Negate`` when the operand is a
    Python ``bool`` or a ``numpy.bool_``. The result is always a Python
    ``bool``.
    """
    return False if data else True
Expand Down
17 changes: 17 additions & 0 deletions ibis/backends/pandas/tests/execution/test_functions.py
Expand Up @@ -270,3 +270,20 @@ def func(x):
result = result.tolist()
assert result == [value]
assert type(result[0]) is type(value)


@pytest.mark.parametrize(
    ('ibis_func', 'pandas_func'),
    [
        (
            lambda x: x.approx_median(),
            lambda x: x.median(),
        )
    ],
)
@pytest.mark.parametrize('column', ['float64_with_zeros', 'int64_with_zeros'])
def test_approx_median(t, df, ibis_func, pandas_func, column):
    """Check that ``approx_median`` on a column equals pandas' ``median``."""
    # Reference value computed directly with pandas on the same column.
    expected = pandas_func(df[column])

    # Value produced by executing the ibis expression.
    result = ibis_func(t[column]).execute()

    assert expected == result
9 changes: 9 additions & 0 deletions ibis/backends/pandas/tests/execution/test_window.py
Expand Up @@ -209,6 +209,15 @@ def test_batting_quantile(players, players_df):
tm.assert_frame_equal(result, expected)


def test_batting_approx_median(players, players_df):
    """Check ``approx_median`` of ``H`` in a mutate against a pandas groupby.

    The expected frame is ``players_df`` plus a ``hits_median`` column holding
    the ``playerID``-grouped median of ``H``.
    """
    per_player_median = players_df.groupby('playerID').H.transform('median')
    expected = players_df.assign(hits_median=per_player_median)
    columns = expected.columns.tolist()

    expr = players.mutate(hits_median=lambda t: t.H.approx_median())
    executed = expr.execute()
    # Put the result into a deterministic row order before comparing.
    result = executed[columns].sort_values(columns).reset_index(drop=True)

    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize('op', ['sum', 'mean', 'min', 'max'])
def test_batting_specific_cumulative(batting, batting_df, op, sort_kind):
ibis_method = methodcaller(f'cum{op}')
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/tests/test_aggregation.py
Expand Up @@ -1034,7 +1034,7 @@ def test_corr_cov(
raises=com.OperationNotDefinedError,
)
@pytest.mark.broken(
["dask", "pandas"],
["dask"],
raises=AttributeError,
reason="'Series' object has no attribute 'approx_median'",
)
Expand Down

0 comments on commit 6714b9f

Please sign in to comment.