Skip to content

Commit

Permalink
fix(histogram): ensure that the bin width calculation matches numpy
Browse files Browse the repository at this point in the history
This commit ensures the bin width calculation for the `histogram` API
matches numpy's implementation. There's still some work to do to ensure
the final calculation matches (the buckets aren't quite matching yet),
but this gets us closer to the desired outcome.
  • Loading branch information
NickCrews authored and cpcloud committed Aug 12, 2023
1 parent ce0e0b9 commit e6a0037
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 8 deletions.
4 changes: 4 additions & 0 deletions ibis/backends/dask/execution/generic.py
Expand Up @@ -34,6 +34,7 @@
date_types,
integer_types,
numeric_types,
scalar_types,
simple_types,
timestamp_types,
)
Expand Down Expand Up @@ -361,6 +362,9 @@ def execute_not_scalar_or_series(op, data, **kwargs):
@execute_node.register(ops.Binary, dd.Series, dd.Series)
@execute_node.register(ops.Binary, dd.Series, dd.core.Scalar)
@execute_node.register(ops.Binary, dd.core.Scalar, dd.Series)
@execute_node.register(ops.Binary, dd.core.Scalar, scalar_types)
@execute_node.register(ops.Binary, scalar_types, dd.core.Scalar)
@execute_node.register(ops.Binary, dd.core.Scalar, dd.core.Scalar)
@execute_node.register(
(ops.NumericBinary, ops.LogicalBinary, ops.Comparison),
numeric_types,
Expand Down
6 changes: 4 additions & 2 deletions ibis/backends/tests/test_numeric.py
Expand Up @@ -1371,8 +1371,10 @@ def test_clip(backend, alltypes, df, ibis_func, pandas_func):
)
def test_histogram(con, alltypes):
n = 10
results = con.execute(alltypes.int_col.histogram(n).name("tmp"))
assert len(results.value_counts()) == n
hist = con.execute(alltypes.int_col.histogram(n).name("hist"))
vc = hist.value_counts().sort_index()
vc_np, _bin_edges = np.histogram(alltypes.int_col.execute(), bins=n)
assert vc.tolist() == vc_np.tolist()


@pytest.mark.parametrize("const", ["pi", "e"])
Expand Down
8 changes: 2 additions & 6 deletions ibis/expr/types/numeric.py
Expand Up @@ -1032,17 +1032,13 @@ def histogram(
)

if binwidth is None or base is None:
import ibis

if nbins is None:
raise ValueError("`nbins` is required if `binwidth` is not provided")

empty_window = ibis.window()

if base is None:
base = self.min().over(empty_window) - eps
base = self.min() - eps

binwidth = (self.max().over(empty_window) - base) / (nbins - 1)
binwidth = (self.max() - base) / nbins

return ((self - base) / binwidth).floor()

Expand Down

0 comments on commit e6a0037

Please sign in to comment.