From 8c6ad90547761cc47bad12e3a09e7f9ae229b13f Mon Sep 17 00:00:00 2001
From: Jonas Kunz
Date: Tue, 30 Sep 2025 13:09:43 +0200
Subject: [PATCH] Add rank estimation for exponential histograms

---
 .../ExponentialHistogramQuantile.java         |  55 ++++++++++
 .../RankAccuracyTests.java                    | 106 +++++++++++++++++++
 2 files changed, 161 insertions(+)
 create mode 100644 libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/RankAccuracyTests.java

diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java
index 218873982b1b3..9d78525d5eec9 100644
--- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java
@@ -70,6 +70,61 @@ public static double getQuantile(ExponentialHistogram histo, double quantile) {
         return removeNegativeZero(result);
     }
 
+    /**
+     * Estimates the rank of a given value in the distribution represented by the histogram.
+     * In other words, returns the number of values which are less than (or less than or equal to, if {@code inclusive}
+     * is true) the provided value.
+     * <p>
+     * The estimate treats all values of a bucket as if they were located at the bucket's point of least relative error,
+     * capped so that it does not lie beyond the histogram's minimum or maximum.
+     *
+     * @param histo the histogram to query
+     * @param value the value to estimate the rank for
+     * @param inclusive if true, counts values equal to the given value as well
+     * @return the number of elements less than (or less than or equal to, if {@code inclusive} is true) the given value
+     */
+    public static long estimateRank(ExponentialHistogram histo, double value, boolean inclusive) {
+        if (value >= 0) {
+            // All values in the negative buckets are smaller than a non-negative query value
+            long rank = histo.negativeBuckets().valueCount();
+            if (value > 0 || inclusive) {
+                rank += histo.zeroBucket().count();
+            }
+            rank += estimateRank(histo.positiveBuckets().iterator(), value, inclusive, histo.max());
+            return rank;
+        } else {
+            // Mirror the problem onto the positive axis: count the negative values which are greater than (or equal to,
+            // when an exclusive rank is requested) the query value and subtract them from the number of negative values
+            long numValuesGreater = estimateRank(histo.negativeBuckets().iterator(), -value, inclusive == false, -histo.min());
+            return histo.negativeBuckets().valueCount() - numValuesGreater;
+        }
+    }
+
+    /**
+     * Sums up the counts of the leading buckets whose point of least relative error, capped at {@code maxValue},
+     * is less than (or less than or equal to, if {@code inclusive} is true) the given value.
+     *
+     * @param buckets the buckets to inspect; the iterator is advanced past every bucket that was counted
+     * @param value the value to compare the bucket points against
+     * @param inclusive if true, a bucket whose point equals the given value is counted as well
+     * @param maxValue the upper bound applied to each bucket point before the comparison
+     * @return the sum of the counts of the matching buckets
+     */
+    private static long estimateRank(BucketIterator buckets, double value, boolean inclusive, double maxValue) {
+        long rank = 0;
+        while (buckets.hasNext()) {
+            double bucketMidpoint = ExponentialScaleUtils.getPointOfLeastRelativeError(buckets.peekIndex(), buckets.scale());
+            bucketMidpoint = Math.min(bucketMidpoint, maxValue);
+            if (bucketMidpoint < value || (inclusive && bucketMidpoint == value)) {
+                rank += buckets.peekCount();
+                buckets.advance();
+            } else {
+                break;
+            }
+        }
+        return rank;
+    }
+
     private static double removeNegativeZero(double result) {
         return result == 0.0 ? 0.0 : result;
     }
diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/RankAccuracyTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/RankAccuracyTests.java
new file mode 100644
index 0000000000000..86bb9ab2650db
--- /dev/null
+++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/RankAccuracyTests.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import java.util.Arrays;
+import java.util.stream.DoubleStream;
+
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * Verifies {@link ExponentialHistogramQuantile#estimateRank} against ranks computed by a linear scan over the input values
+ * after rounding them to the point of least relative error of their bucket.
+ */
+public class RankAccuracyTests extends ExponentialHistogramTestCase {
+
+    public void testRandomDistribution() {
+        int numValues = randomIntBetween(10, 10_000);
+        double[] values = new double[numValues];
+
+        int valuesGenerated = 0;
+        while (valuesGenerated < values.length) {
+            double value;
+            if (randomDouble() < 0.01) { // 1% chance of exact zero
+                value = 0;
+            } else {
+                value = randomDouble() * 2_000_000 - 1_000_000;
+            }
+            // Add some duplicates; the count is drawn once so that it is not re-rolled on every loop condition check
+            int numDuplicates = randomIntBetween(1, 10);
+            for (int i = 0; i < numDuplicates && valuesGenerated < values.length; i++) {
+                values[valuesGenerated++] = value;
+            }
+        }
+
+        int numBuckets = randomIntBetween(4, 400);
+        ExponentialHistogram histo = createAutoReleasedHistogram(numBuckets, values);
+
+        Arrays.sort(values);
+        double min = values[0];
+        double max = values[values.length - 1];
+
+        // The expected ranks are computed on this array via getRank(), which expects its input in ascending order
+        double[] valuesRoundedToBucketCenters = DoubleStream.of(values).map(value -> {
+            if (value == 0) {
+                return 0;
+            }
+            long index = ExponentialScaleUtils.computeIndex(value, histo.scale());
+            double bucketCenter = Math.signum(value) * ExponentialScaleUtils.getPointOfLeastRelativeError(index, histo.scale());
+            return Math.clamp(bucketCenter, min, max);
+        }).toArray();
+
+        // Test the values at exactly the bucket center for exclusivity correctness
+        assertEstimatedRanks(histo, valuesRoundedToBucketCenters, valuesRoundedToBucketCenters);
+        // Test the original values to have values in between bucket centers
+        assertEstimatedRanks(histo, values, valuesRoundedToBucketCenters);
+    }
+
+    /**
+     * Asserts that the inclusive and the exclusive rank estimated for each of the query values
+     * equal the ranks of that value within the sorted reference values.
+     */
+    private void assertEstimatedRanks(ExponentialHistogram histo, double[] queryValues, double[] sortedReferenceValues) {
+        for (double v : queryValues) {
+            long inclusiveRank = getRank(v, sortedReferenceValues, true);
+            assertThat(ExponentialHistogramQuantile.estimateRank(histo, v, true), equalTo(inclusiveRank));
+            long exclusiveRank = getRank(v, sortedReferenceValues, false);
+            assertThat(ExponentialHistogramQuantile.estimateRank(histo, v, false), equalTo(exclusiveRank));
+        }
+    }
+
+    /**
+     * Computes the exact rank of a value by scanning a sorted array from its start.
+     *
+     * @param value the value to compute the rank for
+     * @param sortedValues the reference values, expected to be sorted in ascending order
+     * @param inclusive if true, values equal to the given value are counted as well
+     * @return the number of values less than (or less than or equal to, if {@code inclusive} is true) the given value
+     */
+    private static long getRank(double value, double[] sortedValues, boolean inclusive) {
+        for (int i = 0; i < sortedValues.length; i++) {
+            if (sortedValues[i] > value || (inclusive == false && sortedValues[i] == value)) {
+                return i;
+            }
+        }
+        return sortedValues.length;
+    }
+}