Extract histogramFieldDocValues into a utility class (#63100) (#63148)
This function will be needed in the upcoming rate aggs tests.
imotov committed Oct 1, 2020
1 parent 3ad4b00 commit fc13b72
Showing 9 changed files with 171 additions and 317 deletions.
@@ -0,0 +1,38 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/


package org.elasticsearch.xpack.analytics;

import java.io.IOException;

import org.apache.lucene.document.BinaryDocValuesField;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.search.aggregations.metrics.TDigestState;

import com.tdunning.math.stats.Centroid;
import com.tdunning.math.stats.TDigest;

public final class AnalyticsTestsUtils {

/**
* Generates a binary doc values field for histogram fields. Used in tests of aggregations that work on histogram fields.
*/
public static BinaryDocValuesField histogramFieldDocValues(String fieldName, double[] values) throws IOException {
TDigest histogram = new TDigestState(100.0); // default compression
for (double value : values) {
histogram.add(value);
}
BytesStreamOutput streamOutput = new BytesStreamOutput();
histogram.compress();
for (Centroid centroid : histogram.centroids()) {
streamOutput.writeVInt(centroid.count());
streamOutput.writeDouble(centroid.mean());
}
return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef());
}

}
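As a quick sanity check of the format this helper writes, here is a minimal round-trip sketch (not part of the commit): it builds a field with histogramFieldDocValues, then reads the bytes back as the (VInt count, double mean) pairs the loop above produces. The class name HistogramDocValuesRoundTrip and the field name "latency" are illustrative only, and StreamInput.wrap(byte[], int, int) is assumed to be available in this branch. The tests updated below consume the helper the same way, e.g. w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {0, 1.2, 10, 12, 24}))).

package org.elasticsearch.xpack.analytics;

import java.io.IOException;

import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.StreamInput;

public class HistogramDocValuesRoundTrip {
    public static void main(String[] args) throws IOException {
        // Encode a few raw values into a pre-aggregated histogram field.
        BinaryDocValuesField field =
            AnalyticsTestsUtils.histogramFieldDocValues("latency", new double[] {1.0, 2.5, 2.5, 7.0});

        // Decode: the helper writes one (VInt count, double mean) pair per centroid.
        BytesRef bytes = field.binaryValue();
        try (StreamInput in = StreamInput.wrap(bytes.bytes, bytes.offset, bytes.length)) {
            while (in.available() > 0) {
                int count = in.readVInt();
                double mean = in.readDouble();
                System.out.printf("centroid count=%d mean=%s%n", count, mean);
            }
        }
    }
}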
@@ -6,46 +6,39 @@

package org.elasticsearch.xpack.analytics.aggregations.bucket.histogram;

import com.tdunning.math.stats.Centroid;
import com.tdunning.math.stats.TDigest;
import org.apache.lucene.document.BinaryDocValuesField;
import static java.util.Collections.singleton;
import static org.elasticsearch.xpack.analytics.AnalyticsTestsUtils.histogramFieldDocValues;

import java.util.Collections;
import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.plugins.SearchPlugin;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.histogram.InternalHistogram;
import org.elasticsearch.search.aggregations.metrics.TDigestState;
import org.elasticsearch.search.aggregations.metrics.TopHitsAggregationBuilder;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import static java.util.Collections.singleton;

public class HistoBackedHistogramAggregatorTests extends AggregatorTestCase {

private static final String FIELD_NAME = "field";

public void testHistograms() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {0, 1.2, 10, 12, 24})));
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 10, 10, 30})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {0, 1.2, 10, 12, 24})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-10, 0.01, 10, 10, 30})));

HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
.field(FIELD_NAME)
@@ -80,9 +73,9 @@ public void testHistograms() throws Exception {
public void testMinDocCount() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {0, 1.2, 10, 12, 24})));
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 10, 10, 30, 90})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {0, 1.2, 10, 12, 24})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-10, 0.01, 10, 10, 30, 90})));

HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
.field(FIELD_NAME)
@@ -109,8 +102,8 @@ public void testRandomOffset() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
// Note, these values are carefully chosen to ensure that no matter what offset we pick, no two can end up in the same bucket
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {3.2, 9.3})));
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-5, 3.2 })));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {3.2, 9.3})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-5, 3.2 })));

final double offset = randomDouble();
final double interval = 5;
@@ -143,8 +136,8 @@ public void testExtendedBounds() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {

w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-4.5, 4.3})));
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-5, 3.2 })));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-4.5, 4.3})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-5, 3.2 })));

HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
.field(FIELD_NAME)
@@ -178,8 +171,8 @@ public void testSubAggs() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {

w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-4.5, 4.3})));
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-5, 3.2 })));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-4.5, 4.3})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-5, 3.2 })));

HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
.field(FIELD_NAME)
@@ -198,23 +191,6 @@ public void testSubAggs() throws Exception {
}
}

private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException {
TDigest histogram = new TDigestState(100.0); //default
for (double value : values) {
histogram.add(value);
}
BytesStreamOutput streamOutput = new BytesStreamOutput();
histogram.compress();
Collection<Centroid> centroids = histogram.centroids();
Iterator<Centroid> iterator = centroids.iterator();
while ( iterator.hasNext()) {
Centroid centroid = iterator.next();
streamOutput.writeVInt(centroid.count());
streamOutput.writeDouble(centroid.mean());
}
return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef());
}

@Override
protected List<SearchPlugin> getSearchPlugins() {
return org.elasticsearch.common.collect.List.of(new AnalyticsPlugin(Settings.EMPTY));
@@ -5,9 +5,6 @@
*/
package org.elasticsearch.xpack.analytics.aggregations.metrics;

import com.tdunning.math.stats.Centroid;
import com.tdunning.math.stats.TDigest;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.RandomIndexWriter;
@@ -16,15 +13,13 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.plugins.SearchPlugin;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.metrics.AvgAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.InternalAvg;
import org.elasticsearch.search.aggregations.metrics.TDigestState;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
@@ -34,14 +29,13 @@

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;

import static java.util.Collections.singleton;
import static org.elasticsearch.search.aggregations.AggregationBuilders.avg;
import static org.elasticsearch.xpack.analytics.AnalyticsTestsUtils.histogramFieldDocValues;

public class HistoBackedAvgAggregatorTests extends AggregatorTestCase {

@@ -58,8 +52,8 @@ public void testNoDocs() throws IOException {

public void testNoMatchingField() throws IOException {
testCase(new MatchAllDocsQuery(), iw -> {
iw.addDocument(singleton(getDocValue("wrong_field", new double[] {3, 1.2, 10})));
iw.addDocument(singleton(getDocValue("wrong_field", new double[] {5.3, 6, 20})));
iw.addDocument(singleton(histogramFieldDocValues("wrong_field", new double[] {3, 1.2, 10})));
iw.addDocument(singleton(histogramFieldDocValues("wrong_field", new double[] {5.3, 6, 20})));
}, avg -> {
assertEquals(Double.NaN, avg.getValue(), 0d);
assertFalse(AggregationInspectionHelper.hasValue(avg));
@@ -68,9 +62,9 @@ public void testNoMatchingField() throws IOException {

public void testSimpleHistogram() throws IOException {
testCase(new MatchAllDocsQuery(), iw -> {
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {3, 1.2, 10})));
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90})));
iw.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {3, 1.2, 10})));
iw.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
iw.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-10, 0.01, 1, 90})));
}, avg -> {
assertEquals(12.0463d, avg.getValue(), 0.01d);
assertTrue(AggregationInspectionHelper.hasValue(avg));
@@ -81,23 +75,23 @@ public void testQueryFiltering() throws IOException {
testCase(new TermQuery(new Term("match", "yes")), iw -> {
iw.addDocument(Arrays.asList(
new StringField("match", "yes", Field.Store.NO),
getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))
histogramFieldDocValues(FIELD_NAME, new double[] {3, 1.2, 10}))
);
iw.addDocument(Arrays.asList(
new StringField("match", "yes", Field.Store.NO),
getDocValue(FIELD_NAME, new double[] {5.3, 6, 20}))
histogramFieldDocValues(FIELD_NAME, new double[] {5.3, 6, 20}))
);
iw.addDocument(Arrays.asList(
new StringField("match", "no", Field.Store.NO),
getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))
histogramFieldDocValues(FIELD_NAME, new double[] {3, 1.2, 10}))
);
iw.addDocument(Arrays.asList(
new StringField("match", "no", Field.Store.NO),
getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))
histogramFieldDocValues(FIELD_NAME, new double[] {3, 1.2, 10}))
);
iw.addDocument(Arrays.asList(
new StringField("match", "yes", Field.Store.NO),
getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90}))
histogramFieldDocValues(FIELD_NAME, new double[] {-10, 0.01, 1, 90}))
);
}, avg -> {
assertEquals(12.651d, avg.getValue(), 0.01d);
@@ -111,23 +105,6 @@ private void testCase(Query query,
testCase(avg("_name").field(FIELD_NAME), query, indexer, verify, defaultFieldType());
}

private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException {
TDigest histogram = new TDigestState(100.0); //default
for (double value : values) {
histogram.add(value);
}
BytesStreamOutput streamOutput = new BytesStreamOutput();
histogram.compress();
Collection<Centroid> centroids = histogram.centroids();
Iterator<Centroid> iterator = centroids.iterator();
while ( iterator.hasNext()) {
Centroid centroid = iterator.next();
streamOutput.writeVInt(centroid.count());
streamOutput.writeDouble(centroid.mean());
}
return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef());
}

@Override
protected List<SearchPlugin> getSearchPlugins() {
return org.elasticsearch.common.collect.List.of(new AnalyticsPlugin(Settings.EMPTY));
