Extract histogramFieldDocValues into a utility class (#63100) (#63148)
This function will be needed in the upcoming rate aggs tests.
imotov committed Oct 1, 2020
1 parent 3ad4b00 commit fc13b72
Showing 9 changed files with 171 additions and 317 deletions.
@@ -0,0 +1,38 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/


package org.elasticsearch.xpack.analytics;

import java.io.IOException;

import org.apache.lucene.document.BinaryDocValuesField;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.search.aggregations.metrics.TDigestState;

import com.tdunning.math.stats.Centroid;
import com.tdunning.math.stats.TDigest;

public final class AnalyticsTestsUtils {

/**
* Generates a binary doc values field for histogram fields. Used in tests of aggregations that work on histogram fields.
*/
public static BinaryDocValuesField histogramFieldDocValues(String fieldName, double[] values) throws IOException {
TDigest histogram = new TDigestState(100.0); // default compression
for (double value : values) {
histogram.add(value);
}
BytesStreamOutput streamOutput = new BytesStreamOutput();
histogram.compress();
for (Centroid centroid : histogram.centroids()) {
streamOutput.writeVInt(centroid.count());
streamOutput.writeDouble(centroid.mean());
}
return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef());
}

}
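As a quick sanity check of the format this helper writes, here is a minimal round-trip sketch (not part of the commit): it builds a field with histogramFieldDocValues, then reads the bytes back as the (VInt count, double mean) pairs the loop above produces. The class name HistogramDocValuesRoundTrip and the field name "latency" are illustrative only, and StreamInput.wrap(byte[], int, int) is assumed to be available in this branch. The tests updated below consume the helper the same way, e.g. w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {0, 1.2, 10, 12, 24}))).

package org.elasticsearch.xpack.analytics;

import java.io.IOException;

import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.StreamInput;

public class HistogramDocValuesRoundTrip {
    public static void main(String[] args) throws IOException {
        // Encode a few raw values into a pre-aggregated histogram field.
        BinaryDocValuesField field =
            AnalyticsTestsUtils.histogramFieldDocValues("latency", new double[] {1.0, 2.5, 2.5, 7.0});

        // Decode: the helper writes one (VInt count, double mean) pair per centroid.
        BytesRef bytes = field.binaryValue();
        try (StreamInput in = StreamInput.wrap(bytes.bytes, bytes.offset, bytes.length)) {
            while (in.available() > 0) {
                int count = in.readVInt();
                double mean = in.readDouble();
                System.out.printf("centroid count=%d mean=%s%n", count, mean);
            }
        }
    }
}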
@@ -6,46 +6,39 @@

package org.elasticsearch.xpack.analytics.aggregations.bucket.histogram;

import com.tdunning.math.stats.Centroid;
import com.tdunning.math.stats.TDigest;
import org.apache.lucene.document.BinaryDocValuesField;
import static java.util.Collections.singleton;
import static org.elasticsearch.xpack.analytics.AnalyticsTestsUtils.histogramFieldDocValues;

import java.util.Collections;
import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.plugins.SearchPlugin;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.histogram.InternalHistogram;
import org.elasticsearch.search.aggregations.metrics.TDigestState;
import org.elasticsearch.search.aggregations.metrics.TopHitsAggregationBuilder;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import static java.util.Collections.singleton;

public class HistoBackedHistogramAggregatorTests extends AggregatorTestCase {

private static final String FIELD_NAME = "field";

public void testHistograms() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {0, 1.2, 10, 12, 24})));
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 10, 10, 30})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {0, 1.2, 10, 12, 24})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-10, 0.01, 10, 10, 30})));

HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
.field(FIELD_NAME)
@@ -80,9 +73,9 @@ public void testHistograms() throws Exception {
public void testMinDocCount() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {0, 1.2, 10, 12, 24})));
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 10, 10, 30, 90})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {0, 1.2, 10, 12, 24})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-10, 0.01, 10, 10, 30, 90})));

HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
.field(FIELD_NAME)
@@ -109,8 +102,8 @@ public void testRandomOffset() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
// Note, these values are carefully chosen to ensure that no matter what offset we pick, no two can end up in the same bucket
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {3.2, 9.3})));
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-5, 3.2 })));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {3.2, 9.3})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-5, 3.2 })));

final double offset = randomDouble();
final double interval = 5;
@@ -143,8 +136,8 @@ public void testExtendedBounds() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {

w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-4.5, 4.3})));
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-5, 3.2 })));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-4.5, 4.3})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-5, 3.2 })));

HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
.field(FIELD_NAME)
@@ -178,8 +171,8 @@ public void testSubAggs() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {

w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-4.5, 4.3})));
w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-5, 3.2 })));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-4.5, 4.3})));
w.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-5, 3.2 })));

HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
.field(FIELD_NAME)
@@ -198,23 +191,6 @@ public void testSubAggs() throws Exception {
}
}

private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException {
TDigest histogram = new TDigestState(100.0); //default
for (double value : values) {
histogram.add(value);
}
BytesStreamOutput streamOutput = new BytesStreamOutput();
histogram.compress();
Collection<Centroid> centroids = histogram.centroids();
Iterator<Centroid> iterator = centroids.iterator();
while ( iterator.hasNext()) {
Centroid centroid = iterator.next();
streamOutput.writeVInt(centroid.count());
streamOutput.writeDouble(centroid.mean());
}
return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef());
}

@Override
protected List<SearchPlugin> getSearchPlugins() {
return org.elasticsearch.common.collect.List.of(new AnalyticsPlugin(Settings.EMPTY));
@@ -5,9 +5,6 @@
*/
package org.elasticsearch.xpack.analytics.aggregations.metrics;

import com.tdunning.math.stats.Centroid;
import com.tdunning.math.stats.TDigest;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.RandomIndexWriter;
@@ -16,15 +13,13 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.plugins.SearchPlugin;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.metrics.AvgAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.InternalAvg;
import org.elasticsearch.search.aggregations.metrics.TDigestState;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
@@ -34,14 +29,13 @@

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;

import static java.util.Collections.singleton;
import static org.elasticsearch.search.aggregations.AggregationBuilders.avg;
import static org.elasticsearch.xpack.analytics.AnalyticsTestsUtils.histogramFieldDocValues;

public class HistoBackedAvgAggregatorTests extends AggregatorTestCase {

@@ -58,8 +52,8 @@ public void testNoDocs() throws IOException {

public void testNoMatchingField() throws IOException {
testCase(new MatchAllDocsQuery(), iw -> {
iw.addDocument(singleton(getDocValue("wrong_field", new double[] {3, 1.2, 10})));
iw.addDocument(singleton(getDocValue("wrong_field", new double[] {5.3, 6, 20})));
iw.addDocument(singleton(histogramFieldDocValues("wrong_field", new double[] {3, 1.2, 10})));
iw.addDocument(singleton(histogramFieldDocValues("wrong_field", new double[] {5.3, 6, 20})));
}, avg -> {
assertEquals(Double.NaN, avg.getValue(), 0d);
assertFalse(AggregationInspectionHelper.hasValue(avg));
@@ -68,9 +62,9 @@ public void testNoMatchingField() throws IOException {

public void testSimpleHistogram() throws IOException {
testCase(new MatchAllDocsQuery(), iw -> {
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {3, 1.2, 10})));
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90})));
iw.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {3, 1.2, 10})));
iw.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
iw.addDocument(singleton(histogramFieldDocValues(FIELD_NAME, new double[] {-10, 0.01, 1, 90})));
}, avg -> {
assertEquals(12.0463d, avg.getValue(), 0.01d);
assertTrue(AggregationInspectionHelper.hasValue(avg));
@@ -81,23 +75,23 @@ public void testQueryFiltering() throws IOException {
testCase(new TermQuery(new Term("match", "yes")), iw -> {
iw.addDocument(Arrays.asList(
new StringField("match", "yes", Field.Store.NO),
getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))
histogramFieldDocValues(FIELD_NAME, new double[] {3, 1.2, 10}))
);
iw.addDocument(Arrays.asList(
new StringField("match", "yes", Field.Store.NO),
getDocValue(FIELD_NAME, new double[] {5.3, 6, 20}))
histogramFieldDocValues(FIELD_NAME, new double[] {5.3, 6, 20}))
);
iw.addDocument(Arrays.asList(
new StringField("match", "no", Field.Store.NO),
getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))
histogramFieldDocValues(FIELD_NAME, new double[] {3, 1.2, 10}))
);
iw.addDocument(Arrays.asList(
new StringField("match", "no", Field.Store.NO),
getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))
histogramFieldDocValues(FIELD_NAME, new double[] {3, 1.2, 10}))
);
iw.addDocument(Arrays.asList(
new StringField("match", "yes", Field.Store.NO),
getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90}))
histogramFieldDocValues(FIELD_NAME, new double[] {-10, 0.01, 1, 90}))
);
}, avg -> {
assertEquals(12.651d, avg.getValue(), 0.01d);
@@ -111,23 +105,6 @@ private void testCase(Query query,
testCase(avg("_name").field(FIELD_NAME), query, indexer, verify, defaultFieldType());
}

private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException {
TDigest histogram = new TDigestState(100.0); //default
for (double value : values) {
histogram.add(value);
}
BytesStreamOutput streamOutput = new BytesStreamOutput();
histogram.compress();
Collection<Centroid> centroids = histogram.centroids();
Iterator<Centroid> iterator = centroids.iterator();
while ( iterator.hasNext()) {
Centroid centroid = iterator.next();
streamOutput.writeVInt(centroid.count());
streamOutput.writeDouble(centroid.mean());
}
return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef());
}

@Override
protected List<SearchPlugin> getSearchPlugins() {
return org.elasticsearch.common.collect.List.of(new AnalyticsPlugin(Settings.EMPTY));
