Skip to content

Commit

Permalink
handle empty sketches (#7526) (#7552)
Browse files Browse the repository at this point in the history
* handle empty sketches

* return array of NaN in case of empty sketch

* noinspection ForLoopReplaceableByForEach in tests

* style fixes
  • Loading branch information
clintropolis committed Apr 25, 2019
1 parent 58dcb23 commit d4dde9c
Show file tree
Hide file tree
Showing 4 changed files with 180 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ public DoublesSketchToHistogramPostAggregator(
public Object compute(final Map<String, Object> combinedAggregators)
{
final DoublesSketch sketch = (DoublesSketch) field.compute(combinedAggregators);
if (sketch.isEmpty()) {
final double[] histogram = new double[splitPoints.length + 1];
Arrays.fill(histogram, Double.NaN);
return histogram;
}
final double[] histogram = sketch.getPMF(splitPoints);
for (int i = 0; i < histogram.length; i++) {
histogram[i] *= sketch.getN();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ public double[] getFractions()
public Object compute(final Map<String, Object> combinedAggregators)
{
  // The wrapped field post-aggregator yields the sketch to read quantiles from.
  final DoublesSketch quantilesSketch = (DoublesSketch) field.compute(combinedAggregators);
  if (!quantilesSketch.isEmpty()) {
    return quantilesSketch.getQuantiles(fractions);
  }
  // Empty sketch: answer with one NaN per requested fraction instead of querying the sketch.
  final double[] allNaN = new double[fractions.length];
  Arrays.fill(allNaN, Double.NaN);
  return allNaN;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.query.aggregation.datasketches.quantiles;

import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.TestDoubleColumnSelectorImpl;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;
import org.junit.Assert;
import org.junit.Test;

import java.util.HashMap;
import java.util.Map;

/**
 * Tests for {@link DoublesSketchToHistogramPostAggregator}: one case where nothing was aggregated
 * into the sketch and one case with six values split by a single split point.
 */
public class DoublesSketchToHistogramPostAggregatorTest
{
  @Test
  public void emptySketch()
  {
    // Nothing is aggregated here, so the post-aggregator receives the aggregator's initial sketch.
    final TestDoubleColumnSelectorImpl noValues = new TestDoubleColumnSelectorImpl(null);
    final Aggregator untouched = new DoublesSketchBuildAggregator(noValues, 8);

    final double[] buckets = histogramOf(untouched);

    Assert.assertNotNull(buckets);
    Assert.assertEquals(2, buckets.length);
    for (final double bucket : buckets) {
      Assert.assertTrue(Double.isNaN(bucket));
    }
  }

  @Test
  public void normalCase()
  {
    final double[] input = new double[] {1, 2, 3, 4, 5, 6};
    final TestDoubleColumnSelectorImpl column = new TestDoubleColumnSelectorImpl(input);
    final Aggregator sketchBuilder = new DoublesSketchBuildAggregator(column, 8);

    // Feed every input value: aggregate the current one, then advance the selector.
    for (int remaining = input.length; remaining > 0; remaining--) {
      sketchBuilder.aggregate();
      column.increment();
    }

    final double[] buckets = histogramOf(sketchBuilder);

    Assert.assertNotNull(buckets);
    // Three of the six values fall on each side of the 3.5 split point.
    Assert.assertArrayEquals(new double[] {3.0, 3.0}, buckets, 0);
  }

  /**
   * Publishes the aggregator's current sketch under the key "sketch" and runs a histogram
   * post-aggregator with the single split point 3.5 over it.
   */
  private static double[] histogramOf(final Aggregator aggregator)
  {
    final Map<String, Object> combined = new HashMap<>();
    combined.put("sketch", aggregator.get());

    final PostAggregator histogramPostAgg = new DoublesSketchToHistogramPostAggregator(
        "histogram",
        new FieldAccessPostAggregator("field", "sketch"),
        new double[] {3.5} // splits distribution in two buckets of equal mass
    );
    return (double[]) histogramPostAgg.compute(combined);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.query.aggregation.datasketches.quantiles;

import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.TestDoubleColumnSelectorImpl;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;
import org.junit.Assert;
import org.junit.Test;

import java.util.HashMap;
import java.util.Map;

/**
 * Tests for {@link DoublesSketchToQuantilesPostAggregator}: one case where nothing was aggregated
 * into the sketch and one case with five values queried at fractions 0, 0.5 and 1.
 */
public class DoublesSketchToQuantilesPostAggregatorTest
{
  @Test
  public void emptySketch()
  {
    // Nothing is aggregated here, so the post-aggregator receives the aggregator's initial sketch.
    final TestDoubleColumnSelectorImpl noValues = new TestDoubleColumnSelectorImpl(null);
    final Aggregator untouched = new DoublesSketchBuildAggregator(noValues, 8);

    final double[] result = quantilesOf(untouched);

    Assert.assertNotNull(result);
    Assert.assertEquals(3, result.length);
    for (final double quantile : result) {
      Assert.assertTrue(Double.isNaN(quantile));
    }
  }

  @Test
  public void normalCase()
  {
    final double[] input = new double[] {1, 2, 3, 4, 5};
    final TestDoubleColumnSelectorImpl column = new TestDoubleColumnSelectorImpl(input);
    final Aggregator sketchBuilder = new DoublesSketchBuildAggregator(column, 8);

    // Feed every input value: aggregate the current one, then advance the selector.
    for (int remaining = input.length; remaining > 0; remaining--) {
      sketchBuilder.aggregate();
      column.increment();
    }

    final double[] result = quantilesOf(sketchBuilder);

    Assert.assertNotNull(result);
    // Expected minimum, median and maximum of the five inputs.
    Assert.assertArrayEquals(new double[] {1.0, 3.0, 5.0}, result, 0);
  }

  /**
   * Publishes the aggregator's current sketch under the key "sketch" and runs a quantiles
   * post-aggregator for the fractions 0, 0.5 and 1 over it.
   */
  private static double[] quantilesOf(final Aggregator aggregator)
  {
    final Map<String, Object> combined = new HashMap<>();
    combined.put("sketch", aggregator.get());

    final PostAggregator quantilesPostAgg = new DoublesSketchToQuantilesPostAggregator(
        "quantiles",
        new FieldAccessPostAggregator("field", "sketch"),
        new double[] {0, 0.5, 1}
    );
    return (double[]) quantilesPostAgg.compute(combined);
  }
}

0 comments on commit d4dde9c

Please sign in to comment.