Skip to content

Commit

Permalink
Significant terms test refactor for extendability (#75452)
Browse files Browse the repository at this point in the history
The original PR #75264 made some test mistakes

NXY Significant term heuristics have additional values that need to be set when testing
basicScore properties.

Additionally, previous refactor kept the abstract test class in a package that other plugins
don't have access to.

closes #75442, #75561
  • Loading branch information
benwtrent committed Jul 21, 2021
1 parent ea635d6 commit 9b88db7
Show file tree
Hide file tree
Showing 10 changed files with 84 additions and 62 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ protected long getSupersetSize() {
}

@Override
protected SignificanceHeuristic getSignificanceHeuristic() {
public SignificanceHeuristic getSignificanceHeuristic() {
return significanceHeuristic;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@
public class SignificantLongTerms extends InternalMappedSignificantTerms<SignificantLongTerms, SignificantLongTerms.Bucket> {
public static final String NAME = "siglterms";

static class Bucket extends InternalSignificantTerms.Bucket<Bucket> {
public static class Bucket extends InternalSignificantTerms.Bucket<Bucket> {

long term;

Bucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize, long term, InternalAggregations aggregations,
public Bucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize, long term, InternalAggregations aggregations,
DocValueFormat format, double score) {
super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format);
this.term = term;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ protected boolean serializeTargetValueType(Version version) {
return true;
}

protected TermsAggregator.BucketCountThresholds getBucketCountThresholds() {
public TermsAggregator.BucketCountThresholds getBucketCountThresholds() {
return new TermsAggregator.BucketCountThresholds(bucketCountThresholds);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,17 @@

package org.elasticsearch.search.aggregations.bucket.terms.heuristic;

import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests;
import org.elasticsearch.search.aggregations.bucket.AbstractNXYSignificanceHeuristicTestCase;

public class ChiSquareTests extends AbstractSignificanceHeuristicTests {
@Override
protected SignificanceHeuristic getHeuristic() {
return new ChiSquare(randomBoolean(), randomBoolean());
}
public class ChiSquareTests extends AbstractNXYSignificanceHeuristicTestCase {

@Override
protected boolean testZeroScore() {
return false;
public void testAssertions() {
testBackgroundAssertions(new ChiSquare(true, true), new ChiSquare(true, false));
}

@Override
public void testAssertions() {
testBackgroundAssertions(new ChiSquare(true, true), new ChiSquare(true, false));
protected SignificanceHeuristic getHeuristic(boolean includeNegatives, boolean backgroundIsSuperset) {
return new ChiSquare(includeNegatives, backgroundIsSuperset);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,16 @@

package org.elasticsearch.search.aggregations.bucket.terms.heuristic;

import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests;
import org.elasticsearch.search.aggregations.bucket.AbstractSignificanceHeuristicTestCase;

import static org.hamcrest.Matchers.equalTo;

public class GNDTests extends AbstractSignificanceHeuristicTests {
public class GNDTests extends AbstractSignificanceHeuristicTestCase {
@Override
protected SignificanceHeuristic getHeuristic() {
return new GND(randomBoolean());
}

@Override
protected boolean testZeroScore() {
return true;
}

@Override
public void testAssertions() {
testBackgroundAssertions(new GND(true), new GND(false));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,14 @@

package org.elasticsearch.search.aggregations.bucket.terms.heuristic;

import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests;
import org.elasticsearch.search.aggregations.bucket.AbstractSignificanceHeuristicTestCase;

public class JLHScoreTests extends AbstractSignificanceHeuristicTests {
public class JLHScoreTests extends AbstractSignificanceHeuristicTestCase {
@Override
protected SignificanceHeuristic getHeuristic() {
return new JLHScore();
}

@Override
protected boolean testZeroScore() {
return true;
}

@Override
public void testAssertions() {
testAssertions(new JLHScore());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,18 @@

package org.elasticsearch.search.aggregations.bucket.terms.heuristic;

import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests;
import org.elasticsearch.search.aggregations.bucket.AbstractNXYSignificanceHeuristicTestCase;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.lessThan;
import static org.hamcrest.Matchers.lessThanOrEqualTo;

public class MutualInformationTests extends AbstractSignificanceHeuristicTests {
public class MutualInformationTests extends AbstractNXYSignificanceHeuristicTestCase {
@Override
protected SignificanceHeuristic getHeuristic() {
return new MutualInformation(randomBoolean(), randomBoolean());
}

@Override
protected boolean testZeroScore() {
return false;
protected SignificanceHeuristic getHeuristic(boolean includeNegatives, boolean backgroundIsSuperset) {
return new MutualInformation(includeNegatives, backgroundIsSuperset);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,14 @@

package org.elasticsearch.search.aggregations.bucket.terms.heuristic;

import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests;
import org.elasticsearch.search.aggregations.bucket.AbstractSignificanceHeuristicTestCase;

public class PercentageScoreTests extends AbstractSignificanceHeuristicTests {
public class PercentageScoreTests extends AbstractSignificanceHeuristicTestCase {
@Override
protected SignificanceHeuristic getHeuristic() {
return new PercentageScore();
}

@Override
protected boolean testZeroScore() {
return true;
}

@Override
public void testAssertions() {
testAssertions(new PercentageScore());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.search.aggregations.bucket;

import org.elasticsearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic;

/**
* Abstract test case for testing NXY significant term heuristics
*/
public abstract class AbstractNXYSignificanceHeuristicTestCase extends AbstractSignificanceHeuristicTestCase {

/**
* @return A random instance of the heuristic to test
*/
@Override
protected SignificanceHeuristic getHeuristic() {
return getHeuristic(randomBoolean(), randomBoolean());
}

/**
* @param includeNegatives value for this test run, should the scores include negative values.
* @param backgroundIsSuperset value for this test run, indicates in NXY significant terms if the background is indeed
* a superset of the the subset, or is instead a disjoint set
* @return A random instance of an NXY heuristic to test
*/
protected abstract SignificanceHeuristic getHeuristic(boolean includeNegatives, boolean backgroundIsSuperset);

@Override
public void testBasicScoreProperties() {
testBasicScoreProperties(getHeuristic(true, true), false);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* Side Public License, v 1.
*/

package org.elasticsearch.search.aggregations.bucket.terms;
package org.elasticsearch.search.aggregations.bucket;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
Expand All @@ -28,6 +28,12 @@
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.terms.InternalMappedSignificantTerms;
import org.elasticsearch.search.aggregations.bucket.terms.InternalSignificantTerms;
import org.elasticsearch.search.aggregations.bucket.terms.SignificantLongTerms;
import org.elasticsearch.search.aggregations.bucket.terms.SignificantStringTerms;
import org.elasticsearch.search.aggregations.bucket.terms.SignificantTerms;
import org.elasticsearch.search.aggregations.bucket.terms.SignificantTermsAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.InternalAggregationTestCase;
Expand Down Expand Up @@ -56,18 +62,13 @@
/**
* Abstract test case for testing significant term heuristics
*/
public abstract class AbstractSignificanceHeuristicTests extends ESTestCase {
public abstract class AbstractSignificanceHeuristicTestCase extends ESTestCase {

/**
* @return A random instance of the heuristic to test
*/
protected abstract SignificanceHeuristic getHeuristic();

/**
* @return test if the score is `0` with a subset frequency of `0`
*/
protected abstract boolean testZeroScore();

// test that stream output can actually be read - does not replace bwc test
public void testStreamResponse() throws Exception {
Version version = randomVersion(random());
Expand All @@ -83,13 +84,11 @@ public void testStreamResponse() throws Exception {
ByteArrayInputStream inBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
StreamInput in = new InputStreamStreamInput(inBuffer);
// populates the registry through side effects
SearchModule searchModule = new SearchModule(Settings.EMPTY, emptyList());
NamedWriteableRegistry registry = new NamedWriteableRegistry(searchModule.getNamedWriteables());
in = new NamedWriteableAwareStreamInput(in, registry);
in = new NamedWriteableAwareStreamInput(in, writableRegistry());
in.setVersion(version);
InternalMappedSignificantTerms<?, ?> read = (InternalMappedSignificantTerms<?, ?>) in.readNamedWriteable(InternalAggregation.class);

assertEquals(sigTerms.significanceHeuristic, read.significanceHeuristic);
assertEquals(sigTerms.getSignificanceHeuristic(), read.getSignificanceHeuristic());
SignificantTerms.Bucket originalBucket = sigTerms.getBuckets().get(0);
SignificantTerms.Bucket streamedBucket = read.getBuckets().get(0);
assertThat(originalBucket.getKeyAsString(), equalTo(streamedBucket.getKeyAsString()));
Expand Down Expand Up @@ -127,11 +126,14 @@ public void testReduce() {
}

public void testBasicScoreProperties() {
SignificanceHeuristic heuristic = getHeuristic();
testBasicScoreProperties(getHeuristic(), true);
}

protected void testBasicScoreProperties(SignificanceHeuristic heuristic, boolean testZeroScore) {
assertThat(heuristic.getScore(1, 1, 1, 3), greaterThan(0.0));
assertThat(heuristic.getScore(1, 1, 2, 3), lessThan(heuristic.getScore(1, 1, 1, 3)));
assertThat(heuristic.getScore(1, 1, 3, 4), lessThan(heuristic.getScore(1, 1, 2, 4)));
if (testZeroScore()) {
if (testZeroScore) {
assertThat(heuristic.getScore(0, 1, 2, 3), equalTo(0.0));
}

Expand All @@ -150,8 +152,8 @@ public void testBasicScoreProperties() {
/**
* Testing heuristic specific assertions
* Typically, this method would call either
* {@link AbstractSignificanceHeuristicTests#testBackgroundAssertions(SignificanceHeuristic, SignificanceHeuristic)}
* or {@link AbstractSignificanceHeuristicTests#testAssertions(SignificanceHeuristic)}
* {@link AbstractSignificanceHeuristicTestCase#testBackgroundAssertions(SignificanceHeuristic, SignificanceHeuristic)}
* or {@link AbstractSignificanceHeuristicTestCase#testAssertions(SignificanceHeuristic)}
* depending on which was appropriate
*/
public abstract void testAssertions();
Expand Down Expand Up @@ -206,9 +208,9 @@ public void testParseFailure() throws IOException {
// Create aggregations as they might come from three different shards and return as list.
private List<InternalAggregation> createInternalAggregations() {
SignificanceHeuristic significanceHeuristic = getHeuristic();
AbstractSignificanceHeuristicTests.TestAggFactory<?, ?> factory = randomBoolean() ?
new AbstractSignificanceHeuristicTests.StringTestAggFactory() :
new AbstractSignificanceHeuristicTests.LongTestAggFactory();
AbstractSignificanceHeuristicTestCase.TestAggFactory<?, ?> factory = randomBoolean() ?
new AbstractSignificanceHeuristicTestCase.StringTestAggFactory() :
new AbstractSignificanceHeuristicTestCase.LongTestAggFactory();

List<InternalAggregation> aggs = new ArrayList<>();
aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 4, 5, 10, 0)));
Expand Down Expand Up @@ -273,6 +275,11 @@ protected NamedXContentRegistry xContentRegistry() {
return new NamedXContentRegistry(new SearchModule(Settings.EMPTY, emptyList()).getNamedXContents());
}

@Override
protected NamedWriteableRegistry writableRegistry() {
return new NamedWriteableRegistry(new SearchModule(Settings.EMPTY, emptyList()).getNamedWriteables());
}

protected void testBackgroundAssertions(SignificanceHeuristic heuristicIsSuperset, SignificanceHeuristic heuristicNotSuperset) {
try {
heuristicIsSuperset.getScore(2, 3, 1, 4);
Expand Down

0 comments on commit 9b88db7

Please sign in to comment.