Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
public abstract class InternalSignificantTerms<A extends InternalSignificantTerms<A, B>, B extends InternalSignificantTerms.Bucket<B>>
extends InternalMultiBucketAggregation<A, B> implements SignificantTerms, ToXContent {

private static final String SCORE = "score";
private static final String BG_COUNT = "bg_count";
public static final String SCORE = "score";
public static final String BG_COUNT = "bg_count";

@SuppressWarnings("PMD.ConstructorCallsOverridableMethod")
public abstract static class Bucket<B extends Bucket<B>> extends InternalMultiBucketAggregation.InternalBucket
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.aggregations.bucket.significant;

import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;

import java.io.IOException;

public class ParsedSignificantLongTerms extends ParsedSignificantTerms {

@Override
public String getType() {
return SignificantLongTerms.NAME;
}

private static ObjectParser<ParsedSignificantLongTerms, Void> PARSER =
new ObjectParser<>(ParsedSignificantLongTerms.class.getSimpleName(), true, ParsedSignificantLongTerms::new);
static {
declareParsedSignificantTermsFields(PARSER, ParsedBucket::fromXContent);
}

public static ParsedSignificantLongTerms fromXContent(XContentParser parser, String name) throws IOException {
ParsedSignificantLongTerms aggregation = PARSER.parse(parser, null);
aggregation.setName(name);
return aggregation;
}

public static class ParsedBucket extends ParsedSignificantTerms.ParsedBucket {

private Long key;

@Override
public Object getKey() {
return key;
}

@Override
public String getKeyAsString() {
String keyAsString = super.getKeyAsString();
if (keyAsString != null) {
return keyAsString;
}
return Long.toString(key);
}

public Number getKeyAsNumber() {
return key;
}

@Override
public int compareTerm(SignificantTerms.Bucket other) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this method needed? I think it isn't used anywhere?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems it's not used but I prefer to leave it in core - I saw something about it might be needed in scripted aggs or something. I'll try to figure this out, and if it's unused then I'll create a separate PR in core.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Created #24714 to remove the method.

return key.compareTo(((ParsedBucket) other).key);
}

@Override
protected XContentBuilder keyToXContent(XContentBuilder builder) throws IOException {
builder.field(CommonFields.KEY.getPreferredName(), key);
if (super.getKeyAsString() != null) {
builder.field(CommonFields.KEY_AS_STRING.getPreferredName(), getKeyAsString());
}
return builder;
}

static ParsedBucket fromXContent(XContentParser parser) throws IOException {
return parseSignificantTermsBucketXContent(parser, new ParsedBucket(), (p, bucket) -> bucket.key = p.longValue());
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.aggregations.bucket.significant;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;

import java.io.IOException;

/**
 * REST-client-side representation of a {@code significant_terms} aggregation whose
 * bucket keys are strings, reconstructed from an XContent (JSON) response.
 */
public class ParsedSignificantStringTerms extends ParsedSignificantTerms {

    @Override
    public String getType() {
        return SignificantStringTerms.NAME;
    }

    // Fully configured once in the static initializer and never mutated afterwards,
    // so it must be final (it is shared across threads).
    private static final ObjectParser<ParsedSignificantStringTerms, Void> PARSER =
            new ObjectParser<>(ParsedSignificantStringTerms.class.getSimpleName(), true, ParsedSignificantStringTerms::new);
    static {
        declareParsedSignificantTermsFields(PARSER, ParsedBucket::fromXContent);
    }

    /**
     * Parses a string-keyed {@code significant_terms} aggregation from the given parser.
     *
     * @param parser the XContent parser, positioned on the aggregation object
     * @param name   the aggregation name to assign to the parsed result
     * @return the parsed aggregation
     * @throws IOException if reading from the parser fails
     */
    public static ParsedSignificantStringTerms fromXContent(XContentParser parser, String name) throws IOException {
        ParsedSignificantStringTerms aggregation = PARSER.parse(parser, null);
        aggregation.setName(name);
        return aggregation;
    }

    /** A single parsed bucket whose key is a UTF-8 string (stored as a {@link BytesRef}). */
    public static class ParsedBucket extends ParsedSignificantTerms.ParsedBucket {

        private BytesRef key;

        @Override
        public Object getKey() {
            return getKeyAsString();
        }

        @Override
        public String getKeyAsString() {
            // Prefer an explicit "key_as_string" from the response; otherwise decode the raw key.
            String keyAsString = super.getKeyAsString();
            if (keyAsString != null) {
                return keyAsString;
            }
            return key.utf8ToString();
        }

        // NOTE(review): assumes the string key is numeric; throws NumberFormatException otherwise,
        // mirroring the behavior of the transport-client bucket implementation.
        public Number getKeyAsNumber() {
            return Double.parseDouble(key.utf8ToString());
        }

        @Override
        public int compareTerm(SignificantTerms.Bucket other) {
            return key.compareTo(((ParsedBucket) other).key);
        }

        @Override
        protected XContentBuilder keyToXContent(XContentBuilder builder) throws IOException {
            return builder.field(CommonFields.KEY.getPreferredName(), getKey());
        }

        static ParsedBucket fromXContent(XContentParser parser) throws IOException {
            return parseSignificantTermsBucketXContent(parser, new ParsedBucket(), (p, bucket) -> bucket.key = p.utf8BytesOrNull());
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.aggregations.bucket.significant;

import org.elasticsearch.common.CheckedBiConsumer;
import org.elasticsearch.common.CheckedFunction;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParserUtils;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

public abstract class ParsedSignificantTerms extends ParsedMultiBucketAggregation<ParsedSignificantTerms.ParsedBucket>
implements SignificantTerms {

private Map<String, ParsedBucket> bucketMap;
protected long subsetSize;

protected long getSubsetSize() {
return subsetSize;
}

@Override
public List<? extends SignificantTerms.Bucket> getBuckets() {
return buckets;
}

@Override
public SignificantTerms.Bucket getBucketByKey(String term) {
if (bucketMap == null) {
bucketMap = buckets.stream().collect(Collectors.toMap(SignificantTerms.Bucket::getKeyAsString, Function.identity()));
}
return bucketMap.get(term);
}

@Override
public Iterator<SignificantTerms.Bucket> iterator() {
return buckets.stream().map(bucket -> (SignificantTerms.Bucket) bucket).collect(Collectors.toList()).iterator();
}

@Override
protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
builder.field(CommonFields.DOC_COUNT.getPreferredName(), subsetSize);
builder.startArray(CommonFields.BUCKETS.getPreferredName());
for (SignificantTerms.Bucket bucket : buckets) {
bucket.toXContent(builder, params);
}
builder.endArray();
return builder;
}

static void declareParsedSignificantTermsFields(final ObjectParser<? extends ParsedSignificantTerms, Void> objectParser,
final CheckedFunction<XContentParser, ParsedSignificantTerms.ParsedBucket, IOException> bucketParser) {
declareMultiBucketAggregationFields(objectParser, bucketParser::apply, bucketParser::apply);
objectParser.declareLong((parsedTerms, value) -> parsedTerms.subsetSize = value , CommonFields.DOC_COUNT);
}

public abstract static class ParsedBucket extends ParsedMultiBucketAggregation.ParsedBucket implements SignificantTerms.Bucket {

protected long subsetDf;
protected long supersetDf;
protected double score;

@Override
public long getDocCount() {
return getSubsetDf();
}

@Override
public long getSubsetDf() {
return subsetDf;
}

@Override
public long getSupersetDf() {
return supersetDf;
}

@Override
public double getSignificanceScore() {
return score;
}

@Override
public long getSupersetSize() {
throw new UnsupportedOperationException();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was wondering if getSuperset/SubsetSize is part of the Bucket interface but not rendered via the Rest response, should we either add rendering of these values to the bucket response or remove it from the interface to get equivalent behaviour of functionality of the transport client with the high level rest client here? I think this can be done in a separate issue though, maybe its not needed at all.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe @markharwood has an opinion on this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm okay with the UnsupportedOperationException for now if we can track this question (whether we can reach consistency between the functionality the transport client provides via the SignificantTerms.Bucket interface with the rest response) in a separate issue

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This took me some time to wrap my head around this, but I finally created #24865 to address this.

}

@Override
public long getSubsetSize() {
throw new UnsupportedOperationException();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We render a subsetSize as the DOC_COUNT field in the surrounding aggregation, I'm not sure if this is equivalent with the bucket subset size but maybe it could be used here? Probably would need some checking with somebody who knows the aggregation better though.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how they are related to be honest. Since we don't render the superset size and subset size I think it's ok to throw an unsupported operation exception here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussion that gives more background around where these fields come from: #5146 (comment)

}

@Override
public final XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
keyToXContent(builder);
builder.field(CommonFields.DOC_COUNT.getPreferredName(), getDocCount());
builder.field(InternalSignificantTerms.SCORE, getSignificanceScore());
builder.field(InternalSignificantTerms.BG_COUNT, getSupersetDf());
getAggregations().toXContentInternal(builder, params);
builder.endObject();
return builder;
}

protected abstract XContentBuilder keyToXContent(XContentBuilder builder) throws IOException;

static <B extends ParsedBucket> B parseSignificantTermsBucketXContent(final XContentParser parser, final B bucket,
final CheckedBiConsumer<XContentParser, B, IOException> keyConsumer) throws IOException {

final List<Aggregation> aggregations = new ArrayList<>();
XContentParser.Token token;
String currentFieldName = parser.currentName();
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token.isValue()) {
if (CommonFields.KEY_AS_STRING.getPreferredName().equals(currentFieldName)) {
bucket.setKeyAsString(parser.text());
} else if (CommonFields.KEY.getPreferredName().equals(currentFieldName)) {
keyConsumer.accept(parser, bucket);
} else if (CommonFields.DOC_COUNT.getPreferredName().equals(currentFieldName)) {
long value = parser.longValue();
bucket.subsetDf = value;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this one we set although it is never printed out as part of toXContent?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We render the doc count which is in fact the subsetDf.

bucket.setDocCount(value);
} else if (InternalSignificantTerms.SCORE.equals(currentFieldName)) {
bucket.score = parser.longValue();
} else if (InternalSignificantTerms.BG_COUNT.equals(currentFieldName)) {
bucket.supersetDf = parser.longValue();
}
} else if (token == XContentParser.Token.START_OBJECT) {
aggregations.add(XContentParserUtils.parseTypedKeysObject(parser, Aggregation.TYPED_KEYS_DELIMITER, Aggregation.class));
}
}
bucket.setAggregations(new Aggregations(aggregations));
return bucket;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
import org.elasticsearch.search.aggregations.bucket.range.date.InternalDateRangeTests;
import org.elasticsearch.search.aggregations.bucket.range.geodistance.InternalGeoDistanceTests;
import org.elasticsearch.search.aggregations.bucket.sampler.InternalSamplerTests;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantLongTermsTests;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantStringTermsTests;
import org.elasticsearch.search.aggregations.bucket.terms.DoubleTermsTests;
import org.elasticsearch.search.aggregations.bucket.terms.LongTermsTests;
import org.elasticsearch.search.aggregations.bucket.terms.StringTermsTests;
Expand Down Expand Up @@ -126,6 +128,8 @@ private static List<InternalAggregationTestCase> getAggsTests() {
aggsTests.add(new InternalGeoDistanceTests());
aggsTests.add(new InternalFiltersTests());
aggsTests.add(new InternalAdjacencyMatrixTests());
aggsTests.add(new SignificantLongTermsTests());
aggsTests.add(new SignificantStringTermsTests());
return Collections.unmodifiableList(aggsTests);
}

Expand Down
Loading