Skip to content

Commit

Permalink
Less jank in after-key parsing for unmapped fields (#86359) (#97282)
Browse files Browse the repository at this point in the history
Resolves #85928

The after-key parsing is pretty weird, and there are probably more bugs there. I did not take the opportunity to refactor the whole thing, but we should. This fixes the immediate problem by treating after keys as bytes refs when we don't have a field but think we want a keyword. We were already doing that if the user asked for a missing bucket, this just extends the behavior in the case that we don't.

Long term, the terms Composite source (and probably other Composite sources) should have specializations for unmapped fields. That's the direction we want to take aggs in general.
  • Loading branch information
not-napoleon committed Jul 3, 2023
1 parent 22ca339 commit 29ff32e
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 1 deletion.
6 changes: 6 additions & 0 deletions docs/changelog/97282.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 97282
summary: Less jank in after-key parsing for unmapped fields
area: Aggregations
type: bug
issues:
- 85928
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ void setAfter(Comparable<?> value) {
if (missingBucket && value == null) {
afterValue = null;
afterValueGlobalOrd = MISSING_VALUE_FLAG;
} else if (value.getClass() == String.class || (missingBucket && fieldType == null)) {
} else if (value.getClass() == String.class || (fieldType == null)) {
// the value might be not string if this field is missing in this shard but present in other shards
// and doesn't have a string type
afterValue = format.parseBytesRef(value.toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,13 +151,16 @@ public void testUnmappedFieldWithTerms() throws Exception {
createDocument("keyword", "c")
)
);

// Only aggregate on unmapped field, no missing bucket => no results
testSearchCase(
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
dataset,
() -> new CompositeAggregationBuilder("name", Arrays.asList(new TermsValuesSourceBuilder("unmapped").field("unmapped"))),
(result) -> { assertEquals(0, result.getBuckets().size()); }
);

// Only aggregate on unmapped field, missing bucket => one null bucket with all values
testSearchCase(
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
dataset,
Expand All @@ -173,6 +176,7 @@ public void testUnmappedFieldWithTerms() throws Exception {
}
);

// Only aggregate on the unmapped field, after key for that field is set as `null` => no results
testSearchCase(
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
dataset,
Expand All @@ -183,6 +187,7 @@ public void testUnmappedFieldWithTerms() throws Exception {
(result) -> { assertEquals(0, result.getBuckets().size()); }
);

// Mapped field first, then unmapped, no missing bucket => no results
testSearchCase(
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
dataset,
Expand All @@ -196,6 +201,7 @@ public void testUnmappedFieldWithTerms() throws Exception {
(result) -> { assertEquals(0, result.getBuckets().size()); }
);

// Mapped + unmapped, include missing => 3 buckets
testSearchCase(
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
dataset,
Expand All @@ -217,6 +223,91 @@ public void testUnmappedFieldWithTerms() throws Exception {
assertEquals(1L, result.getBuckets().get(2).getDocCount());
}
);

// Unmapped field, keyword after key, unmapped sorts after, include unmapped => 1 bucket
testSearchCase(
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
dataset,
() -> new CompositeAggregationBuilder(
"name",
Arrays.asList(
new TermsValuesSourceBuilder("unmapped").field("unmapped").missingBucket(true).missingOrder(MissingOrder.LAST)
)
).aggregateAfter(Collections.singletonMap("unmapped", "cat")),
(InternalComposite result) -> {
assertEquals(1, result.getBuckets().size());
assertEquals("{unmapped=null}", result.afterKey().toString());
assertEquals("{unmapped=null}", result.getBuckets().get(0).getKeyAsString());
assertEquals(5L, result.getBuckets().get(0).getDocCount());
}
);

// Unmapped field, keyword after key, unmapped sorts before, include unmapped => 0 buckets
testSearchCase(
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
dataset,
() -> new CompositeAggregationBuilder(
"name",
Arrays.asList(
new TermsValuesSourceBuilder("unmapped").field("unmapped").missingBucket(true).missingOrder(MissingOrder.FIRST)
)
).aggregateAfter(Collections.singletonMap("unmapped", "cat")),
(InternalComposite result) -> { assertEquals(0, result.getBuckets().size()); }
);

// Unmapped field, number after key, unmapped sorts after, include unmapped => 1 bucket
testSearchCase(
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
dataset,
() -> new CompositeAggregationBuilder(
"name",
Arrays.asList(
new TermsValuesSourceBuilder("unmapped").field("unmapped").missingBucket(true).missingOrder(MissingOrder.LAST)
)
).aggregateAfter(Collections.singletonMap("unmapped", 42)),
(InternalComposite result) -> {
assertEquals(1, result.getBuckets().size());
assertEquals("{unmapped=null}", result.afterKey().toString());
assertEquals("{unmapped=null}", result.getBuckets().get(0).getKeyAsString());
assertEquals(5L, result.getBuckets().get(0).getDocCount());
}
);

// Unmapped field, number after key, unmapped sorts before, include unmapped => 0 buckets
testSearchCase(
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
dataset,
() -> new CompositeAggregationBuilder(
"name",
Arrays.asList(
new TermsValuesSourceBuilder("unmapped").field("unmapped").missingBucket(true).missingOrder(MissingOrder.FIRST)
)
).aggregateAfter(Collections.singletonMap("unmapped", 42)),
(InternalComposite result) -> { assertEquals(0, result.getBuckets().size()); }
);

}

public void testUnmappedTermsLongAfter() throws Exception {
final List<Map<String, List<Object>>> dataset = new ArrayList<>();
dataset.addAll(
Arrays.asList(
createDocument("keyword", "a"),
createDocument("keyword", "c"),
createDocument("keyword", "a"),
createDocument("keyword", "d"),
createDocument("keyword", "c")
)
);

// Unmapped field, number after key, no missing bucket => 0 buckets
testSearchCase(
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
dataset,
() -> new CompositeAggregationBuilder("name", Arrays.asList(new TermsValuesSourceBuilder("unmapped").field("unmapped")))
.aggregateAfter(Collections.singletonMap("unmapped", 42)),
(InternalComposite result) -> { assertEquals(0, result.getBuckets().size()); }
);
}

public void testUnmappedFieldWithGeopoint() throws Exception {
Expand Down

0 comments on commit 29ff32e

Please sign in to comment.