Skip to content

Commit

Permalink
ESQL: Make fieldcaps calls lighter (#102510)
Browse files Browse the repository at this point in the history
Fixes #101763 Fixes
#102393. Tests updates
pending.

This change will make queries that don't need any fields from the
`_field_caps` call to actually ask only for `_index` field, instead of
asking all of them. This will make the `_field_caps` call itself much
lighter and its response size almost inexistent (it contains only the
list of indices and `_index`).
  • Loading branch information
astefan committed Nov 23, 2023
1 parent c390d66 commit 9206a47
Show file tree
Hide file tree
Showing 12 changed files with 276 additions and 52 deletions.
7 changes: 7 additions & 0 deletions docs/changelog/102510.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
pr: 102510
summary: "ESQL: Make fieldcaps calls lighter"
area: ES|QL
type: enhancement
issues:
- 101763
- 102393
Original file line number Diff line number Diff line change
Expand Up @@ -533,8 +533,7 @@ emp_no:integer | salary_change:double |salary_change.int:integer|salary_chan


projectAllButConstant
from employees | eval c = 1 | keep c | limit 2
;
from employees | eval c = 1 | keep c | limit 2;

c:i
1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -532,10 +532,14 @@ c:l
10
;

countStar
from employees | stats count=count(*) | sort count desc | limit 0;

count:l
;

countAllGrouped
from employees | stats c = count(*) by languages | rename languages as l | sort l DESC ;
from employees | stats c = count(*) by languages | rename languages as l | sort l DESC;

c:l | l:i
10 |null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@
import static org.elasticsearch.xpack.ql.type.DataTypes.NESTED;

public class Analyzer extends ParameterizedRuleExecutor<LogicalPlan, AnalyzerContext> {
static final List<Attribute> NO_FIELDS = List.of(
new ReferenceAttribute(Source.EMPTY, "<no-fields>", DataTypes.NULL, null, Nullability.TRUE, null, false)
);
private static final Iterable<RuleExecutor.Batch<LogicalPlan>> rules;

static {
Expand Down Expand Up @@ -142,7 +145,7 @@ protected LogicalPlan rule(EsqlUnresolvedRelation plan, AnalyzerContext context)
EsIndex esIndex = context.indexResolution().get();
var attributes = mappingAsAttributes(plan.source(), esIndex.mapping());
attributes.addAll(plan.metadataFields());
return new EsRelation(plan.source(), esIndex, attributes);
return new EsRelation(plan.source(), esIndex, attributes.isEmpty() ? NO_FIELDS : attributes);
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.elasticsearch.xpack.esql.optimizer.PhysicalPlanOptimizer;
import org.elasticsearch.xpack.esql.parser.EsqlParser;
import org.elasticsearch.xpack.esql.parser.TypedParamValue;
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
import org.elasticsearch.xpack.esql.plan.logical.Keep;
import org.elasticsearch.xpack.esql.plan.logical.RegexExtract;
import org.elasticsearch.xpack.esql.plan.physical.EstimatesRowSize;
Expand All @@ -37,6 +38,7 @@
import org.elasticsearch.xpack.ql.expression.Alias;
import org.elasticsearch.xpack.ql.expression.Attribute;
import org.elasticsearch.xpack.ql.expression.AttributeSet;
import org.elasticsearch.xpack.ql.expression.EmptyAttribute;
import org.elasticsearch.xpack.ql.expression.MetadataAttribute;
import org.elasticsearch.xpack.ql.expression.UnresolvedStar;
import org.elasticsearch.xpack.ql.expression.function.FunctionRegistry;
Expand Down Expand Up @@ -185,20 +187,19 @@ private <T> void preAnalyzeIndices(LogicalPlan parsed, ActionListener<IndexResol
} else if (preAnalysis.indices.size() == 1) {
TableInfo tableInfo = preAnalysis.indices.get(0);
TableIdentifier table = tableInfo.id();
var fieldNames = fieldNames(parsed);
var fieldNames = fieldNames(parsed, enrichPolicyMatchFields);

if (enrichPolicyMatchFields.isEmpty() == false && fieldNames != IndexResolver.ALL_FIELDS) {
fieldNames.addAll(enrichPolicyMatchFields);
fieldNames.addAll(subfields(enrichPolicyMatchFields));
}
indexResolver.resolveAsMergedMapping(
table.index(),
fieldNames,
false,
Map.of(),
listener,
EsqlSession::specificValidity,
IndexResolver.PRESERVE_PROPERTIES
IndexResolver.PRESERVE_PROPERTIES,
// TODO no matter what metadata fields are asked in a query, the "allowedMetadataFields" is always _index, does it make
// sense to reflect the actual list of metadata fields instead?
IndexResolver.INDEX_METADATA_FIELD
);
} else {
try {
Expand All @@ -210,7 +211,7 @@ private <T> void preAnalyzeIndices(LogicalPlan parsed, ActionListener<IndexResol
}
}

static Set<String> fieldNames(LogicalPlan parsed) {
static Set<String> fieldNames(LogicalPlan parsed, Set<String> enrichPolicyMatchFields) {
if (false == parsed.anyMatch(plan -> plan instanceof Aggregate || plan instanceof Project)) {
// no explicit columns selection, for example "from employees"
return IndexResolver.ALL_FIELDS;
Expand Down Expand Up @@ -242,6 +243,12 @@ static Set<String> fieldNames(LogicalPlan parsed) {
for (Attribute extracted : re.extractedFields()) {
references.removeIf(attr -> matchByName(attr, extracted.qualifiedName(), false));
}
} else if (p instanceof Enrich) {
AttributeSet enrichRefs = p.references();
// Enrich adds an EmptyAttribute if no match field is specified
// The exact name of the field will be added later as part of enrichPolicyMatchFields Set
enrichRefs.removeIf(attr -> attr instanceof EmptyAttribute);
references.addAll(enrichRefs);
} else {
references.addAll(p.references());
if (p instanceof Keep) {
Expand All @@ -266,10 +273,13 @@ static Set<String> fieldNames(LogicalPlan parsed) {
// otherwise, in some edge cases, we will fail to ask for "*" (all fields) instead
references.removeIf(a -> a instanceof MetadataAttribute || MetadataAttribute.isSupported(a.qualifiedName()));
Set<String> fieldNames = references.names();
if (fieldNames.isEmpty()) {
return IndexResolver.ALL_FIELDS;
if (fieldNames.isEmpty() && enrichPolicyMatchFields.isEmpty()) {
// there cannot be an empty list of fields, we'll ask the simplest and lightest one instead: _index
return IndexResolver.INDEX_METADATA_FIELD;
} else {
fieldNames.addAll(subfields(fieldNames));
fieldNames.addAll(enrichPolicyMatchFields);
fieldNames.addAll(subfields(enrichPolicyMatchFields));
return fieldNames;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,23 @@

package org.elasticsearch.xpack.esql.analysis;

import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.Streams;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParserConfiguration;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xpack.esql.expression.function.aggregate.Max;
import org.elasticsearch.xpack.esql.plan.logical.EsqlUnresolvedRelation;
import org.elasticsearch.xpack.esql.plan.logical.Eval;
import org.elasticsearch.xpack.esql.plan.logical.Row;
import org.elasticsearch.xpack.esql.plan.logical.local.EsqlProject;
import org.elasticsearch.xpack.esql.plugin.EsqlPlugin;
import org.elasticsearch.xpack.esql.session.EsqlSession;
import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry;
import org.elasticsearch.xpack.ql.expression.Alias;
import org.elasticsearch.xpack.ql.expression.Attribute;
import org.elasticsearch.xpack.ql.expression.Expressions;
Expand All @@ -24,22 +34,29 @@
import org.elasticsearch.xpack.ql.expression.UnresolvedAttribute;
import org.elasticsearch.xpack.ql.index.EsIndex;
import org.elasticsearch.xpack.ql.index.IndexResolution;
import org.elasticsearch.xpack.ql.index.IndexResolver;
import org.elasticsearch.xpack.ql.plan.TableIdentifier;
import org.elasticsearch.xpack.ql.plan.logical.Aggregate;
import org.elasticsearch.xpack.ql.plan.logical.EsRelation;
import org.elasticsearch.xpack.ql.plan.logical.Filter;
import org.elasticsearch.xpack.ql.plan.logical.Limit;
import org.elasticsearch.xpack.ql.plan.logical.LogicalPlan;
import org.elasticsearch.xpack.ql.plan.logical.OrderBy;
import org.elasticsearch.xpack.ql.type.DataType;
import org.elasticsearch.xpack.ql.type.DataTypes;
import org.elasticsearch.xpack.ql.type.TypesTests;

import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import java.util.stream.IntStream;

import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_VERIFIER;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.as;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.configuration;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning;
import static org.elasticsearch.xpack.esql.analysis.Analyzer.NO_FIELDS;
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.analyze;
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.analyzer;
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.loadMapping;
Expand Down Expand Up @@ -68,7 +85,7 @@ public void testIndexResolution() {
var plan = analyzer.analyze(UNRESOLVED_RELATION);
var limit = as(plan, Limit.class);

assertEquals(new EsRelation(EMPTY, idx, false), limit.child());
assertEquals(new EsRelation(EMPTY, idx, NO_FIELDS), limit.child());
}

public void testFailOnUnresolvedIndex() {
Expand All @@ -86,7 +103,7 @@ public void testIndexWithClusterResolution() {
var plan = analyzer.analyze(UNRESOLVED_RELATION);
var limit = as(plan, Limit.class);

assertEquals(new EsRelation(EMPTY, idx, false), limit.child());
assertEquals(new EsRelation(EMPTY, idx, NO_FIELDS), limit.child());
}

public void testAttributeResolution() {
Expand Down Expand Up @@ -1120,6 +1137,61 @@ public void testAggsWithoutAggAndFollowingCommand() throws Exception {
assertEquals(agg.groupings(), agg.aggregates());
}

public void testEmptyEsRelationOnLimitZeroWithCount() throws IOException {
var query = """
from test*
| stats count=count(*)
| sort count desc
| limit 0""";
var plan = analyzeWithEmptyFieldCapsResponse(query);
var limit = as(plan, Limit.class);
limit = as(limit.child(), Limit.class);
assertThat(limit.limit().fold(), equalTo(0));
var orderBy = as(limit.child(), OrderBy.class);
var agg = as(orderBy.child(), Aggregate.class);
assertEmptyEsRelation(agg.child());
}

public void testEmptyEsRelationOnConstantEvalAndKeep() throws IOException {
var query = """
from test*
| eval c = 1
| keep c
| limit 2""";
var plan = analyzeWithEmptyFieldCapsResponse(query);
var limit = as(plan, Limit.class);
limit = as(limit.child(), Limit.class);
assertThat(limit.limit().fold(), equalTo(2));
var project = as(limit.child(), EsqlProject.class);
var eval = as(project.child(), Eval.class);
assertEmptyEsRelation(eval.child());
}

public void testEmptyEsRelationOnConstantEvalAndStats() throws IOException {
var query = """
from test*
| limit 10
| eval x = 1
| stats c = count(x)""";
var plan = analyzeWithEmptyFieldCapsResponse(query);
var limit = as(plan, Limit.class);
var agg = as(limit.child(), Aggregate.class);
var eval = as(agg.child(), Eval.class);
limit = as(eval.child(), Limit.class);
assertThat(limit.limit().fold(), equalTo(10));
assertEmptyEsRelation(limit.child());
}

public void testEmptyEsRelationOnCountStar() throws IOException {
var query = """
from test*
| stats c = count(*)""";
var plan = analyzeWithEmptyFieldCapsResponse(query);
var limit = as(plan, Limit.class);
var agg = as(limit.child(), Aggregate.class);
assertEmptyEsRelation(agg.child());
}

public void testUnsupportedFieldsInStats() {
var errorMsg = "Cannot use field [point] with unsupported type [geo_point]";

Expand Down Expand Up @@ -1361,4 +1433,30 @@ protected List<String> filteredWarnings() {
return withDefaultLimitWarning(super.filteredWarnings());
}

private static LogicalPlan analyzeWithEmptyFieldCapsResponse(String query) throws IOException {
IndexResolution resolution = IndexResolver.mergedMappings(
EsqlDataTypeRegistry.INSTANCE,
"test*",
readFieldCapsResponse("empty_field_caps_response.json"),
EsqlSession::specificValidity,
IndexResolver.PRESERVE_PROPERTIES,
IndexResolver.INDEX_METADATA_FIELD
);
var analyzer = analyzer(resolution, TEST_VERIFIER, configuration(query));
return analyze(query, analyzer);
}

private static FieldCapabilitiesResponse readFieldCapsResponse(String resourceName) throws IOException {
InputStream stream = AnalyzerTests.class.getResourceAsStream("/" + resourceName);
BytesReference ref = Streams.readFully(stream);
XContentParser parser = XContentHelper.createParser(XContentParserConfiguration.EMPTY, ref, XContentType.JSON);
return FieldCapabilitiesResponse.fromXContent(parser);
}

private void assertEmptyEsRelation(LogicalPlan plan) {
assertThat(plan, instanceOf(EsRelation.class));
EsRelation esRelation = (EsRelation) plan;
assertThat(esRelation.output(), equalTo(NO_FIELDS));
assertTrue(esRelation.index().mapping().isEmpty());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2511,6 +2511,16 @@ public void testMultipleRenameStatsDropGroupMultirow() {
var row = as(agg.child(), EsRelation.class);
}

public void testLimitZeroUsesLocalRelation() {
LogicalPlan plan = optimizedPlan("""
from test
| stats count=count(*)
| sort count desc
| limit 0""");

assertThat(plan, instanceOf(LocalRelation.class));
}

private <T> T aliased(Expression exp, Class<T> clazz) {
var alias = as(exp, Alias.class);
return as(alias.child(), clazz);
Expand Down

0 comments on commit 9206a47

Please sign in to comment.