Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract references tracking from SearchLookup #68202

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,7 @@ public boolean allowExpensiveQueries() {

@SuppressWarnings("unchecked")
public <IFD extends IndexFieldData<?>> IFD getForField(MappedFieldType fieldType) {
return (IFD) indexFieldDataService.apply(fieldType, fullyQualifiedIndex.getName(),
() -> this.lookup().forkAndTrackFieldReferences(fieldType.name()));
return (IFD) this.lookup().getForField(fieldType);
}

public void addNamedQuery(String name, Query query) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,100 +23,62 @@
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.MappedFieldType;

import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.function.Supplier;

public class SearchLookup {
/**
* The maximum depth of field dependencies.
* When a runtime field's doc values depends on another runtime field's doc values,
* which depends on another runtime field's doc values and so on, it can
* make a very deep stack, which we want to limit.
*/
private static final int MAX_FIELD_CHAIN_DEPTH = 5;

/**
* The chain of fields for which this lookup was created, used for detecting
* loops caused by runtime fields referring to other runtime fields. The chain is empty
* for the "top level" lookup created for the entire search. When a lookup is used to load
* fielddata for a field, we fork it and make sure the field name name isn't in the chain,
* then add it to the end. So the lookup for the a field named {@code a} will be {@code ["a"]}. If
* that field looks up the values of a field named {@code b} then
* {@code b}'s chain will contain {@code ["a", "b"]}.
*/
private final Set<String> fieldChain;
private final SourceLookup sourceLookup;
private final Function<String, MappedFieldType> fieldTypeLookup;
private final TrackingMappedFieldsLookup fieldTypeLookup;
private final BiFunction<MappedFieldType, Supplier<SearchLookup>, IndexFieldData<?>> fieldDataLookup;

/**
* Create the top level field lookup for a search request. Provides a way to look up fields from doc_values,
* stored fields, or _source.
* Create the top level field lookup for a search request.
*
* Provides a way to look up fields from doc_values, stored fields, or _source.
*/
public SearchLookup(Function<String, MappedFieldType> fieldTypeLookup,
BiFunction<MappedFieldType, Supplier<SearchLookup>, IndexFieldData<?>> fieldDataLookup) {
this.fieldTypeLookup = fieldTypeLookup;
this.fieldChain = Collections.emptySet();
this.fieldTypeLookup = new TrackingMappedFieldsLookup(fieldTypeLookup);
this.sourceLookup = new SourceLookup();
this.fieldDataLookup = fieldDataLookup;
}

/**
* Create a new {@link SearchLookup} that looks fields up the same as the one provided as argument,
* while also tracking field references starting from the provided field name. It detects cycles
* and prevents resolving fields that depend on more than {@link #MAX_FIELD_CHAIN_DEPTH} fields.
* @param searchLookup the existing lookup to create a new one from
* @param fieldChain the chain of fields that required the field currently being loaded
*/
private SearchLookup(SearchLookup searchLookup, Set<String> fieldChain) {
this.fieldChain = Collections.unmodifiableSet(fieldChain);
private SearchLookup(SearchLookup searchLookup, String field) {
this.sourceLookup = searchLookup.sourceLookup;
this.fieldTypeLookup = searchLookup.fieldTypeLookup;
this.fieldTypeLookup = searchLookup.fieldTypeLookup.excludingField(field);
this.fieldDataLookup = searchLookup.fieldDataLookup;
}

/**
* Creates a copy of the current {@link SearchLookup} that looks fields up in the same way, but also tracks field references
* in order to detect cycles and prevent resolving fields that depend on more than {@link #MAX_FIELD_CHAIN_DEPTH} other fields.
* @param field the field being referred to, for which fielddata needs to be loaded
* @return the new lookup
* @throws IllegalArgumentException if a cycle is detected in the fields required to build doc values, or if the field
* being resolved depends on more than {@link #MAX_FIELD_CHAIN_DEPTH}
* Create a new {@link SearchLookup} that looks fields up the same as this one,
* while also tracking field references starting from the provided field name. It detects cycles
* and prevents resolving fields that depend on more than
* {@link TrackingMappedFieldsLookup#MAX_FIELD_CHAIN_DEPTH} fields.
* @param field the field to exclude from further field lookups
*/
public final SearchLookup forkAndTrackFieldReferences(String field) {
Objects.requireNonNull(field, "field cannot be null");
Set<String> newFieldChain = new LinkedHashSet<>(fieldChain);
if (newFieldChain.add(field) == false) {
String message = String.join(" -> ", newFieldChain) + " -> " + field;
throw new IllegalArgumentException("Cyclic dependency detected while resolving runtime fields: " + message);
}
if (newFieldChain.size() > MAX_FIELD_CHAIN_DEPTH) {
throw new IllegalArgumentException("Field requires resolving too many dependent fields: " + String.join(" -> ", newFieldChain));
}
return new SearchLookup(this, newFieldChain);
public SearchLookup forkAndTrackReferences(String field) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had hoped to be able to remove this method entirely, but we need it in runtime field queries if we want the root of the field cycle to be reported accurately. In other words, without this method we still get errors thrown on cycles, but instead of being "field -> a -> b -> field" the cycle is reported as "a -> b -> field -> a". I'm not sure if this is a real problem or not - it's a perfectly good cycle detection wherever it starts from. I lean towards removing it anyway, as I think the additional API surface isn't worth the different error message.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I kind of like the error message being accurate. Could we drop the API and have the queries make TrackingMappedFieldsLookup and immediate call the method to add themselves to its list?

return new SearchLookup(this, field);
}

public LeafSearchLookup getLeafSearchLookup(LeafReaderContext context) {
return new LeafSearchLookup(context,
new LeafDocLookup(fieldTypeLookup, this::getForField, context),
new LeafDocLookup(fieldTypeLookup::get, this::getForField, context),
sourceLookup,
new LeafStoredFieldsLookup(fieldTypeLookup, (doc, visitor) -> context.reader().document(doc, visitor)));
new LeafStoredFieldsLookup(fieldTypeLookup::get, (doc, visitor) -> context.reader().document(doc, visitor)));
}

public MappedFieldType fieldType(String fieldName) {
return fieldTypeLookup.apply(fieldName);
return fieldTypeLookup.get(fieldName);
}

public IndexFieldData<?> getForField(MappedFieldType fieldType) {
return fieldDataLookup.apply(fieldType, () -> forkAndTrackFieldReferences(fieldType.name()));
return fieldDataLookup.apply(fieldType, () -> new SearchLookup(this, fieldType.name()));
}

public SourceLookup source() {
return sourceLookup;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.lookup;

import org.elasticsearch.index.mapper.MappedFieldType;

import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.function.Function;

/**
* Used for detecting loops caused by fields referring to other fields. The chain is empty
* for the "top level" lookup created for the entire search. When a lookup is used to load
* fielddata for a field, we fork it and make sure the field name name isn't in the chain,
* then add it to the end. So the lookup for the a field named {@code a} will be {@code ["a"]}. If
* that field looks up the values of a field named {@code b} then
* {@code b}'s chain will contain {@code ["a", "b"]}.
*/
public final class TrackingMappedFieldsLookup {

/**
* The maximum depth of field dependencies.
* When a runtime field's doc values depends on another runtime field's doc values,
* which depends on another runtime field's doc values and so on, it can
* make a very deep stack, which we want to limit.
*/
public static final int MAX_FIELD_CHAIN_DEPTH = 5;

final Function<String, MappedFieldType> lookup;
final Set<String> references;

public TrackingMappedFieldsLookup(Function<String, MappedFieldType> lookup) {
this.lookup = lookup;
this.references = Collections.emptySet();
}

private TrackingMappedFieldsLookup(Function<String, MappedFieldType> lookup, Set<String> references, String field) {
this.lookup = lookup;
this.references = new LinkedHashSet<>(references);
if (this.references.add(field) == false) {
String message = String.join(" -> ", this.references) + " -> " + field;
throw new IllegalArgumentException("Cyclic dependency detected while resolving runtime fields: " + message);
}
if (this.references.size() > MAX_FIELD_CHAIN_DEPTH) {
throw new IllegalArgumentException("Field requires resolving too many dependent fields: "
+ String.join(" -> ", this.references));
}
}

public TrackingMappedFieldsLookup excludingField(String field) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't really exclude the field, right? It tracks it?

return new TrackingMappedFieldsLookup(lookup, this.references, field);
}

public MappedFieldType get(String field) {
return lookup.apply(field);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.elasticsearch.search.aggregations.support;

import com.carrotsearch.randomizedtesting.generators.RandomStrings;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.util.BytesRef;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ protected final LeafFactory leafFactory(SearchExecutionContext context) {
* detection code as though we were resolving field data for this field.
* We're not, but running the query is close enough.
*/
return leafFactory(context.lookup().forkAndTrackFieldReferences(name()));
return leafFactory(context.lookup().forkAndTrackReferences(name()));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
package org.elasticsearch.xpack.runtimefields.mapper;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Strings;
Expand Down Expand Up @@ -37,6 +39,7 @@
import java.util.Map;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;

import static org.hamcrest.Matchers.equalTo;
import static org.mockito.Matchers.anyString;
Expand Down Expand Up @@ -213,7 +216,7 @@ public void testExistsQueryIsExpensive() {
checkExpensiveQuery(MappedFieldType::existsQuery);
}

public void testExistsQueryInLoop() {
public void testExistsQueryInLoop() throws IOException {
checkLoop(MappedFieldType::existsQuery);
}

Expand All @@ -230,7 +233,7 @@ public void testRangeQueryIsExpensive() {
checkExpensiveQuery(this::randomRangeQuery);
}

public void testRangeQueryInLoop() {
public void testRangeQueryInLoop() throws IOException {
assumeTrue("Impl does not support range queries", supportsRangeQueries());
checkLoop(this::randomRangeQuery);
}
Expand All @@ -240,7 +243,7 @@ public void testTermQueryIsExpensive() {
checkExpensiveQuery(this::randomTermQuery);
}

public void testTermQueryInLoop() {
public void testTermQueryInLoop() throws IOException {
assumeTrue("Impl does not support term queries", supportsTermQueries());
checkLoop(this::randomTermQuery);
}
Expand All @@ -250,7 +253,7 @@ public void testTermsQueryIsExpensive() {
checkExpensiveQuery(this::randomTermsQuery);
}

public void testTermsQueryInLoop() {
public void testTermsQueryInLoop() throws IOException {
assumeTrue("Impl does not support term queries", supportsTermQueries());
checkLoop(this::randomTermsQuery);
}
Expand Down Expand Up @@ -301,8 +304,11 @@ protected final void checkExpensiveQuery(BiConsumer<MappedFieldType, SearchExecu
);
}

protected final void checkLoop(BiConsumer<MappedFieldType, SearchExecutionContext> queryBuilder) {
Exception e = expectThrows(IllegalArgumentException.class, () -> queryBuilder.accept(loopFieldType(), mockContext()));
protected final void checkLoop(BiFunction<MappedFieldType, SearchExecutionContext, Query> queryBuilder) throws IOException {
Query query = queryBuilder.apply(loopFieldType(), mockContext(true, loopFieldType()));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This wasn't really checking an actual production code path before. We now run a search, which forces the script to look up values triggering the loop handling.

MemoryIndex mindex = new MemoryIndex();
IndexSearcher searcher = mindex.createSearcher();
Exception e = expectThrows(IllegalArgumentException.class, () -> searcher.search(query, 1));
assertThat(e.getMessage(), equalTo("Cyclic dependency detected while resolving runtime fields: test -> test"));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -452,10 +452,11 @@ public void execute() {
}
};
case "loop":
return (fieldName, params, lookup) -> {
// Indicate that this script wants the field call "test", which *is* the name of this field
lookup.forkAndTrackFieldReferences("test");
throw new IllegalStateException("shoud have thrown on the line above");
return (fieldName, params, lookup) -> (ctx) -> new BooleanFieldScript(fieldName, params, lookup, ctx) {
@Override
public void execute() {
leafSearchLookup.doc().get("test");
}
};
default:
throw new IllegalArgumentException("unsupported script [" + code + "]");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -536,10 +536,17 @@ public void execute() {
}
};
case "loop":
return (fieldName, params, lookup, formatter) -> {
// Indicate that this script wants the field call "test", which *is* the name of this field
lookup.forkAndTrackFieldReferences("test");
throw new IllegalStateException("shoud have thrown on the line above");
return (fieldName, params, lookup, formatter) -> (ctx) -> new DateFieldScript(
fieldName,
params,
lookup,
formatter,
ctx
) {
@Override
public void execute() {
leafSearchLookup.doc().get("test");
}
};
default:
throw new IllegalArgumentException("unsupported script [" + code + "]");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,9 @@ public void testRangeQuery() throws IOException {

@Override
protected Query randomRangeQuery(MappedFieldType ft, SearchExecutionContext ctx) {
return ft.rangeQuery(randomLong(), randomLong(), randomBoolean(), randomBoolean(), null, null, null, ctx);
long lower = randomLongBetween(0, 1000);
long upper = lower + randomLongBetween(5, 5000);
return ft.rangeQuery(lower, upper, randomBoolean(), randomBoolean(), null, null, null, ctx);
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because we're actually executing the random query now, we need to ensure its parameter constraints are met.


@Override
Expand Down Expand Up @@ -294,10 +296,11 @@ public void execute() {
}
};
case "loop":
return (fieldName, params, lookup) -> {
// Indicate that this script wants the field call "test", which *is* the name of this field
lookup.forkAndTrackFieldReferences("test");
throw new IllegalStateException("shoud have thrown on the line above");
return (fieldName, params, lookup) -> (ctx) -> new DoubleFieldScript(fieldName, params, lookup, ctx) {
@Override
public void execute() {
leafSearchLookup.doc().get("test");
}
};
default:
throw new IllegalArgumentException("unsupported script [" + code + "]");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,10 +254,11 @@ public void execute() {
}
};
case "loop":
return (fieldName, params, lookup) -> {
// Indicate that this script wants the field call "test", which *is* the name of this field
lookup.forkAndTrackFieldReferences("test");
throw new IllegalStateException("shoud have thrown on the line above");
return (fieldName, params, lookup) -> (ctx) -> new GeoPointFieldScript(fieldName, params, lookup, ctx) {
@Override
public void execute() {
leafSearchLookup.doc().get("test");
}
};
default:
throw new IllegalArgumentException("unsupported script [" + code + "]");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -311,10 +311,11 @@ public void execute() {
}
};
case "loop":
return (fieldName, params, lookup) -> {
// Indicate that this script wants the field call "test", which *is* the name of this field
lookup.forkAndTrackFieldReferences("test");
throw new IllegalStateException("shoud have thrown on the line above");
return (fieldName, params, lookup) -> (ctx) -> new IpFieldScript(fieldName, params, lookup, ctx) {
@Override
public void execute() {
leafSearchLookup.doc().get("test");
}
};
default:
throw new IllegalArgumentException("unsupported script [" + code + "]");
Expand Down