Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract references tracking from SearchLookup #68202

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
import org.elasticsearch.search.NestedDocuments;
import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.lookup.TrackingMappedFieldsLookup;
import org.elasticsearch.transport.RemoteClusterAware;

import java.io.IOException;
Expand Down Expand Up @@ -266,8 +267,7 @@ public boolean allowExpensiveQueries() {

@SuppressWarnings("unchecked")
public <IFD extends IndexFieldData<?>> IFD getForField(MappedFieldType fieldType) {
return (IFD) indexFieldDataService.apply(fieldType, fullyQualifiedIndex.getName(),
() -> this.lookup().forkAndTrackFieldReferences(fieldType.name()));
return (IFD) this.lookup().getForField(fieldType);
}

public void addNamedQuery(String name, Query query) {
Expand Down Expand Up @@ -438,6 +438,16 @@ public SearchLookup lookup() {
return this.lookup;
}

/**
* Get a search lookup that tracks circular references starting from {@code field}
*/
public SearchLookup lookup(String field) {
return new SearchLookup(
new TrackingMappedFieldsLookup(this::getFieldType).trackingField(field)::get,
(f, s) -> indexFieldDataService.apply(f, fullyQualifiedIndex.getName(), s)
);
}

public NestedScope nestedScope() {
return nestedScope;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,100 +23,58 @@
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.MappedFieldType;

import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.function.Supplier;

public class SearchLookup {
/**
* The maximum depth of field dependencies.
* When a runtime field's doc values depends on another runtime field's doc values,
* which depends on another runtime field's doc values and so on, it can
* make a very deep stack, which we want to limit.
*/
private static final int MAX_FIELD_CHAIN_DEPTH = 5;

/**
* The chain of fields for which this lookup was created, used for detecting
* loops caused by runtime fields referring to other runtime fields. The chain is empty
* for the "top level" lookup created for the entire search. When a lookup is used to load
* fielddata for a field, we fork it and make sure the field name name isn't in the chain,
* then add it to the end. So the lookup for the a field named {@code a} will be {@code ["a"]}. If
* that field looks up the values of a field named {@code b} then
* {@code b}'s chain will contain {@code ["a", "b"]}.
*/
private final Set<String> fieldChain;
private final SourceLookup sourceLookup;
private final Function<String, MappedFieldType> fieldTypeLookup;
private final TrackingMappedFieldsLookup fieldTypeLookup;
private final BiFunction<MappedFieldType, Supplier<SearchLookup>, IndexFieldData<?>> fieldDataLookup;

/**
* Create the top level field lookup for a search request. Provides a way to look up fields from doc_values,
* stored fields, or _source.
* Create the top level field lookup for a search request.
*
* Provides a way to look up fields from doc_values, stored fields, or _source.
*/
public SearchLookup(Function<String, MappedFieldType> fieldTypeLookup,
BiFunction<MappedFieldType, Supplier<SearchLookup>, IndexFieldData<?>> fieldDataLookup) {
this.fieldTypeLookup = fieldTypeLookup;
this.fieldChain = Collections.emptySet();
this.fieldTypeLookup = new TrackingMappedFieldsLookup(fieldTypeLookup);
this.sourceLookup = new SourceLookup();
this.fieldDataLookup = fieldDataLookup;
}

/**
* Create a new {@link SearchLookup} that looks fields up the same as the one provided as argument,
* Create a new {@link SearchLookup} that looks fields up the same as the wrapped one,
* while also tracking field references starting from the provided field name. It detects cycles
* and prevents resolving fields that depend on more than {@link #MAX_FIELD_CHAIN_DEPTH} fields.
* @param searchLookup the existing lookup to create a new one from
* @param fieldChain the chain of fields that required the field currently being loaded
* and prevents resolving fields that depend on more than
* {@link TrackingMappedFieldsLookup#MAX_FIELD_CHAIN_DEPTH} fields.
* @param field the field to exclude from further field lookups
*/
private SearchLookup(SearchLookup searchLookup, Set<String> fieldChain) {
this.fieldChain = Collections.unmodifiableSet(fieldChain);
private SearchLookup(SearchLookup searchLookup, String field) {
this.sourceLookup = searchLookup.sourceLookup;
this.fieldTypeLookup = searchLookup.fieldTypeLookup;
this.fieldTypeLookup = searchLookup.fieldTypeLookup.trackingField(field);
this.fieldDataLookup = searchLookup.fieldDataLookup;
}

/**
* Creates a copy of the current {@link SearchLookup} that looks fields up in the same way, but also tracks field references
* in order to detect cycles and prevent resolving fields that depend on more than {@link #MAX_FIELD_CHAIN_DEPTH} other fields.
* @param field the field being referred to, for which fielddata needs to be loaded
* @return the new lookup
* @throws IllegalArgumentException if a cycle is detected in the fields required to build doc values, or if the field
* being resolved depends on more than {@link #MAX_FIELD_CHAIN_DEPTH}
*/
public final SearchLookup forkAndTrackFieldReferences(String field) {
Objects.requireNonNull(field, "field cannot be null");
Set<String> newFieldChain = new LinkedHashSet<>(fieldChain);
if (newFieldChain.add(field) == false) {
String message = String.join(" -> ", newFieldChain) + " -> " + field;
throw new IllegalArgumentException("Cyclic dependency detected while resolving runtime fields: " + message);
}
if (newFieldChain.size() > MAX_FIELD_CHAIN_DEPTH) {
throw new IllegalArgumentException("Field requires resolving too many dependent fields: " + String.join(" -> ", newFieldChain));
}
return new SearchLookup(this, newFieldChain);
}

public LeafSearchLookup getLeafSearchLookup(LeafReaderContext context) {
return new LeafSearchLookup(context,
new LeafDocLookup(fieldTypeLookup, this::getForField, context),
new LeafDocLookup(fieldTypeLookup::get, this::getForField, context),
sourceLookup,
new LeafStoredFieldsLookup(fieldTypeLookup, (doc, visitor) -> context.reader().document(doc, visitor)));
new LeafStoredFieldsLookup(fieldTypeLookup::get, (doc, visitor) -> context.reader().document(doc, visitor)));
}

public MappedFieldType fieldType(String fieldName) {
return fieldTypeLookup.apply(fieldName);
return fieldTypeLookup.get(fieldName);
}

public IndexFieldData<?> getForField(MappedFieldType fieldType) {
return fieldDataLookup.apply(fieldType, () -> forkAndTrackFieldReferences(fieldType.name()));
return fieldDataLookup.apply(fieldType, () -> new SearchLookup(this, fieldType.name()));
}

public SourceLookup source() {
return sourceLookup;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.lookup;

import org.elasticsearch.index.mapper.MappedFieldType;

import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.function.Function;

/**
* Used for detecting loops caused by fields referring to other fields. The chain is empty
* for the "top level" lookup created for the entire search. When a lookup is used to load
* fielddata for a field, we fork it and make sure the field name name isn't in the chain,
* then add it to the end. So the lookup for the a field named {@code a} will be {@code ["a"]}. If
* that field looks up the values of a field named {@code b} then
* {@code b}'s chain will contain {@code ["a", "b"]}.
*/
public final class TrackingMappedFieldsLookup {

/**
* The maximum depth of field dependencies.
* When a runtime field's doc values depends on another runtime field's doc values,
* which depends on another runtime field's doc values and so on, it can
* make a very deep stack, which we want to limit.
*/
public static final int MAX_FIELD_CHAIN_DEPTH = 5;

final Function<String, MappedFieldType> lookup;
final Set<String> references;

public TrackingMappedFieldsLookup(Function<String, MappedFieldType> lookup) {
this.lookup = lookup;
this.references = Collections.emptySet();
}

private TrackingMappedFieldsLookup(Function<String, MappedFieldType> lookup, Set<String> references, String field) {
this.lookup = lookup;
this.references = new LinkedHashSet<>(references);
if (this.references.add(field) == false) {
String message = String.join(" -> ", this.references) + " -> " + field;
throw new IllegalArgumentException("Cyclic dependency detected while resolving runtime fields: " + message);
}
if (this.references.size() > MAX_FIELD_CHAIN_DEPTH) {
throw new IllegalArgumentException("Field requires resolving too many dependent fields: "
+ String.join(" -> ", this.references));
}
}

public TrackingMappedFieldsLookup trackingField(String field) {
return new TrackingMappedFieldsLookup(lookup, this.references, field);
}

public MappedFieldType get(String field) {
return lookup.apply(field);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.elasticsearch.search.aggregations.support;

import com.carrotsearch.randomizedtesting.generators.RandomStrings;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.util.BytesRef;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,9 @@ protected SearchExecutionContext createSearchExecutionContext(MapperService mapp
when(searchExecutionContext.lookup()).thenReturn(new SearchLookup(mapperService::fieldType, (ft, s) -> {
throw new UnsupportedOperationException("search lookup not available");
}));
when(searchExecutionContext.lookup(anyString())).thenReturn(new SearchLookup(mapperService::fieldType, (ft, s) -> {
throw new UnsupportedOperationException("search lookup not available");
}));
return searchExecutionContext;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ protected final LeafFactory leafFactory(SearchExecutionContext context) {
* detection code as though we were resolving field data for this field.
* We're not, but running the query is close enough.
*/
return leafFactory(context.lookup().forkAndTrackFieldReferences(name()));
return leafFactory(context.lookup(name()));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
package org.elasticsearch.xpack.runtimefields.mapper;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Strings;
Expand All @@ -28,6 +30,7 @@
import org.elasticsearch.script.ScriptContext;
import org.elasticsearch.script.ScriptEngine;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.lookup.TrackingMappedFieldsLookup;
import org.elasticsearch.xpack.runtimefields.RuntimeFields;

import java.io.IOException;
Expand All @@ -37,6 +40,7 @@
import java.util.Map;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;

import static org.hamcrest.Matchers.equalTo;
import static org.mockito.Matchers.anyString;
Expand Down Expand Up @@ -206,14 +210,19 @@ protected static SearchExecutionContext mockContext(boolean allowExpensiveQuerie
(mft, lookupSupplier) -> mft.fielddataBuilder("test", lookupSupplier).build(null, null)
);
when(context.lookup()).thenReturn(lookup);
when(context.lookup(anyString())).thenAnswer(invocationOnMock -> {
String field = (String) invocationOnMock.getArguments()[0];
TrackingMappedFieldsLookup tracker = new TrackingMappedFieldsLookup(context::getFieldType).trackingField(field);
return new SearchLookup(tracker::get, (mft, lookupSupplier) -> mft.fielddataBuilder("test", lookupSupplier).build(null, null));
});
return context;
}

public void testExistsQueryIsExpensive() {
checkExpensiveQuery(MappedFieldType::existsQuery);
}

public void testExistsQueryInLoop() {
public void testExistsQueryInLoop() throws IOException {
checkLoop(MappedFieldType::existsQuery);
}

Expand All @@ -230,7 +239,7 @@ public void testRangeQueryIsExpensive() {
checkExpensiveQuery(this::randomRangeQuery);
}

public void testRangeQueryInLoop() {
public void testRangeQueryInLoop() throws IOException {
assumeTrue("Impl does not support range queries", supportsRangeQueries());
checkLoop(this::randomRangeQuery);
}
Expand All @@ -240,7 +249,7 @@ public void testTermQueryIsExpensive() {
checkExpensiveQuery(this::randomTermQuery);
}

public void testTermQueryInLoop() {
public void testTermQueryInLoop() throws IOException {
assumeTrue("Impl does not support term queries", supportsTermQueries());
checkLoop(this::randomTermQuery);
}
Expand All @@ -250,7 +259,7 @@ public void testTermsQueryIsExpensive() {
checkExpensiveQuery(this::randomTermsQuery);
}

public void testTermsQueryInLoop() {
public void testTermsQueryInLoop() throws IOException {
assumeTrue("Impl does not support term queries", supportsTermQueries());
checkLoop(this::randomTermsQuery);
}
Expand Down Expand Up @@ -301,8 +310,11 @@ protected final void checkExpensiveQuery(BiConsumer<MappedFieldType, SearchExecu
);
}

protected final void checkLoop(BiConsumer<MappedFieldType, SearchExecutionContext> queryBuilder) {
Exception e = expectThrows(IllegalArgumentException.class, () -> queryBuilder.accept(loopFieldType(), mockContext()));
protected final void checkLoop(BiFunction<MappedFieldType, SearchExecutionContext, Query> queryBuilder) throws IOException {
Query query = queryBuilder.apply(loopFieldType(), mockContext(true, loopFieldType()));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This wasn't really checking an actual production code path before. We now run a search, which forces the script to look up values triggering the loop handling.

MemoryIndex mindex = new MemoryIndex();
IndexSearcher searcher = mindex.createSearcher();
Exception e = expectThrows(IllegalArgumentException.class, () -> searcher.search(query, 1));
assertThat(e.getMessage(), equalTo("Cyclic dependency detected while resolving runtime fields: test -> test"));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -452,10 +452,11 @@ public void execute() {
}
};
case "loop":
return (fieldName, params, lookup) -> {
// Indicate that this script wants the field call "test", which *is* the name of this field
lookup.forkAndTrackFieldReferences("test");
throw new IllegalStateException("shoud have thrown on the line above");
return (fieldName, params, lookup) -> (ctx) -> new BooleanFieldScript(fieldName, params, lookup, ctx) {
@Override
public void execute() {
leafSearchLookup.doc().get("test");
}
};
default:
throw new IllegalArgumentException("unsupported script [" + code + "]");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -536,10 +536,17 @@ public void execute() {
}
};
case "loop":
return (fieldName, params, lookup, formatter) -> {
// Indicate that this script wants the field call "test", which *is* the name of this field
lookup.forkAndTrackFieldReferences("test");
throw new IllegalStateException("shoud have thrown on the line above");
return (fieldName, params, lookup, formatter) -> (ctx) -> new DateFieldScript(
fieldName,
params,
lookup,
formatter,
ctx
) {
@Override
public void execute() {
leafSearchLookup.doc().get("test");
}
};
default:
throw new IllegalArgumentException("unsupported script [" + code + "]");
Expand Down