Skip to content

Commit

Permalink
Fix Fields API Caching Regression (#90017)
Browse files Browse the repository at this point in the history
This fixes both a bug and a performance regression.

After adding the text mappings family to the scripting fields API, field-style access uses source to 
generate values while doc-style access uses doc values. This means that a script accessing the same 
text field using both apis may see incorrect results.

There is also a performance regression where previously we checked a single cache to try to ensure 
that doc-style access only used doc values, but this was done on a per-document basis.

This change adds separate caches for field-style access and doc-style access in LeafDocLookup where 
all the work to cache field data is done per-segment, and the parallel caches allow no additional checks 
to be made when accessing the values per-document. The caches still share field data when possible 
for non-text based fields, so we don't double load.
  • Loading branch information
jdconrad committed Sep 14, 2022
1 parent a23999e commit cc0679e
Show file tree
Hide file tree
Showing 4 changed files with 607 additions and 25 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/90017.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 90017
summary: Fix Fields API Caching Regression
area: Infra/Scripting
type: regression
issues: []
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
setup:
- do:
indices.create:
index: test0
body:
settings:
number_of_shards: 1
mappings:
properties:
text:
type: text
fielddata: true
long:
type: long

- do:
index:
index: test0
id: "1"
body:
text: "Lots of text."
long: 1

- do:
indices.create:
index: test1
body:
settings:
number_of_shards: 1
mappings:
properties:
text:
type: text
fielddata: true
long:
type: long

- do:
index:
index: test1
id: "1"
body:
text: "Lots of text."
long: 1

- do:
indices.refresh: {}

---
"test_leaf_cache_text_field_first":
- do:
search:
rest_total_hits_as_int: true
index: test0
body:
script_fields:
field_0:
script:
source: "/* avoid stash */ $('text', '') + ' ' + doc['text'].value"
- match: { hits.hits.0.fields.field_0.0: 'Lots of text. lots' }

- do:
search:
rest_total_hits_as_int: true
index: test0
body:
script_fields:
field_0:
script:
source: "/* avoid stash */ $('text', '') + ' ' + doc['text'].value"
field_1:
script:
source: "doc['text'].value + ' ' + $('text', '')"
field_2:
script:
source: "/* avoid stash */ $('text', '') + ' ' + doc['text'].value"
field_3:
script:
source: "doc['text'].value + ' ' + $('text', '')"
- match: { hits.hits.0.fields.field_0.0: 'Lots of text. lots' }
- match: { hits.hits.0.fields.field_1.0: 'lots Lots of text.' }
- match: { hits.hits.0.fields.field_2.0: 'Lots of text. lots' }
- match: { hits.hits.0.fields.field_3.0: 'lots Lots of text.' }

---
"test_leaf_cache_text_doc_first":
- do:
search:
rest_total_hits_as_int: true
index: test1
body:
script_fields:
field_0:
script:
source: "doc['text'].value + ' ' + $('text', '')"
- match: { hits.hits.0.fields.field_0.0: 'lots Lots of text.' }

- do:
search:
rest_total_hits_as_int: true
index: test1
body:
script_fields:
field_0:
script:
source: "doc['text'].value + ' ' + $('text', '')"
field_1:
script:
source: "/* avoid stash */ $('text', '') + ' ' + doc['text'].value"
field_2:
script:
source: "doc['text'].value + ' ' + $('text', '')"
field_3:
script:
source: "/* avoid stash */ $('text', '') + ' ' + doc['text'].value"
- match: { hits.hits.0.fields.field_0.0: 'lots Lots of text.' }
- match: { hits.hits.0.fields.field_1.0: 'Lots of text. lots' }
- match: { hits.hits.0.fields.field_2.0: 'lots Lots of text.' }
- match: { hits.hits.0.fields.field_3.0: 'Lots of text. lots' }

---
"test_leaf_cache_long_field_first":
- do:
search:
rest_total_hits_as_int: true
index: test0
body:
script_fields:
field_0:
script:
source: "/* avoid stash */ $('long', 0) + ' ' + doc['long'].value"
- match: { hits.hits.0.fields.field_0.0: '1 1' }

- do:
search:
rest_total_hits_as_int: true
index: test0
body:
script_fields:
field_0:
script:
source: "/* avoid stash */ $('long', 0) + ' ' + doc['long'].value"
field_1:
script:
source: "doc['long'].value + ' ' + $('long', 0)"
field_2:
script:
source: "/* avoid stash */ $('long', 0) + ' ' + doc['long'].value"
field_3:
script:
source: "doc['long'].value + ' ' + $('long', 0)"
- match: { hits.hits.0.fields.field_0.0: '1 1' }
- match: { hits.hits.0.fields.field_1.0: '1 1' }
- match: { hits.hits.0.fields.field_2.0: '1 1' }
- match: { hits.hits.0.fields.field_3.0: '1 1' }

---
"test_leaf_cache_long_doc_first":
- do:
search:
rest_total_hits_as_int: true
index: test1
body:
script_fields:
field_0:
script:
source: "doc['long'].value + ' ' + $('long', 0)"
- match: { hits.hits.0.fields.field_0.0: '1 1' }

- do:
search:
rest_total_hits_as_int: true
index: test1
body:
script_fields:
field_0:
script:
source: "doc['long'].value + ' ' + $('long', 0)"
field_1:
script:
source: "/* avoid stash */ $('long', 0) + ' ' + doc['long'].value"
field_2:
script:
source: "doc['long'].value + ' ' + $('long', 0)"
field_3:
script:
source: "/* avoid stash */ $('long', 0) + ' ' + doc['long'].value"
- match: { hits.hits.0.fields.field_0.0: '1 1' }
- match: { hits.hits.0.fields.field_1.0: '1 1' }
- match: { hits.hits.0.fields.field_2.0: '1 1' }
- match: { hits.hits.0.fields.field_3.0: '1 1' }
134 changes: 109 additions & 25 deletions server/src/main/java/org/elasticsearch/search/lookup/LeafDocLookup.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
import java.util.function.BiFunction;
import java.util.function.Function;

import static org.elasticsearch.index.mapper.MappedFieldType.FielddataOperation.SCRIPT;
import static org.elasticsearch.index.mapper.MappedFieldType.FielddataOperation.SEARCH;

public class LeafDocLookup implements Map<String, ScriptDocValues<?>> {

private final Function<String, MappedFieldType> fieldTypeLookup;
Expand All @@ -34,7 +37,20 @@ public class LeafDocLookup implements Map<String, ScriptDocValues<?>> {

private int docId = -1;

private final Map<String, DocValuesScriptFieldFactory> localCacheScriptFieldData = Maps.newMapWithExpectedSize(4);
/*
We run parallel caches for the fields-access API ( field('f') ) and
the doc-access API.( doc['f'] ) for two reasons:
1. correctness - the field cache can store fields that retrieve values
from both doc values and source whereas the doc cache
can only store doc values. This leads to cases such as text
field where sharing a cache could lead to incorrect results in a
script that uses both types of access (likely common during upgrades)
2. performance - to keep the performance reasonable we move all caching updates to
per-segment computation as opposed to per-document computation
Note that we share doc values between both caches when possible.
*/
final Map<String, DocValuesScriptFieldFactory> fieldFactoryCache = Maps.newMapWithExpectedSize(4);
final Map<String, DocValuesScriptFieldFactory> docFactoryCache = Maps.newMapWithExpectedSize(4);

LeafDocLookup(
Function<String, MappedFieldType> fieldTypeLookup,
Expand All @@ -50,32 +66,48 @@ public void setDocument(int docId) {
this.docId = docId;
}

protected DocValuesScriptFieldFactory getScriptFieldFactory(String fieldName, MappedFieldType.FielddataOperation options) {
DocValuesScriptFieldFactory factory = localCacheScriptFieldData.get(fieldName);
// used to load data for a field-style api accessor
private DocValuesScriptFieldFactory getFactoryForField(String fieldName) {
final MappedFieldType fieldType = fieldTypeLookup.apply(fieldName);

// do not use cached source fallback fields for old style doc access
if (options == MappedFieldType.FielddataOperation.SEARCH
&& factory instanceof SourceValueFetcherIndexFieldData.ValueFetcherDocValues) {
factory = null;
if (fieldType == null) {
throw new IllegalArgumentException("No field found for [" + fieldName + "] in mapping");
}

if (factory == null) {
final MappedFieldType fieldType = fieldTypeLookup.apply(fieldName);
// Load the field data on behalf of the script. Otherwise, it would require
// additional permissions to deal with pagedbytes/ramusagestimator/etc.
return AccessController.doPrivileged(new PrivilegedAction<DocValuesScriptFieldFactory>() {
@Override
public DocValuesScriptFieldFactory run() {
DocValuesScriptFieldFactory fieldFactory = null;
IndexFieldData<?> indexFieldData = fieldDataLookup.apply(fieldType, SCRIPT);

if (fieldType == null) {
throw new IllegalArgumentException("No field found for [" + fieldName + "] in mapping");
}
DocValuesScriptFieldFactory docFactory = null;

if (docFactoryCache.isEmpty() == false) {
docFactory = docFactoryCache.get(fieldName);
}

// Load the field data on behalf of the script. Otherwise, it would require
// additional permissions to deal with pagedbytes/ramusagestimator/etc.
factory = AccessController.doPrivileged(new PrivilegedAction<DocValuesScriptFieldFactory>() {
@Override
public DocValuesScriptFieldFactory run() {
return fieldDataLookup.apply(fieldType, options).load(reader).getScriptFieldFactory(fieldName);
// if this field has already been accessed via the doc-access API and the field-access API
// uses doc values then we share to avoid double-loading
if (docFactory != null && indexFieldData instanceof SourceValueFetcherIndexFieldData == false) {
fieldFactory = docFactory;
} else {
fieldFactory = indexFieldData.load(reader).getScriptFieldFactory(fieldName);
}
});

localCacheScriptFieldData.put(fieldName, factory);
fieldFactoryCache.put(fieldName, fieldFactory);

return fieldFactory;
}
});
}

public Field<?> getScriptField(String fieldName) {
DocValuesScriptFieldFactory factory = fieldFactoryCache.get(fieldName);

if (factory == null) {
factory = getFactoryForField(fieldName);
}

try {
Expand All @@ -84,22 +116,74 @@ public DocValuesScriptFieldFactory run() {
throw ExceptionsHelper.convertToElastic(ioe);
}

return factory;
return factory.toScriptField();
}

public Field<?> getScriptField(String fieldName) {
return getScriptFieldFactory(fieldName, MappedFieldType.FielddataOperation.SCRIPT).toScriptField();
// used to load data for a doc-style api accessor
private DocValuesScriptFieldFactory getFactoryForDoc(String fieldName) {
final MappedFieldType fieldType = fieldTypeLookup.apply(fieldName);

if (fieldType == null) {
throw new IllegalArgumentException("No field found for [" + fieldName + "] in mapping");
}

// Load the field data on behalf of the script. Otherwise, it would require
// additional permissions to deal with pagedbytes/ramusagestimator/etc.
return AccessController.doPrivileged(new PrivilegedAction<DocValuesScriptFieldFactory>() {
@Override
public DocValuesScriptFieldFactory run() {
DocValuesScriptFieldFactory docFactory = null;
IndexFieldData<?> indexFieldData = fieldDataLookup.apply(fieldType, SEARCH);

DocValuesScriptFieldFactory fieldFactory = null;

if (fieldFactoryCache.isEmpty() == false) {
fieldFactory = fieldFactoryCache.get(fieldName);
}

if (fieldFactory != null) {
IndexFieldData<?> fieldIndexFieldData = fieldDataLookup.apply(fieldType, SCRIPT);

// if this field has already been accessed via the field-access API and the field-access API
// uses doc values then we share to avoid double-loading
if (fieldIndexFieldData instanceof SourceValueFetcherIndexFieldData == false) {
docFactory = fieldFactory;
}
}

if (docFactory == null) {
docFactory = indexFieldData.load(reader).getScriptFieldFactory(fieldName);
}

docFactoryCache.put(fieldName, docFactory);

return docFactory;
}
});
}

@Override
public ScriptDocValues<?> get(Object key) {
return getScriptFieldFactory(key.toString(), MappedFieldType.FielddataOperation.SEARCH).toScriptDocValues();
String fieldName = key.toString();
DocValuesScriptFieldFactory factory = docFactoryCache.get(fieldName);

if (factory == null) {
factory = getFactoryForDoc(key.toString());
}

try {
factory.setNextDocId(docId);
} catch (IOException ioe) {
throw ExceptionsHelper.convertToElastic(ioe);
}

return factory.toScriptDocValues();
}

@Override
public boolean containsKey(Object key) {
String fieldName = key.toString();
return localCacheScriptFieldData.get(fieldName) != null || fieldTypeLookup.apply(fieldName) != null;
return docFactoryCache.containsKey(key) || fieldFactoryCache.containsKey(key) || fieldTypeLookup.apply(fieldName) != null;
}

@Override
Expand Down

0 comments on commit cc0679e

Please sign in to comment.