Speed up synthetic keyword, ip, and text fields (#87930)
This speeds up synthetic source, especially when there are many fields
in the index that are declared in the mapping but don't have values.
This is fairly common with ECS, which the tsdb rally track uses. This
change improves fetch performance of that track:
```
|  50th percentile service time |    default |   6.24029 |  4.85568 | ms | -22.19% |
|  90th percentile service time |    default |   7.89923 |  6.52069 | ms | -17.45% |
|  99th percentile service time |    default |  12.0306  | 16.435   | ms | +36.61% |
| 100th percentile service time |    default |  14.2873  | 17.1175  | ms | +19.81% |
|  50th percentile service time | default_1k | 158.425   | 25.3236  | ms | -84.02% |
|  90th percentile service time | default_1k | 165.46    | 30.8655  | ms | -81.35% |
|  99th percentile service time | default_1k | 168.954   | 33.3342  | ms | -80.27% |
| 100th percentile service time | default_1k | 174.341   | 34.8344  | ms | -80.02% |
```

There's a slight increase in the 99th and 100th percentile service time
for fetching ten documents, which I think is unlucky jitter. Hopefully. The
average performance of fetching ten docs improves anyway, so I think
we're ok. Fetching a thousand documents improves roughly 80% across the
board, which is lovely.

This works by doing three things (a simplified sketch follows the list):
1. Teach the "leaf" layer of the source loader to detect when the field is
   empty in that segment and remove it from the synthesis process
   entirely. This brings most of the speed up in tsdb.
2. Replace `hasValue` with a callback when writing the first value.
   `hasValue` was resulting in a 2^n-like number of calls that really
   showed up in the profiler.
3. Replace the `ArrayList` of leaf loaders with an array. Before fixing
   the other two issues, the `ArrayList`'s iterator really showed up in
   the profiling. It's probably much less worth it now, but it's small.
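
Roughly, points 1 and 3 combine like this. This is a minimal sketch only:
`LeafFieldLoader` and `CompositeLeafLoader` are made-up names for the
illustration, not the real `SourceLoader` API.

```java
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/** Illustrative stand-in for a per-field synthetic source leaf loader. */
interface LeafFieldLoader {
    /** True when the field has no values at all in this segment. */
    boolean empty();

    /** Append this field's value(s) for the current document. */
    void write(StringBuilder doc) throws IOException;
}

/** Composes the per-field loaders for one segment. */
final class CompositeLeafLoader {
    // Point 3: keep the surviving loaders in a plain array so the
    // per-document loop doesn't go through an ArrayList iterator.
    private final LeafFieldLoader[] loaders;

    CompositeLeafLoader(List<LeafFieldLoader> perField) {
        // Point 1: fields with no values in this segment are dropped
        // once here instead of being consulted for every document.
        List<LeafFieldLoader> nonEmpty = new ArrayList<>();
        for (LeafFieldLoader loader : perField) {
            if (loader.empty() == false) {
                nonEmpty.add(loader);
            }
        }
        this.loaders = nonEmpty.toArray(new LeafFieldLoader[0]);
    }

    /** Synthesize one document by asking only the surviving loaders. */
    String synthesize() throws IOException {
        StringBuilder doc = new StringBuilder("{");
        for (LeafFieldLoader loader : loaders) {
            loader.write(doc);
        }
        return doc.append('}').toString();
    }
}
```

Point 2 is the same idea applied per value: instead of wrappers repeatedly
asking `hasValue` before each write, the leaf invokes a callback the first
time it actually writes a value for the document.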

All of this brings synthetic source much closer to the fetch performance
of standard _source:
```
|  50th percentile service time | default_1k |  11.4016  | 25.3236  | ms | +122.11% |
|  90th percentile service time | default_1k |  13.7212  | 30.8655  | ms | +124.95% |
|  99th percentile service time | default_1k |  15.8785  | 33.3342  | ms | +109.93% |
| 100th percentile service time | default_1k |  16.9715  | 34.8344  | ms | +105.25% |
```

One important thing: these perf numbers come from fetching *hot* blocks
on disk. They mostly compare CPU overhead, not disk overhead.
nik9000 committed Jun 24, 2022
1 parent 52cf0f7 commit bcca9d1
Showing 21 changed files with 318 additions and 110 deletions.
@@ -678,7 +678,7 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
}
return new NumberFieldMapper.NumericSyntheticFieldLoader(name(), simpleName()) {
@Override
protected void loadNextValue(XContentBuilder b, long value) throws IOException {
protected void writeValue(XContentBuilder b, long value) throws IOException {
b.value(decodeForSyntheticSource(value, scalingFactor));
}
};
@@ -361,12 +361,12 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
private final Double nullValue = usually() ? null : round(randomValue());

@Override
public SyntheticSourceExample example() {
public SyntheticSourceExample example(int maxValues) {
if (randomBoolean()) {
Tuple<Double, Double> v = generateValue();
return new SyntheticSourceExample(v.v1(), v.v2(), this::mapping);
}
List<Tuple<Double, Double>> values = randomList(1, 5, this::generateValue);
List<Tuple<Double, Double>> values = randomList(1, maxValues, this::generateValue);
List<Double> in = values.stream().map(Tuple::v1).toList();
List<Double> outList = values.stream().map(Tuple::v2).sorted().toList();
Object out = outList.size() == 1 ? outList.get(0) : outList;
@@ -257,7 +257,7 @@ private GetResult innerGetFetch(
SourceLoader loader = forceSyntheticSource
? new SourceLoader.Synthetic(mappingLookup.getMapping())
: mappingLookup.newSourceLoader();
source = loader.leaf(docIdAndVersion.reader).source(fieldVisitor, docIdAndVersion.docId);
source = loader.leaf(docIdAndVersion.reader, new int[] { docIdAndVersion.docId }).source(fieldVisitor, docIdAndVersion.docId);

// put stored fields into result objects
if (fieldVisitor.fields().isEmpty() == false) {
@@ -439,7 +439,7 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
}
return new NumberFieldMapper.NumericSyntheticFieldLoader(name(), simpleName()) {
@Override
protected void loadNextValue(XContentBuilder b, long value) throws IOException {
protected void writeValue(XContentBuilder b, long value) throws IOException {
b.value(value == 1);
}
};
@@ -917,7 +917,7 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
}
return new NumberFieldMapper.NumericSyntheticFieldLoader(name(), simpleName()) {
@Override
protected void loadNextValue(XContentBuilder b, long value) throws IOException {
protected void writeValue(XContentBuilder b, long value) throws IOException {
b.value(fieldType().format(value, fieldType().dateTimeFormatter()));
}
};
@@ -477,7 +477,7 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
final GeoPoint point = new GeoPoint();

@Override
protected void loadNextValue(XContentBuilder b, long value) throws IOException {
protected void writeValue(XContentBuilder b, long value) throws IOException {
point.reset(GeoEncodingUtils.decodeLatitude((int) (value >>> 32)), GeoEncodingUtils.decodeLongitude((int) value));
point.toXContent(b, ToXContent.EMPTY_PARAMS);
}
@@ -35,7 +35,6 @@
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.lookup.FieldValues;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;

import java.io.IOException;
@@ -556,9 +555,15 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
}
return new KeywordFieldMapper.BytesSyntheticFieldLoader(name(), simpleName()) {
@Override
protected void loadNextValue(XContentBuilder b, BytesRef value) throws IOException {
protected BytesRef convert(BytesRef value) {
byte[] bytes = Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length);
b.value(NetworkAddress.format(InetAddressPoint.decode(bytes)));
return new BytesRef(NetworkAddress.format(InetAddressPoint.decode(bytes)));
}

@Override
protected BytesRef preserve(BytesRef value) {
// No need to copy because convert has made a deep copy
return value;
}
};
}
@@ -23,6 +23,7 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.ReaderSlice;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@@ -1060,8 +1061,14 @@ protected SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String simpleNa
}
return new BytesSyntheticFieldLoader(name(), simpleName) {
@Override
protected void loadNextValue(XContentBuilder b, BytesRef value) throws IOException {
b.value(value.utf8ToString());
protected BytesRef convert(BytesRef value) {
return value;
}

@Override
protected BytesRef preserve(BytesRef value) {
// Preserve must make a deep copy because convert gets a shallow copy from the iterator
return BytesRef.deepCopyOf(value);
}
};
}
@@ -1076,13 +1083,71 @@ public BytesSyntheticFieldLoader(String name, String simpleName) {
}

@Override
public Leaf leaf(LeafReader reader) throws IOException {
SortedSetDocValues leaf = DocValues.getSortedSet(reader, name);
if (leaf.getValueCount() == 0) {
return SourceLoader.SyntheticFieldLoader.NOTHING.leaf(reader);
public Leaf leaf(LeafReader reader, int[] docIdsInLeaf) throws IOException {
SortedSetDocValues dv = DocValues.getSortedSet(reader, name);
if (dv.getValueCount() == 0) {
return SourceLoader.SyntheticFieldLoader.NOTHING_LEAF;
}
if (docIdsInLeaf.length == 1) {
/*
* The singleton optimization is mostly about looking up ordinals
* in sorted order and doesn't buy anything if there is only a single
* document.
*/
return new ImmediateLeaf(dv);
}
SortedDocValues singleton = DocValues.unwrapSingleton(dv);
if (singleton != null) {
return singletonLeaf(singleton, docIdsInLeaf);
}
return new ImmediateLeaf(dv);
}

/**
* Load all ordinals for all docs up front and resolve to their string
* values in order. This should be much more disk-friendly than
* {@link ImmediateLeaf} because it resolves the ordinals in order and
* marginally more cpu friendly because it resolves the ordinals one time.
*/
private Leaf singletonLeaf(SortedDocValues singleton, int[] docIdsInLeaf) throws IOException {
int[] ords = new int[docIdsInLeaf.length];
int found = 0;
for (int d = 0; d < docIdsInLeaf.length; d++) {
if (false == singleton.advanceExact(docIdsInLeaf[d])) {
ords[d] = -1;
continue;
}
ords[d] = singleton.ordValue();
found++;
}
if (found == 0) {
return SourceLoader.SyntheticFieldLoader.NOTHING_LEAF;
}
int[] sortedOrds = ords.clone();
Arrays.sort(sortedOrds);
int unique = 0;
int prev = -1;
for (int ord : sortedOrds) {
if (ord != prev) {
prev = ord;
unique++;
}
}
int[] uniqueOrds = new int[unique];
BytesRef[] converted = new BytesRef[unique];
unique = 0;
prev = -1;
for (int ord : sortedOrds) {
if (ord != prev) {
prev = ord;
uniqueOrds[unique] = ord;
converted[unique] = preserve(convert(singleton.lookupOrd(ord)));
unique++;
}
}
logger.debug("loading [{}] on [{}] docs covering [{}] ords", name, docIdsInLeaf.length, uniqueOrds.length);
return new SourceLoader.SyntheticFieldLoader.Leaf() {
private boolean hasValue;
private int ord;

@Override
public boolean empty() {
@@ -1091,32 +1156,90 @@ public boolean empty() {

@Override
public boolean advanceToDoc(int docId) throws IOException {
return hasValue = leaf.advanceExact(docId);
int idx = Arrays.binarySearch(docIdsInLeaf, docId);
if (idx < 0) {
throw new IllegalStateException(
"received unexpected docId [" + docId + "]. Expected " + Arrays.toString(docIdsInLeaf)
);
}
ord = ords[idx];
return ord >= 0;
}

@Override
public void write(XContentBuilder b) throws IOException {
if (false == hasValue) {
return;
}
long first = leaf.nextOrd();
long next = leaf.nextOrd();
if (next == SortedSetDocValues.NO_MORE_ORDS) {
b.field(simpleName);
loadNextValue(b, leaf.lookupOrd(first));
if (ord < 0) {
return;
}
b.startArray(simpleName);
loadNextValue(b, leaf.lookupOrd(first));
loadNextValue(b, leaf.lookupOrd(next));
while ((next = leaf.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
loadNextValue(b, leaf.lookupOrd(next));
int idx = Arrays.binarySearch(uniqueOrds, ord);
if (idx < 0) {
throw new IllegalStateException("received unexpected ord [" + ord + "]. Expected " + Arrays.toString(uniqueOrds));
}
b.endArray();
BytesRef c = converted[idx];
b.field(simpleName).utf8Value(c.bytes, c.offset, c.length);
}
};
}

protected abstract void loadNextValue(XContentBuilder b, BytesRef value) throws IOException;
/**
* Load ordinals in line with populating the doc and immediately
* convert from ordinals into {@link BytesRef}s.
*/
private class ImmediateLeaf implements Leaf {
private final SortedSetDocValues dv;
private boolean hasValue;

ImmediateLeaf(SortedSetDocValues dv) {
this.dv = dv;
}

@Override
public boolean empty() {
return false;
}

@Override
public boolean advanceToDoc(int docId) throws IOException {
return hasValue = dv.advanceExact(docId);
}

@Override
public void write(XContentBuilder b) throws IOException {
if (false == hasValue) {
return;
}
long first = dv.nextOrd();
long next = dv.nextOrd();
if (next == SortedSetDocValues.NO_MORE_ORDS) {
BytesRef c = convert(dv.lookupOrd(first));
b.field(simpleName).utf8Value(c.bytes, c.offset, c.length);
return;
}
b.startArray(simpleName);
BytesRef c = convert(dv.lookupOrd(first));
b.utf8Value(c.bytes, c.offset, c.length);
c = convert(dv.lookupOrd(next));
b.utf8Value(c.bytes, c.offset, c.length);
while ((next = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
c = convert(dv.lookupOrd(next));
b.utf8Value(c.bytes, c.offset, c.length);
}
b.endArray();
}
}

/**
* Convert a {@link BytesRef} read from the source into bytes to write
* to the xcontent. This shouldn't make a deep copy if the conversion
* process itself doesn't require one.
*/
protected abstract BytesRef convert(BytesRef value);

/**
* Preserves {@link BytesRef bytes} returned by {@link #convert}
* to be written later. This should make a
* {@link BytesRef#deepCopyOf deep copy} if {@link #convert} didn't.
*/
protected abstract BytesRef preserve(BytesRef value);
}
}
@@ -379,7 +379,7 @@ private static void validateParsed(float value) {
SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fieldName, String fieldSimpleName) {
return new NumericSyntheticFieldLoader(fieldName, fieldSimpleName) {
@Override
protected void loadNextValue(XContentBuilder b, long value) throws IOException {
protected void writeValue(XContentBuilder b, long value) throws IOException {
b.value(HalfFloatPoint.sortableShortToHalfFloat((short) value));
}
};
@@ -514,7 +514,7 @@ private static void validateParsed(float value) {
SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fieldName, String fieldSimpleName) {
return new NumericSyntheticFieldLoader(fieldName, fieldSimpleName) {
@Override
protected void loadNextValue(XContentBuilder b, long value) throws IOException {
protected void writeValue(XContentBuilder b, long value) throws IOException {
b.value(NumericUtils.sortableIntToFloat((int) value));
}
};
@@ -627,7 +627,7 @@ private static void validateParsed(double value) {
SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fieldName, String fieldSimpleName) {
return new NumericSyntheticFieldLoader(fieldName, fieldSimpleName) {
@Override
protected void loadNextValue(XContentBuilder b, long value) throws IOException {
protected void writeValue(XContentBuilder b, long value) throws IOException {
b.value(NumericUtils.sortableLongToDouble(value));
}
};
@@ -1270,7 +1270,7 @@ public double reduceToStoredPrecision(double value) {
private static SourceLoader.SyntheticFieldLoader syntheticLongFieldLoader(String fieldName, String fieldSimpleName) {
return new NumericSyntheticFieldLoader(fieldName, fieldSimpleName) {
@Override
protected void loadNextValue(XContentBuilder b, long value) throws IOException {
protected void writeValue(XContentBuilder b, long value) throws IOException {
b.value(value);
}
};
@@ -1631,10 +1631,10 @@ protected NumericSyntheticFieldLoader(String name, String simpleName) {
}

@Override
public Leaf leaf(LeafReader reader) throws IOException {
public Leaf leaf(LeafReader reader, int[] docIdsInLeaf) throws IOException {
SortedNumericDocValues leaf = dv(reader);
if (leaf == null) {
return SourceLoader.SyntheticFieldLoader.NOTHING.leaf(reader);
return SourceLoader.SyntheticFieldLoader.NOTHING_LEAF;
}
return new SourceLoader.SyntheticFieldLoader.Leaf() {
private boolean hasValue;
@@ -1656,19 +1656,19 @@ public void write(XContentBuilder b) throws IOException {
}
if (leaf.docValueCount() == 1) {
b.field(simpleName);
loadNextValue(b, leaf.nextValue());
writeValue(b, leaf.nextValue());
return;
}
b.startArray(simpleName);
for (int i = 0; i < leaf.docValueCount(); i++) {
loadNextValue(b, leaf.nextValue());
writeValue(b, leaf.nextValue());
}
b.endArray();
}
};
}

protected abstract void loadNextValue(XContentBuilder b, long value) throws IOException;
protected abstract void writeValue(XContentBuilder b, long value) throws IOException;

/**
* Returns a {@link SortedNumericDocValues} or null if it doesn't have any doc values.
@@ -562,10 +562,10 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
.toList();
return new SourceLoader.SyntheticFieldLoader() {
@Override
public Leaf leaf(LeafReader reader) throws IOException {
public Leaf leaf(LeafReader reader, int[] docIdsInLeaf) throws IOException {
List<SourceLoader.SyntheticFieldLoader.Leaf> l = new ArrayList<>();
for (SourceLoader.SyntheticFieldLoader field : fields) {
Leaf leaf = field.leaf(reader);
Leaf leaf = field.leaf(reader, docIdsInLeaf);
if (false == leaf.empty()) {
l.add(leaf);
}
