Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/138299.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 138299
summary: Add `PerFieldStoredFieldsFormat` to allow multiple stored field formats
area: Codec
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/138357.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 138357
summary: Integrate bloom filter checks with TSDBSyntheticIdPostingsFormat
area: Codec
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.nullValue;

/**
* Test suite for time series indices that use synthetic ids for documents.
Expand Down Expand Up @@ -103,8 +104,10 @@ public void testInvalidIndexMode() {

public void testSyntheticId() throws Exception {
assumeTrue("Test should only run with feature flag", IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG);
assumeTrue("Test should only run with feature flag", IndexSettings.USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID_FEATURE_FLAG);
final var dataStreamName = randomIdentifier();
putDataStreamTemplate(dataStreamName, randomIntBetween(1, 5));
final var enableStoredFieldsBloomFilter = randomBoolean();
putDataStreamTemplate(dataStreamName, randomIntBetween(1, 5), enableStoredFieldsBloomFilter);

final var docs = new HashMap<String, String>();
final var unit = randomFrom(ChronoUnit.SECONDS, ChronoUnit.MINUTES);
Expand Down Expand Up @@ -265,14 +268,21 @@ enum Operation {
for (var index : indices) {
var diskUsage = diskUsage(index);
var diskUsageIdField = AnalyzeIndexDiskUsageTestUtils.getPerFieldDiskUsage(diskUsage, IdFieldMapper.NAME);
assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
// If the _id stored fields bloom filter is enabled, IndexDiskUsageStats won't account for anything since
// the bloom filter is not exposed through the Reader API.
if (enableStoredFieldsBloomFilter) {
assertThat(diskUsageIdField, nullValue());
} else {
assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
}
}
}

public void testGetFromTranslogBySyntheticId() throws Exception {
assumeTrue("Test should only run with feature flag", IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG);
final var dataStreamName = randomIdentifier();
putDataStreamTemplate(dataStreamName, 1);
final var enableStoredFieldsBloomFilter = randomBoolean();
putDataStreamTemplate(dataStreamName, 1, enableStoredFieldsBloomFilter);

final var docs = new HashMap<String, String>();
final var unit = randomFrom(ChronoUnit.SECONDS, ChronoUnit.MINUTES);
Expand Down Expand Up @@ -376,7 +386,13 @@ public void testGetFromTranslogBySyntheticId() throws Exception {
for (var index : indices) {
var diskUsage = diskUsage(index);
var diskUsageIdField = AnalyzeIndexDiskUsageTestUtils.getPerFieldDiskUsage(diskUsage, IdFieldMapper.NAME);
assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
// If the _id stored fields bloom filter is enabled, IndexDiskUsageStats won't account for anything since
// the bloom filter is not exposed through the Reader API.
if (enableStoredFieldsBloomFilter) {
assertThat(diskUsageIdField, nullValue());
} else {
assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
}
}

assertHitCount(client().prepareSearch(dataStreamName).setSize(0), 10L);
Expand Down Expand Up @@ -413,11 +429,12 @@ private static BulkItemResponse[] createDocuments(String indexName, XContentBuil
return bulkResponse.getItems();
}

private static void putDataStreamTemplate(String indexPattern, int shards) throws IOException {
private static void putDataStreamTemplate(String indexPattern, int shards, boolean enableStoredFieldsBloomFilter) throws IOException {
final var settings = indexSettings(shards, 0).put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES.getName())
.put(IndexSettings.BLOOM_FILTER_ID_FIELD_ENABLED_SETTING.getKey(), false)
.put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), -1)
.put(IndexSettings.USE_SYNTHETIC_ID.getKey(), true);
.put(IndexSettings.USE_SYNTHETIC_ID.getKey(), true)
.put(IndexSettings.USE_STORED_FIELD_BLOOM_FILTER_ID.getKey(), enableStoredFieldsBloomFilter);

final var mappings = """
{
Expand Down
5 changes: 4 additions & 1 deletion server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@
exports org.elasticsearch.index.codec;
exports org.elasticsearch.index.codec.tsdb;
exports org.elasticsearch.index.codec.bloomfilter;
exports org.elasticsearch.index.codec.storedfields;
exports org.elasticsearch.index.codec.zstd;
exports org.elasticsearch.index.engine;
exports org.elasticsearch.index.fielddata;
Expand Down Expand Up @@ -475,7 +476,9 @@
org.elasticsearch.index.codec.Elasticsearch816Codec,
org.elasticsearch.index.codec.Elasticsearch900Codec,
org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec,
org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec;
org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec,
org.elasticsearch.index.codec.Elasticsearch93DefaultCompressionLucene103,
org.elasticsearch.index.codec.Elasticsearch93ZstdCompressionLucene103Codec;

provides org.apache.logging.log4j.core.util.ContextDataProvider with org.elasticsearch.common.logging.DynamicContextDataProvider;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,9 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
if (IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG) {
settings.add(IndexSettings.USE_SYNTHETIC_ID);
}
if (IndexSettings.USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID_FEATURE_FLAG) {
settings.add(IndexSettings.USE_STORED_FIELD_BLOOM_FILTER_ID);
}
settings.add(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING);
BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings);
};
Expand Down
65 changes: 65 additions & 0 deletions server/src/main/java/org/elasticsearch/index/IndexSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,57 @@ public Iterator<Setting<?>> settings() {
Property.Final
);

/**
 * Feature flag named {@code stored_field_bloom_filter}. The setting {@link #USE_STORED_FIELD_BLOOM_FILTER_ID}
 * may only be enabled when this flag is on.
 */
public static final boolean USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID_FEATURE_FLAG = new FeatureFlag("stored_field_bloom_filter")
    .isEnabled();

/**
 * Boolean index setting {@code index.mapping.use_stored_field_bloom_filter_id}, defaulting to {@code false} and
 * declared with {@link Property#IndexScope} and {@link Property#Final}.
 * <p>
 * Enabling it is rejected with an {@link IllegalArgumentException} when the feature flag is disabled, or when
 * {@link #MODE} is anything other than {@link IndexMode#TIME_SERIES}.
 */
public static final Setting<Boolean> USE_STORED_FIELD_BLOOM_FILTER_ID = Setting.boolSetting(
    "index.mapping.use_stored_field_bloom_filter_id",
    false,
    new Setting.Validator<>() {
        /** Stand-alone check: the setting can only be switched on while the feature flag is enabled. */
        @Override
        public void validate(Boolean value) {
            if (value == false || USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID_FEATURE_FLAG) {
                return;
            }
            final String message = String.format(
                Locale.ROOT,
                "The setting [%s] is only permitted when the feature flag is enabled.",
                USE_STORED_FIELD_BLOOM_FILTER_ID.getKey()
            );
            throw new IllegalArgumentException(message);
        }

        /** Cross-setting check: the setting can only be switched on for time-series indices. */
        @Override
        public void validate(Boolean value, Map<Setting<?>, Object> dependencies) {
            if (value == false) {
                return;
            }
            // MODE is declared as a dependency in settings(), so its resolved value is available here.
            final var currentMode = (IndexMode) dependencies.get(MODE);
            if (currentMode == IndexMode.TIME_SERIES) {
                return;
            }
            final String message = String.format(
                Locale.ROOT,
                "The setting [%s] is only permitted when [%s] is set to [%s]. Current mode: [%s].",
                USE_STORED_FIELD_BLOOM_FILTER_ID.getKey(),
                MODE.getKey(),
                IndexMode.TIME_SERIES.name(),
                currentMode.name()
            );
            throw new IllegalArgumentException(message);
        }

        /** The settings whose values are handed to the two-argument {@code validate}. */
        @Override
        public Iterator<Setting<?>> settings() {
            return List.<Setting<?>>of(MODE).iterator();
        }
    },
    Property.IndexScope,
    Property.Final
);

/**
* The {@link IndexMode "mode"} of the index.
*/
Expand Down Expand Up @@ -1020,6 +1071,7 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) {
private final boolean useTimeSeriesSyntheticId;
private final boolean useTimeSeriesDocValuesFormat;
private final boolean useEs812PostingsFormat;
private final boolean useStoredFieldsBloomFilterForId;

/**
* The maximum number of refresh listeners allows on this shard.
Expand Down Expand Up @@ -1230,6 +1282,12 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
} else {
useTimeSeriesSyntheticId = false;
}
useStoredFieldsBloomFilterForId = IndexSettings.USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID_FEATURE_FLAG
&& scopedSettings.get(USE_STORED_FIELD_BLOOM_FILTER_ID);
if (useStoredFieldsBloomFilterForId) {
assert indexMetadata.getIndexMode() == IndexMode.TIME_SERIES : indexMetadata.getIndexMode();
assert indexMetadata.getCreationVersion().onOrAfter(IndexVersions.TIME_SERIES_USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID);
}
if (recoverySourceSyntheticEnabled) {
if (DiscoveryNode.isStateless(settings)) {
throw new IllegalArgumentException("synthetic recovery source is only allowed in stateful");
Expand Down Expand Up @@ -1969,6 +2027,13 @@ public boolean useTimeSeriesSyntheticId() {
return useTimeSeriesSyntheticId;
}

/**
* Reports whether the stored fields bloom filter for the {@code _id} field is in use for this index.
* The value is computed once in the constructor and never changes afterwards.
*
* @return {@code true} only when the {@code USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID_FEATURE_FLAG} feature flag is
* enabled and the {@code USE_STORED_FIELD_BLOOM_FILTER_ID} setting is {@code true} for this index.
*/
public boolean useStoredFieldsBloomFilterForId() {
return useStoredFieldsBloomFilterForId;
}

/**
* @return Whether the time series doc value format should be used.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ private static Version parseUnchecked(String version) {
public static final IndexVersion TIME_SERIES_USE_SYNTHETIC_ID = def(9_044_0_00, Version.LUCENE_10_3_1);
public static final IndexVersion TIME_SERIES_DIMENSIONS_USE_SKIPPERS = def(9_045_0_00, Version.LUCENE_10_3_1);
public static final IndexVersion TIME_SERIES_ALL_FIELDS_USE_SKIPPERS = def(9_046_0_00, Version.LUCENE_10_3_1);
public static final IndexVersion TIME_SERIES_USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID = def(9_047_0_00, Version.LUCENE_10_3_1);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.elasticsearch.common.util.FeatureFlag;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdCodec;
import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;
import org.elasticsearch.index.mapper.MapperService;
Expand Down Expand Up @@ -49,7 +50,19 @@ public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays)
final var codecs = new HashMap<String, Codec>();

Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene103Codec.Mode.BEST_SPEED, mapperService, bigArrays);
if (ZSTD_STORED_FIELDS_FEATURE_FLAG) {
if (IndexSettings.USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID_FEATURE_FLAG) {
if (ZSTD_STORED_FIELDS_FEATURE_FLAG) {
codecs.put(
DEFAULT_CODEC,
new PerFieldMapperCodecZstdCompression(Zstd814StoredFieldsFormat.Mode.BEST_SPEED, mapperService, bigArrays)
);
} else {
codecs.put(
DEFAULT_CODEC,
new PerFieldMapperCodecDefaultCompression(Lucene103Codec.Mode.BEST_SPEED, mapperService, bigArrays)
);
}
} else if (ZSTD_STORED_FIELDS_FEATURE_FLAG) {
codecs.put(DEFAULT_CODEC, new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_SPEED, mapperService, bigArrays));
} else {
codecs.put(DEFAULT_CODEC, legacyBestSpeedCodec);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene103.Lucene103Codec;
import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.elasticsearch.index.codec.storedfields.ESLucene90StoredFieldsFormat;
import org.elasticsearch.index.codec.storedfields.ESStoredFieldsFormat;
import org.elasticsearch.index.codec.storedfields.PerFieldStoredFieldsFormat;

/**
* A {@link FilterCodec} that wraps a {@link Lucene103Codec} and replaces its postings, doc values, KNN vectors
* and stored fields formats with per-field variants. Each per-field variant asks the matching
* {@code get...FormatForField(String)} method on this class which format to use, so subclasses can override
* those methods to choose a format per field.
*
* <p>Unless overridden, every field uses {@link Lucene103PostingsFormat}, {@link Lucene90DocValuesFormat},
* {@link Lucene99HnswVectorsFormat} and an {@link ESLucene90StoredFieldsFormat} built with the configured mode.
*
* <p>NOTE(review): the codec is registered under the name {@code "Elasticsearch93Lucene103"}, which differs from
* the class name; confirm that no other codec registers the same name.
*/
public class Elasticsearch93DefaultCompressionLucene103 extends FilterCodec {
// Format returned by getPostingsFormatForField unless a subclass overrides it.
private final PostingsFormat defaultPostingsFormat;
// Per-field postings dispatcher; delegates the choice to getPostingsFormatForField.
private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return Elasticsearch93DefaultCompressionLucene103.this.getPostingsFormatForField(field);
}
};

// Format returned by getDocValuesFormatForField unless a subclass overrides it.
private final DocValuesFormat defaultDVFormat;
// Per-field doc values dispatcher; delegates the choice to getDocValuesFormatForField.
private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
return Elasticsearch93DefaultCompressionLucene103.this.getDocValuesFormatForField(field);
}
};

// Format returned by getKnnVectorsFormatForField unless a subclass overrides it.
private final KnnVectorsFormat defaultKnnVectorsFormat;
// Per-field KNN vectors dispatcher; delegates the choice to getKnnVectorsFormatForField.
private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return Elasticsearch93DefaultCompressionLucene103.this.getKnnVectorsFormatForField(field);
}
};

// Format returned by getStoredFieldsFormatForField unless a subclass overrides it; visible to subclasses.
protected final ESStoredFieldsFormat defaultStoredFieldsFormat;
// Per-field stored fields dispatcher; delegates the choice to getStoredFieldsFormatForField.
private final StoredFieldsFormat storedFieldsFormat = new PerFieldStoredFieldsFormat() {
@Override
protected ESStoredFieldsFormat getStoredFieldsFormatForField(String field) {
return Elasticsearch93DefaultCompressionLucene103.this.getStoredFieldsFormatForField(field);
}
};

/** Public no-arg constructor, needed for SPI loading at read-time. Uses {@link Lucene103Codec.Mode#BEST_SPEED}. */
public Elasticsearch93DefaultCompressionLucene103() {
this(Lucene103Codec.Mode.BEST_SPEED);
}

/**
* Creates the codec with the given mode for the default stored fields format.
*
* @param mode the mode passed to the default {@link ESLucene90StoredFieldsFormat}
*/
public Elasticsearch93DefaultCompressionLucene103(Lucene103Codec.Mode mode) {
super("Elasticsearch93Lucene103", new Lucene103Codec());
this.defaultStoredFieldsFormat = new ESLucene90StoredFieldsFormat(mode);
this.defaultPostingsFormat = new Lucene103PostingsFormat();
this.defaultDVFormat = new Lucene90DocValuesFormat();
this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
}

/** Returns the per-field stored fields format, which consults {@link #getStoredFieldsFormatForField(String)}. */
@Override
public StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
}

/** Returns the per-field postings format, which consults {@link #getPostingsFormatForField(String)}. */
@Override
public final PostingsFormat postingsFormat() {
return postingsFormat;
}

/** Returns the per-field doc values format, which consults {@link #getDocValuesFormatForField(String)}. */
@Override
public final DocValuesFormat docValuesFormat() {
return docValuesFormat;
}

/** Returns the per-field KNN vectors format, which consults {@link #getKnnVectorsFormatForField(String)}. */
@Override
public final KnnVectorsFormat knnVectorsFormat() {
return knnVectorsFormat;
}

/**
* Returns the postings format that should be used for writing new segments of <code>field</code>.
*
* <p>The default implementation always returns the {@link Lucene103PostingsFormat} created in the constructor.
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future versions of Lucene are only guaranteed to be able to read the default implementation.
*/
public PostingsFormat getPostingsFormatForField(String field) {
return defaultPostingsFormat;
}

/**
* Returns the doc values format that should be used for writing new segments of <code>field</code>.
*
* <p>The default implementation always returns the {@link Lucene90DocValuesFormat} created in the constructor.
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future versions of Lucene are only guaranteed to be able to read the default implementation.
*/
public DocValuesFormat getDocValuesFormatForField(String field) {
return defaultDVFormat;
}

/**
* Returns the vectors format that should be used for writing new segments of <code>field</code>.
*
* <p>The default implementation always returns the {@link Lucene99HnswVectorsFormat} created in the constructor.
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future versions of Lucene are only guaranteed to be able to read the default implementation.
*/
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return defaultKnnVectorsFormat;
}

/**
* Returns the stored fields format that should be used for writing new segments of <code>field</code>.
*
* <p>The default implementation always returns {@link #defaultStoredFieldsFormat}.
*/
// The return type is ESStoredFieldsFormat rather than StoredFieldsFormat; per the original author
// this is needed for the SPI loading.
public ESStoredFieldsFormat getStoredFieldsFormatForField(String field) {
return defaultStoredFieldsFormat;
}
}
Loading