From e562e8c6f4ea97d210c80800d92ea6b5945aeab2 Mon Sep 17 00:00:00 2001
From: tlrx
Date: Fri, 24 Oct 2025 18:34:18 +0200
Subject: [PATCH 01/20] Change document _id format for time series datastreams
---
.../datastreams/TSDBSyntheticIdsIT.java | 261 +++++++--
.../cluster/routing/IndexRouting.java | 26 +-
.../lucene/uid/VersionsAndSeqNoResolver.java | 31 +-
.../elasticsearch/index/IndexVersions.java | 1 +
.../codec/tsdb/TSDBSyntheticIdCodec.java | 51 ++
.../tsdb/TSDBSyntheticIdFieldsProducer.java | 505 +++++++++++++-----
.../tsdb/TSDBSyntheticIdPostingsFormat.java | 2 +
.../index/engine/InternalEngine.java | 13 +-
.../index/mapper/ParsedDocument.java | 29 +-
.../mapper/TsidExtractingIdFieldMapper.java | 56 +-
10 files changed, 767 insertions(+), 208 deletions(-)
diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
index b0d14d0d80221..81e99d154060d 100644
--- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
+++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
@@ -34,15 +34,18 @@
import java.io.IOException;
import java.time.Instant;
+import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
-import java.util.Random;
import static org.elasticsearch.common.time.FormatNames.STRICT_DATE_OPTIONAL_TIME;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
@@ -97,51 +100,56 @@ public void testInvalidIndexMode() {
@TestLogging(reason = "debug", value = "org.elasticsearch.index.engine.Engine:TRACE")
public void testSyntheticId() throws Exception {
assumeTrue("Test should only run with feature flag", IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG);
- final var indexName = randomIdentifier();
- putDataStreamTemplate(random(), indexName);
+ final var dataStreamName = randomIdentifier();
+ putDataStreamTemplate(dataStreamName, randomIntBetween(1, 3));
+ final var docs = new HashMap<String, String>();
+ final var unit = randomFrom(ChronoUnit.SECONDS, ChronoUnit.MINUTES);
final var timestamp = Instant.now();
- // Index 5 docs in datastream
+ // Index 10 docs in datastream
+ //
+ // For convenience, each document's metric value equals the index of its item in the bulk response
var results = createDocuments(
- indexName,
- document(timestamp, "vm-dev01", "cpu-load", 0), // will be updated
- document(timestamp.plusSeconds(2), "vm-dev01", "cpu-load", 1), // will be deleted
- document(timestamp, "vm-dev02", "cpu-load", 2),
- document(timestamp.plusSeconds(2), "vm-dev03", "cpu-load", 3),
- document(timestamp.plusSeconds(3), "vm-dev03", "cpu-load", 4)
+ dataStreamName,
+ // t + 0s
+ document(timestamp, "vm-dev01", "cpu-load", 0),
+ document(timestamp, "vm-dev02", "cpu-load", 1),
+ // t + 1s
+ document(timestamp.plus(1, unit), "vm-dev01", "cpu-load", 2),
+ document(timestamp.plus(1, unit), "vm-dev02", "cpu-load", 3),
+ // t + 0s out-of-order doc
+ document(timestamp, "vm-dev03", "cpu-load", 4),
+ // t + 2s
+ document(timestamp.plus(2, unit), "vm-dev01", "cpu-load", 5),
+ document(timestamp.plus(2, unit), "vm-dev02", "cpu-load", 6),
+ // t - 1s out-of-order doc
+ document(timestamp.minus(1, unit), "vm-dev01", "cpu-load", 7),
+ // t + 3s
+ document(timestamp.plus(3, unit), "vm-dev01", "cpu-load", 8),
+ document(timestamp.plus(3, unit), "vm-dev02", "cpu-load", 9)
);
- // Verify documents
- assertThat(results[0].getResponse().getResult(), equalTo(DocWriteResponse.Result.CREATED));
- assertThat(results[0].getVersion(), equalTo(1L));
-
- assertThat(results[1].getResponse().getResult(), equalTo(DocWriteResponse.Result.CREATED));
- assertThat(results[1].getVersion(), equalTo(1L));
-
- assertThat(results[2].getResponse().getResult(), equalTo(DocWriteResponse.Result.CREATED));
- assertThat(results[2].getVersion(), equalTo(1L));
-
- assertThat(results[3].getResponse().getResult(), equalTo(DocWriteResponse.Result.CREATED));
- assertThat(results[3].getVersion(), equalTo(1L));
-
- assertThat(results[4].getResponse().getResult(), equalTo(DocWriteResponse.Result.CREATED));
- assertThat(results[4].getVersion(), equalTo(1L));
-
- final var docIndex = results[1].getIndex();
- final var docId = results[1].getId();
+ // Verify that documents are created
+ for (var result : results) {
+ assertThat(result.getResponse().getResult(), equalTo(DocWriteResponse.Result.CREATED));
+ assertThat(result.getVersion(), equalTo(1L));
+ docs.put(result.getId(), result.getIndex());
+ }
enum Operation {
FLUSH,
REFRESH,
NONE
}
+
+ // Random flush or refresh or nothing, so that the next GETs are executed on flushed segments or in memory segments.
switch (randomFrom(Operation.values())) {
case FLUSH:
- flush(indexName);
+ flush(dataStreamName);
break;
case REFRESH:
- refresh(indexName);
+ refresh(dataStreamName);
break;
case NONE:
default:
@@ -149,46 +157,183 @@ enum Operation {
}
// Get by synthetic _id
- // Note: before synthetic _id this would have required postings on disks
- var getResponse = client().prepareGet(docIndex, docId).setFetchSource(true).execute().actionGet();
- assertThat(getResponse.isExists(), equalTo(true));
- assertThat(getResponse.getVersion(), equalTo(1L));
- var source = asInstanceOf(Map.class, getResponse.getSourceAsMap().get("metric"));
- assertThat(asInstanceOf(Integer.class, source.get("value")), equalTo(1));
+ var randomDocs = randomSubsetOf(randomIntBetween(0, results.length), results);
+ for (var doc : randomDocs) {
+ boolean fetchSource = randomBoolean();
+ var getResponse = client().prepareGet(doc.getIndex(), doc.getId()).setFetchSource(fetchSource).execute().actionGet();
+ assertThat(getResponse.isExists(), equalTo(true));
+ assertThat(getResponse.getVersion(), equalTo(1L));
+
+ if (fetchSource) {
+ var source = asInstanceOf(Map.class, getResponse.getSourceAsMap().get("metric"));
+ assertThat(asInstanceOf(Integer.class, source.get("value")), equalTo(doc.getItemId()));
+ }
+ }
// Update by synthetic _id
+ //
// Note: it doesn't work, is that expected? Is is blocked by IndexRouting.ExtractFromSource.updateShard
+ var updateDocId = randomFrom(docs.keySet());
+ var updateDocIndex = docs.get(updateDocId);
var exception = expectThrows(IllegalArgumentException.class, () -> {
var doc = document(timestamp, "vm-dev01", "cpu-load", 10); // update
- client().prepareUpdate(docIndex, docId).setDoc(doc).get();
+ client().prepareUpdate(updateDocIndex, updateDocId).setDoc(doc).get();
});
assertThat(
exception.getMessage(),
- containsString("update is not supported because the destination index [" + docIndex + "] is in time_series mode")
+ containsString("update is not supported because the destination index [" + updateDocIndex + "] is in time_series mode")
);
+ // Random flush or refresh or nothing, so that the next DELETEs are executed on flushed segments or in memory segments.
+ switch (randomFrom(Operation.values())) {
+ case FLUSH:
+ flush(dataStreamName);
+ break;
+ case REFRESH:
+ refresh(dataStreamName);
+ break;
+ case NONE:
+ default:
+ break;
+ }
+
// Delete by synthetic _id
- var deleteResponse = client().prepareDelete(docIndex, docId).get();
- assertThat(deleteResponse.getId(), equalTo(docId));
- assertThat(deleteResponse.getResult(), equalTo(DocWriteResponse.Result.DELETED));
- assertThat(deleteResponse.getVersion(), equalTo(2L));
-
- // Index more docs
- // TODO Randomize this to have segments only composed of deleted docs
- createDocuments(
- indexName,
- document(timestamp.plusSeconds(4), "vm-dev03", "cpu-load", 5),
- document(timestamp.plusSeconds(5), "vm-dev03", "cpu-load", 6)
+ var deletedDocs = randomSubsetOf(randomIntBetween(1, docs.size()), docs.keySet());
+ for (var deletedDocId : deletedDocs) {
+ var deletedDocIndex = docs.get(deletedDocId);
+
+ // Delete
+ var deleteResponse = client().prepareDelete(deletedDocIndex, deletedDocId).get();
+ assertThat(deleteResponse.getId(), equalTo(deletedDocId));
+ assertThat(deleteResponse.getIndex(), equalTo(deletedDocIndex));
+ assertThat(deleteResponse.getResult(), equalTo(DocWriteResponse.Result.DELETED));
+ assertThat(deleteResponse.getVersion(), equalTo(2L));
+
+ // Get returns "not found"
+ var getResponse = client().prepareGet(deletedDocIndex, deletedDocId).get();
+ assertThat(getResponse.getId(), equalTo(deletedDocId));
+ assertThat(getResponse.getIndex(), equalTo(deletedDocIndex));
+ assertThat(getResponse.isExists(), equalTo(false));
+ }
+
+ flushAndRefresh(dataStreamName);
+
+ // Check that the synthetic _id field has no postings on disk
+ var indices = new HashSet<>(docs.values());
+ for (var index : indices) {
+ var diskUsage = diskUsage(index);
+ var diskUsageIdField = AnalyzeIndexDiskUsageTestUtils.getPerFieldDiskUsage(diskUsage, IdFieldMapper.NAME);
+ assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
+ }
+
+ /* This does not work :-(
+ assertCheckedResponse(
+ client().prepareSearch(dataStreamName).setTrackTotalHits(true),
+ searchResponse -> {
+ assertHitCount(searchResponse, docs.size() - deletedDocs.size());
+
+ // Verify that search response does not contain deleted docs
+ for (var searchHit : searchResponse.getHits()) {
+ assertThat(deletedDocs.contains(searchHit.getId()), equalTo(false));
+ }
+ }
+ );*/
+ }
+
+ public void testGetFromTranslogBySyntheticId() throws Exception {
+ assumeTrue("Test should only run with feature flag", IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG);
+ final var datastreamName = randomIdentifier();
+ putDataStreamTemplate(datastreamName, 1);
+
+ final var docs = new HashMap<String, String>();
+ final var unit = randomFrom(ChronoUnit.SECONDS, ChronoUnit.MINUTES);
+ final var timestamp = Instant.now();
+
+ // Index 5 docs in datastream
+ //
+ // For convenience, each document's metric value equals the index of its item in the bulk response
+ var results = createDocuments(
+ datastreamName,
+ // t + 0s
+ document(timestamp, "vm-dev01", "cpu-load", 0),
+ document(timestamp, "vm-dev02", "cpu-load", 1),
+ // t + 1s
+ document(timestamp.plus(1, unit), "vm-dev01", "cpu-load", 2),
+ document(timestamp.plus(1, unit), "vm-dev02", "cpu-load", 3),
+ // t + 0s out-of-order doc
+ document(timestamp, "vm-dev03", "cpu-load", 4)
);
- flushAndRefresh(indexName);
+ // Verify that documents are created
+ for (var result : results) {
+ assertThat(result.getResponse().getResult(), equalTo(DocWriteResponse.Result.CREATED));
+ assertThat(result.getVersion(), equalTo(1L));
+ docs.put(result.getId(), result.getIndex());
+ }
- // Check that synthetic _id field has no postings on disk
- var diskUsage = diskUsage(docIndex);
- var diskUsageIdField = AnalyzeIndexDiskUsageTestUtils.getPerFieldDiskUsage(diskUsage, IdFieldMapper.NAME);
- assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
+ // Get by synthetic _id
+ //
+ // The documents are in memory buffers: the first GET will trigger the refresh of the internal reader
+ // (see InternalEngine.REAL_TIME_GET_REFRESH_SOURCE) to have an up-to-date searcher to resolve documents ids and versions. It will
+ // also enable the tracking of the locations of documents in the translog (see InternalEngine.trackTranslogLocation) so that next
+ // GETs will be resolved with the translog.
+ var randomDocs = randomSubsetOf(randomIntBetween(1, results.length), results);
+ for (var doc : randomDocs) {
+ var getResponse = client().prepareGet(doc.getIndex(), doc.getId()).setRealtime(true).setFetchSource(true).execute().actionGet();
+ assertThat(getResponse.isExists(), equalTo(true));
+ assertThat(getResponse.getVersion(), equalTo(1L));
+
+ var source = asInstanceOf(Map.class, getResponse.getSourceAsMap().get("metric"));
+ assertThat(asInstanceOf(Integer.class, source.get("value")), equalTo(doc.getItemId()));
+ }
+
+ int metricOffset = results.length;
+
+ // Index 5 more docs
+ results = createDocuments(
+ datastreamName,
+ // t + 2s
+ document(timestamp.plus(2, unit), "vm-dev01", "cpu-load", metricOffset),
+ document(timestamp.plus(2, unit), "vm-dev02", "cpu-load", metricOffset + 1),
+ // t - 1s out-of-order doc
+ document(timestamp.minus(1, unit), "vm-dev01", "cpu-load", metricOffset + 2),
+ // t + 3s
+ document(timestamp.plus(3, unit), "vm-dev01", "cpu-load", metricOffset + 3),
+ document(timestamp.plus(3, unit), "vm-dev02", "cpu-load", metricOffset + 4)
+ );
+
+ // Verify that documents are created
+ for (var result : results) {
+ assertThat(result.getResponse().getResult(), equalTo(DocWriteResponse.Result.CREATED));
+ assertThat(result.getVersion(), equalTo(1L));
+ docs.put(result.getId(), result.getIndex());
+ }
+
+ // Get by synthetic _id
+ //
+ // Documents ids and versions are resolved using the translog. Here we exercise the get-from-translog (that uses the
+ // TranslogDirectoryReader) and VersionsAndSeqNoResolver.loadDocIdAndVersionUncached paths.
+ randomDocs = randomSubsetOf(randomIntBetween(1, results.length), results);
+ for (var doc : randomDocs) {
+ var getResponse = client().prepareGet(doc.getIndex(), doc.getId()).setRealtime(true).setFetchSource(true).execute().actionGet();
+ assertThat(getResponse.isExists(), equalTo(true));
+ assertThat(getResponse.getVersion(), equalTo(1L));
+
+ var source = asInstanceOf(Map.class, getResponse.getSourceAsMap().get("metric"));
+ assertThat(asInstanceOf(Integer.class, source.get("value")), equalTo(metricOffset + doc.getItemId()));
+ }
+
+ flushAndRefresh(datastreamName);
+
+ // Check that the synthetic _id field has no postings on disk
+ var indices = new HashSet<>(docs.values());
+ for (var index : indices) {
+ var diskUsage = diskUsage(index);
+ var diskUsageIdField = AnalyzeIndexDiskUsageTestUtils.getPerFieldDiskUsage(diskUsage, IdFieldMapper.NAME);
+ assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
+ }
- // TODO Search datastream and count hits
+ assertHitCount(client().prepareSearch(datastreamName).setSize(0), 10L);
}
private static XContentBuilder document(Instant timestamp, String hostName, String metricField, Integer metricValue)
@@ -210,7 +355,7 @@ private static XContentBuilder document(Instant timestamp, String hostName, Stri
return source;
}
- private static BulkItemResponse[] createDocuments(String indexName, XContentBuilder... docs) throws IOException {
+ private static BulkItemResponse[] createDocuments(String indexName, XContentBuilder... docs) {
assertThat(docs, notNullValue());
final var client = client();
var bulkRequest = client.prepareBulk();
@@ -222,8 +367,8 @@ private static BulkItemResponse[] createDocuments(String indexName, XContentBuil
return bulkResponse.getItems();
}
- private static void putDataStreamTemplate(Random random, String indexPattern) throws IOException {
- final var settings = indexSettings(1, 0).put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES.getName())
+ private static void putDataStreamTemplate(String indexPattern, int shards) throws IOException {
+ final var settings = indexSettings(shards, 0).put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES.getName())
.put(IndexSettings.BLOOM_FILTER_ID_FIELD_ENABLED_SETTING.getKey(), false)
.put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), -1)
.put(IndexSettings.USE_SYNTHETIC_ID.getKey(), true);
diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java
index 12d45898bfba5..3c1a7c8fcbaed 100644
--- a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java
+++ b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java
@@ -26,9 +26,11 @@
import org.elasticsearch.core.Nullable;
import org.elasticsearch.features.NodeFeature;
import org.elasticsearch.index.IndexMode;
+import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper;
+import org.elasticsearch.index.mapper.TsidExtractingIdFieldMapper;
import org.elasticsearch.transport.Transports;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParserConfiguration;
@@ -321,6 +323,7 @@ public abstract static class ExtractFromSource extends IndexRouting {
protected final XContentParserConfiguration parserConfig;
private final IndexMode indexMode;
private final boolean trackTimeSeriesRoutingHash;
+ private final boolean useTimeSeriesSyntheticId;
private final boolean addIdWithRoutingHash;
private int hash = Integer.MAX_VALUE;
@@ -333,6 +336,9 @@ public abstract static class ExtractFromSource extends IndexRouting {
assert indexMode != null : "Index mode must be set for ExtractFromSource routing";
this.trackTimeSeriesRoutingHash = indexMode == IndexMode.TIME_SERIES
&& metadata.getCreationVersion().onOrAfter(IndexVersions.TIME_SERIES_ROUTING_HASH_IN_ID);
+ this.useTimeSeriesSyntheticId = trackTimeSeriesRoutingHash
+ && metadata.getCreationVersion().onOrAfter(IndexVersions.TIME_SERIES_USE_SYNTHETIC_ID)
+ && IndexSettings.USE_SYNTHETIC_ID.get(metadata.getSettings());
addIdWithRoutingHash = indexMode == IndexMode.LOGSDB;
this.parserConfig = XContentParserConfiguration.EMPTY.withFiltering(null, Set.copyOf(includePaths), null, true);
}
@@ -391,6 +397,7 @@ public int updateShard(String id, @Nullable String routing) {
public int deleteShard(String id, @Nullable String routing) {
checkNoRouting(routing);
int shardId = idToHash(id);
+ // The shard id is derived from the routing hash encoded in the _id (see idToHash).
return rerouteWritesIfResharding(shardId);
}
@@ -417,10 +424,19 @@ private int idToHash(String id) {
if (idBytes.length < 4) {
throw new ResourceNotFoundException("invalid id [{}] for index [{}] in " + indexMode.getName() + " mode", id, indexName);
}
- // For TSDB, the hash is stored as the id prefix.
- // For LogsDB with routing on sort fields, the routing hash is stored in the range[id.length - 9, id.length - 5] of the id,
- // see IndexRequest#autoGenerateTimeBasedId.
- return hashToShardId(ByteUtils.readIntLE(idBytes, addIdWithRoutingHash ? idBytes.length - 9 : 0));
+ int hash;
+ if (addIdWithRoutingHash) {
+ // For LogsDB with routing on sort fields, the routing hash is stored in the range[id.length - 9, id.length - 5] of the id,
+ // see IndexRequest#autoGenerateTimeBasedId.
+ hash = ByteUtils.readIntLE(idBytes, idBytes.length - 9);
+ } else if (useTimeSeriesSyntheticId) {
+ // For TSDB with synthetic ids, the hash is stored as the id suffix.
+ hash = TsidExtractingIdFieldMapper.extractRoutingHashFromSyntheticId(idBytes);
+ } else {
+ // For TSDB, the hash is stored as the id prefix.
+ hash = ByteUtils.readIntLE(idBytes, 0);
+ }
+ return hashToShardId(hash);
}
@Override
@@ -510,7 +526,7 @@ public static class ForIndexDimensions extends ExtractFromSource {
@Override
protected int hashSource(IndexRequest indexRequest) {
- // System.out.println("hashSource for tsid");
+ // Use the tsid carried by the request if present, otherwise build it from the source.
BytesRef tsid = indexRequest.tsid();
if (tsid == null) {
tsid = buildTsid(indexRequest.getContentType(), indexRequest.indexSource().bytes());
diff --git a/server/src/main/java/org/elasticsearch/common/lucene/uid/VersionsAndSeqNoResolver.java b/server/src/main/java/org/elasticsearch/common/lucene/uid/VersionsAndSeqNoResolver.java
index 08a8e28457159..b5485f3cbf1f9 100644
--- a/server/src/main/java/org/elasticsearch/common/lucene/uid/VersionsAndSeqNoResolver.java
+++ b/server/src/main/java/org/elasticsearch/common/lucene/uid/VersionsAndSeqNoResolver.java
@@ -14,9 +14,9 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
-import org.elasticsearch.common.util.ByteUtils;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.core.Assertions;
+import org.elasticsearch.index.mapper.TsidExtractingIdFieldMapper;
import java.io.IOException;
import java.util.Base64;
@@ -153,22 +153,29 @@ public static DocIdAndVersion timeSeriesLoadDocIdAndVersion(IndexReader reader,
* This allows this method to know whether there is no document with the specified id without loading the docid for
* the specified id.
*
- * @param reader The reader load docid, version and seqno from.
- * @param uid The term that describes the uid of the document to load docid, version and seqno for.
- * @param id The id that contains the encoded timestamp. The timestamp is used to skip checking the id for entire segments.
- * @param loadSeqNo Whether to load sequence number from _seq_no doc values field.
+ * @param reader The reader to load docid, version and seqno from.
+ * @param uid The term that describes the uid of the document to load docid, version and seqno for.
+ * @param id The id that contains the encoded timestamp. The timestamp is used to skip checking the id for entire segments.
+ * @param loadSeqNo Whether to load sequence number from _seq_no doc values field.
+ * @param useSyntheticId Whether the id is a synthetic (true) or a standard (false) document id.
* @return the internal doc ID and version for the specified term from the specified reader or
* returning null if no document was found for the specified id
* @throws IOException In case of an i/o related failure
*/
- public static DocIdAndVersion timeSeriesLoadDocIdAndVersion(IndexReader reader, BytesRef uid, String id, boolean loadSeqNo)
- throws IOException {
+ public static DocIdAndVersion timeSeriesLoadDocIdAndVersion(
+ IndexReader reader,
+ BytesRef uid,
+ String id,
+ boolean loadSeqNo,
+ boolean useSyntheticId
+ ) throws IOException {
byte[] idAsBytes = Base64.getUrlDecoder().decode(id);
- assert idAsBytes.length == 20;
- // id format: [4 bytes (basic hash routing fields), 8 bytes prefix of 128 murmurhash dimension fields, 8 bytes
- // @timestamp)
- long timestamp = ByteUtils.readLongBE(idAsBytes, 12);
-
+ final long timestamp;
+ if (useSyntheticId) {
+ timestamp = TsidExtractingIdFieldMapper.extractTimestampFromSyntheticId(idAsBytes);
+ } else {
+ timestamp = TsidExtractingIdFieldMapper.extractTimestampFromId(idAsBytes);
+ }
PerThreadIDVersionAndSeqNoLookup[] lookups = getLookupState(reader, true);
List<LeafReaderContext> leaves = reader.leaves();
// iterate in default order, the segments should be sorted by DataStream#TIMESERIES_LEAF_READERS_SORTER
diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java
index e63b655e2ce8d..172bdc67e7872 100644
--- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java
+++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java
@@ -192,6 +192,7 @@ private static Version parseUnchecked(String version) {
public static final IndexVersion REENABLED_TIMESTAMP_DOC_VALUES_SPARSE_INDEX = def(9_042_0_00, Version.LUCENE_10_3_1);
public static final IndexVersion SKIPPERS_ENABLED_BY_DEFAULT = def(9_043_0_00, Version.LUCENE_10_3_1);
+ public static final IndexVersion TIME_SERIES_USE_SYNTHETIC_ID = def(9_044_0_00, Version.LUCENE_10_3_1);
/*
* STOP! READ THIS FIRST! No, really,
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java
index 970664844631a..4d885fbc88e1a 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java
@@ -27,6 +27,7 @@
import static org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdPostingsFormat.SYNTHETIC_ID;
import static org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdPostingsFormat.TIMESTAMP;
import static org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdPostingsFormat.TS_ID;
+import static org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdPostingsFormat.TS_ROUTING_HASH;
/**
* Special codec for time-series datastreams that use synthetic ids.
@@ -83,6 +84,13 @@ private void ensureSyntheticIdFields(FieldInfos fieldInfos) {
assert false : message;
throw new IllegalArgumentException(message);
}
+ // Ensure _ts_routing_hash exists
+ fi = fieldInfos.fieldInfo(TS_ROUTING_HASH);
+ if (fi == null) {
+ var message = "Field [" + TS_ROUTING_HASH + "] does not exist";
+ assert false : message;
+ throw new IllegalArgumentException(message);
+ }
// Ensure _id exists and not indexed
fi = fieldInfos.fieldInfo(SYNTHETIC_ID);
if (fi == null) {
@@ -102,6 +110,49 @@ private void ensureSyntheticIdFields(FieldInfos fieldInfos) {
@Override
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos fieldInfos, IOContext context)
throws IOException {
+
+ // Change the _id field index options from IndexOptions.DOCS to IndexOptions.NONE
+ final var infos = new FieldInfo[fieldInfos.size()];
+ int i = 0;
+ for (FieldInfo fi : fieldInfos) {
+ if (SYNTHETIC_ID.equals(fi.getName())) {
+ final var attributes = new HashMap<>(fi.attributes());
+
+ // Assert that PerFieldPostingsFormat are not present or have the expected format and suffix
+ assert attributes.get(PerFieldPostingsFormat.PER_FIELD_FORMAT_KEY) == null
+ || TSDBSyntheticIdPostingsFormat.FORMAT_NAME.equals(attributes.get(PerFieldPostingsFormat.PER_FIELD_FORMAT_KEY));
+ assert attributes.get(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY) == null
+ || TSDBSyntheticIdPostingsFormat.SUFFIX.equals(attributes.get(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY));
+
+ // Remove attributes if present
+ attributes.remove(PerFieldPostingsFormat.PER_FIELD_FORMAT_KEY);
+ attributes.remove(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY);
+
+ fi = new FieldInfo(
+ fi.getName(),
+ fi.getFieldNumber(),
+ fi.hasTermVectors(),
+ true,
+ fi.hasPayloads(),
+ IndexOptions.NONE,
+ fi.getDocValuesType(),
+ fi.docValuesSkipIndexType(),
+ fi.getDocValuesGen(),
+ attributes,
+ fi.getPointDimensionCount(),
+ fi.getPointIndexDimensionCount(),
+ fi.getPointNumBytes(),
+ fi.getVectorDimension(),
+ fi.getVectorEncoding(),
+ fi.getVectorSimilarityFunction(),
+ fi.isSoftDeletesField(),
+ fi.isParentField()
+ );
+ }
+ infos[i++] = fi;
+ }
+
+ fieldInfos = new FieldInfos(infos);
ensureSyntheticIdFields(fieldInfos);
delegate.write(directory, segmentInfo, segmentSuffix, fieldInfos, context);
}
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
index 2f624fd2d9cd0..2b70d7a9adfef 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
@@ -12,6 +12,7 @@
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.index.BaseTermsEnum;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
@@ -28,7 +29,6 @@
import org.elasticsearch.index.mapper.Uid;
import java.io.IOException;
-import java.io.UncheckedIOException;
import java.util.Iterator;
import java.util.Objects;
import java.util.Set;
@@ -85,7 +85,7 @@ public Terms terms(String field) throws IOException {
return new Terms() {
@Override
public TermsEnum iterator() {
- return new FakeTermsEnum();
+ return new SyntheticIdTermsEnum();
}
@Override
@@ -131,97 +131,377 @@ public boolean hasPayloads() {
}
/**
- * This is a fake TermsEnum that scans all documents for find docs matching a specific _id. This implementation is only here to show
- * that the synthetic _id terms is used when applying doc values updates during soft-updates. It is buggy and should not be used besides
- * some carefully crafted integration tests, because it relies on the current _id format for TSDB indices that has limitations:
- * - it is composed of a routing hash, a @timestamp and a tsid that cannot be un-hashed so all docs must be scanned to find matchings
- * - it is not sorted on _id in the Lucene segments so doc values updates stop too early when applying DV updates
- *
- * This fake terms enumeration will be changed to support a different _id format in a short future.
+     * Holds all the doc values used in the {@link TermsEnum} and {@link PostingsEnum} to look up and to build synthetic _ids, along with
+ * some utility methods to access doc values.
+ *
+ * It holds the instance of {@link DocValuesProducer} used to create the sorted doc values for _tsid, @timestamp and
+ * _ts_routing_hash. Because doc values can only advance, they are re-created from the {@link DocValuesProducer} when we need to
+ * seek backward.
+ *
*/
- private class FakeTermsEnum extends BaseTermsEnum {
+ private static class DocValuesHolder {
+
+ private final FieldInfo tsIdFieldInfo;
+ private final FieldInfo timestampFieldInfo;
+ private final FieldInfo routingHashFieldInfo;
+ private final DocValuesProducer docValuesProducer;
+
+ private SortedNumericDocValues timestampDocValues; // sorted desc. order
+ private SortedDocValues routingHashDocValues; // sorted asc. order
+ private SortedDocValues tsIdDocValues; // sorted asc. order
+ // Keep around the latest tsId ordinal and value
+ private int cachedTsIdOrd = -1;
+ private BytesRef cachedTsId;
+
+ private DocValuesHolder(FieldInfos fieldInfos, DocValuesProducer docValuesProducer) {
+ this.tsIdFieldInfo = safeFieldInfo(fieldInfos, TSDBSyntheticIdPostingsFormat.TS_ID);
+ this.timestampFieldInfo = safeFieldInfo(fieldInfos, TSDBSyntheticIdPostingsFormat.TIMESTAMP);
+ this.routingHashFieldInfo = safeFieldInfo(fieldInfos, TSDBSyntheticIdPostingsFormat.TS_ROUTING_HASH);
+ this.docValuesProducer = docValuesProducer;
+ }
- private BytesRef term = null;
- private int docID = -1;
+ private FieldInfo safeFieldInfo(FieldInfos fieldInfos, String fieldName) {
+ var fi = fieldInfos.fieldInfo(fieldName);
+ if (fi == null) {
+ var message = "Field [" + fieldName + "] does not exist";
+ assert false : message;
+ throw new IllegalArgumentException(message);
+ }
+ return fi;
+ }
+
+        /**
+         * Returns the _tsid ordinal value for a given docID. The document ID must exist and must have a value for the field.
+         *
+         * @param docID the docID
+         * @return the _tsid ordinal value
+         * @throws IOException if any I/O exception occurs
+         */
+        private int docTsIdOrdinal(int docID) throws IOException {
+            if (tsIdDocValues == null || tsIdDocValues.docID() > docID) { // doc values only advance: re-create them to seek backward
+                tsIdDocValues = docValuesProducer.getSorted(tsIdFieldInfo);
+                cachedTsIdOrd = -1;
+                cachedTsId = null;
+            }
+            boolean found = tsIdDocValues.advanceExact(docID);
+            assert found : "No value found for field [" + tsIdFieldInfo.getName() + "] and docID " + docID;
+            return tsIdDocValues.ordValue();
+        }
+
+        /**
+         * Returns the timestamp value for a given docID. The document ID must exist and must have a value for the field.
+         *
+         * @param docID the docID
+         * @return the timestamp value
+         * @throws IOException if any I/O exception occurs
+         */
+        private long docTimestamp(int docID) throws IOException {
+            if (timestampDocValues == null || timestampDocValues.docID() > docID) { // doc values only advance: re-create to seek backward
+                timestampDocValues = docValuesProducer.getSortedNumeric(timestampFieldInfo);
+            }
+            boolean found = timestampDocValues.advanceExact(docID);
+            assert found : "No value found for field [" + timestampFieldInfo.getName() + "] and docID " + docID;
+            assert timestampDocValues.docValueCount() == 1;
+            return timestampDocValues.nextValue();
+        }
+
+        /**
+         * Returns the routing hash value for a given docID. The document ID must exist and must have a value for the field.
+         *
+         * @param docID the docID
+         * @return the routing hash value
+         * @throws IOException if any I/O exception occurs
+         */
+        private BytesRef docRoutingHash(int docID) throws IOException {
+            if (routingHashDocValues == null || routingHashDocValues.docID() > docID) { // doc values only advance: re-create to seek backward
+                routingHashDocValues = docValuesProducer.getSorted(routingHashFieldInfo);
+            }
+            boolean found = routingHashDocValues.advanceExact(docID);
+            assert found : "No value found for field [" + routingHashFieldInfo.getName() + "] and docID " + docID;
+            return routingHashDocValues.lookupOrd(routingHashDocValues.ordValue());
+        }
+
+ /**
+         * Looks up a given _tsid, returning its ordinal (zero or greater) if it exists, otherwise it returns -insertionPoint-1.
+         *
+         * @param tsId the _tsid to look up
+         * @return the ordinal (zero or greater) if the _tsid exists, else returns -insertionPoint-1.
+ * @throws IOException if any I/O exception occurs
+ */
+ private int lookupTsIdTerm(BytesRef tsId) throws IOException {
+ int compare = Integer.MAX_VALUE;
+ if (cachedTsId != null) {
+ compare = cachedTsId.compareTo(tsId);
+ if (compare == 0) {
+ return cachedTsIdOrd;
+ }
+ }
+ if (tsIdDocValues == null || compare > 0) {
+ tsIdDocValues = docValuesProducer.getSorted(tsIdFieldInfo);
+ cachedTsIdOrd = -1;
+ cachedTsId = null;
+ }
+ int ordinal = tsIdDocValues.lookupTerm(tsId);
+ if (0 <= ordinal) {
+ cachedTsIdOrd = ordinal;
+ cachedTsId = tsId;
+ }
+ return ordinal;
+ }
- private BytesRef latestTsId = null;
- private long latestTimestamp = -1L;
+        /**
+         * Looks up the _tsid value for the given ordinal. The latest value is cached as a private copy, and that copy is what is returned.
+         *
+         * @param tsIdOrdinal the _tsid ordinal
+         * @return the _tsid value
+         * @throws IOException if any I/O exception occurs
+         */
+        private BytesRef lookupTsIdOrd(int tsIdOrdinal) throws IOException {
+            if (cachedTsIdOrd != -1 && cachedTsIdOrd == tsIdOrdinal) {
+                return cachedTsId;
+            }
+            if (tsIdDocValues == null || tsIdDocValues.ordValue() > tsIdOrdinal) {
+                tsIdDocValues = docValuesProducer.getSorted(tsIdFieldInfo);
+                cachedTsIdOrd = -1;
+                cachedTsId = null;
+            }
+            assert 0 <= tsIdOrdinal : tsIdOrdinal;
+            assert tsIdOrdinal < tsIdDocValues.getValueCount() : tsIdOrdinal;
+            var tsId = tsIdDocValues.lookupOrd(tsIdOrdinal);
+            if (tsId != null) {
+                cachedTsIdOrd = tsIdOrdinal;
+                cachedTsId = BytesRef.deepCopyOf(tsId); // lookupOrd may re-use the returned BytesRef across calls: keep our own copy
+            }
+            return tsId == null ? null : cachedTsId;
+        }
- private FakeTermsEnum() {}
+        /**
+         * Scan all documents to find the first document that has a _tsid equal to or greater than the provided _tsid ordinal, returning
+         * its document ID. If no document is found, the method returns {@link DocIdSetIterator#NO_MORE_DOCS}.
+         *
+         * Warning: This method is very slow because it potentially scans all documents in the segment.
+         */
+        private int slowScanToFirstDocWithTsIdOrdinalEqualOrGreaterThan(int tsIdOrd) throws IOException {
+            // recreate even if doc values are already on the same ordinal, to ensure the method returns the first doc
+            if (tsIdDocValues == null || (cachedTsIdOrd != -1 && cachedTsIdOrd >= tsIdOrd)) { // can't use tsIdDocValues.ordValue() here??
+                tsIdDocValues = docValuesProducer.getSorted(tsIdFieldInfo);
+                cachedTsIdOrd = -1;
+                cachedTsId = null;
+            }
+            assert 0 <= tsIdOrd : tsIdOrd;
+            assert tsIdOrd < tsIdDocValues.getValueCount() : tsIdOrd;
+
+            for (int docID = 0; docID != DocIdSetIterator.NO_MORE_DOCS; docID = tsIdDocValues.nextDoc()) {
+                boolean found = tsIdDocValues.advanceExact(docID);
+                assert found : "No value found for field [" + tsIdFieldInfo.getName() + "] and docID " + docID;
+                var ord = tsIdDocValues.ordValue();
+                if (ord == tsIdOrd || tsIdOrd < ord) {
+                    if (ord != cachedTsIdOrd) {
+                        cachedTsId = BytesRef.deepCopyOf(tsIdDocValues.lookupOrd(ord)); // lookupOrd may re-use its BytesRef: copy it
+                        cachedTsIdOrd = ord;
+                    }
+                    return docID;
+                }
+            }
+            cachedTsIdOrd = -1;
+            cachedTsId = null;
+            return DocIdSetIterator.NO_MORE_DOCS;
+        }
+
+        /**
+         * Scan all documents to find the first document that has a _tsid equal to the provided _tsid ordinal, returning its
+         * document ID. If no document is found, the method returns {@link DocIdSetIterator#NO_MORE_DOCS}.
+         *
+         * Warning: This method is very slow because it potentially scans all documents in the segment.
+         */
+        private int slowScanToFirstDocWithTsIdOrdinalEqualTo(int tsIdOrd) throws IOException {
+            // recreate even if doc values are already on the same ordinal, to ensure the method returns the first doc
+            if (tsIdDocValues == null || (cachedTsIdOrd != -1 && cachedTsIdOrd >= tsIdOrd)) { // can't use tsIdDocValues.ordValue() here??
+                tsIdDocValues = docValuesProducer.getSorted(tsIdFieldInfo);
+                cachedTsIdOrd = -1;
+                cachedTsId = null;
+            }
+            assert 0 <= tsIdOrd : tsIdOrd;
+            assert tsIdOrd < tsIdDocValues.getValueCount() : tsIdOrd;
+
+            for (int docID = 0; docID != DocIdSetIterator.NO_MORE_DOCS; docID = tsIdDocValues.nextDoc()) {
+                boolean found = tsIdDocValues.advanceExact(docID);
+                assert found : "No value found for field [" + tsIdFieldInfo.getName() + "] and docID " + docID;
+                var ord = tsIdDocValues.ordValue();
+                if (ord == tsIdOrd) {
+                    if (ord != cachedTsIdOrd) {
+                        cachedTsId = BytesRef.deepCopyOf(tsIdDocValues.lookupOrd(ord)); // lookupOrd may re-use its BytesRef: copy it
+                        cachedTsIdOrd = ord;
+                    }
+                    return docID;
+                } else if (tsIdOrd < ord) {
+                    break;
+                }
+            }
+            cachedTsIdOrd = -1;
+            cachedTsId = null;
+            assert false : "Method must be called with an existing _tsid ordinal: " + tsIdOrd;
+            return DocIdSetIterator.NO_MORE_DOCS;
+        }
+
+ private int getTsIdValueCount() throws IOException {
+ if (tsIdDocValues == null) {
+ tsIdDocValues = docValuesProducer.getSorted(tsIdFieldInfo);
+ }
+ return tsIdDocValues.getValueCount();
+ }
+ }
+
+ /**
+ * Represents the synthetic term the {@link TermsEnum} or {@link PostingsEnum} is positioned on. It points to a given docID and its
+ * corresponding _tsid, @timestamp and _ts_routing_hash values. The {@link #term()} method returns the synthetic _id of the document.
+ */
+ private record SyntheticTerm(int docID, int tsIdOrd, BytesRef tsId, long timestamp, BytesRef routingHash) {
+ private BytesRef term() {
+ assert docID >= 0 : docID;
+ assert tsIdOrd >= 0 : tsIdOrd;
+ return syntheticId(tsId, timestamp, routingHash);
+ }
+ }
+
+ /**
+ * When returned by next(), seekCeil(), nextDoc() and docID() it means there are no more synthetic terms in the {@link TermsEnum}
+ * or {@link PostingsEnum}.
+ */
+ private static final SyntheticTerm NO_MORE_DOCS = new SyntheticTerm(DocIdSetIterator.NO_MORE_DOCS, -1, null, -1L, null);
+
+ /**
+ * {@link TermsEnum} to iterate over documents synthetic _ids.
+ */
+ private class SyntheticIdTermsEnum extends BaseTermsEnum {
+
+ /**
+         * Holds all the doc values that compose the synthetic _id
+ */
+ private final DocValuesHolder docValues;
+
+ /**
+ * Current synthetic term the enum is positioned on. It points to 1 document.
+ */
+ private SyntheticTerm current;
+
+ private SyntheticIdTermsEnum() {
+ this.docValues = new DocValuesHolder(fieldInfos, docValuesProducer);
+ this.current = null;
+ }
+
+ private void ensurePositioned() {
+ if (current == null || current == NO_MORE_DOCS) {
+ assert false;
+ throw new IllegalStateException("Method should not be called when unpositioned");
+ }
+ }
@Override
public BytesRef next() throws IOException {
- if (docID == DocIdSetIterator.NO_MORE_DOCS) {
- assert term == null;
+ if (current == NO_MORE_DOCS) {
return null;
}
- docID += 1;
+ int docID = (current != null) ? current.docID + 1 : 0;
if (maxDocs <= docID) {
- docID = DocIdSetIterator.NO_MORE_DOCS;
- latestTimestamp = -1L;
- latestTsId = null;
- term = null;
+ current = NO_MORE_DOCS;
return null;
}
-
- // Retrieve _tsid
- SortedDocValues tsIdDocValues = docValuesProducer.getSorted(fieldInfos.fieldInfo(TS_ID));
- boolean found = tsIdDocValues.advanceExact(docID);
- assert found;
- int tsIdOrd = tsIdDocValues.ordValue();
- BytesRef tsId = tsIdDocValues.lookupOrd(tsIdOrd);
- assert tsId != null;
-
- // Retrieve timestamp
- SortedNumericDocValues timestampDocValues = docValuesProducer.getSortedNumeric(fieldInfos.fieldInfo(TIMESTAMP));
- found = timestampDocValues.advanceExact(docID);
- assert found;
- assert timestampDocValues.docValueCount() == 1;
- long timestamp = timestampDocValues.nextValue();
-
- // Retrieve routing hash
- var tsRoutingHash = fieldInfos.fieldInfo(TimeSeriesRoutingHashFieldMapper.NAME);
- assert tsRoutingHash != null;
- SortedDocValues routingHashDocValues = docValuesProducer.getSorted(tsRoutingHash);
- found = routingHashDocValues.advanceExact(docID);
- assert found;
- BytesRef routingHashBytes = routingHashDocValues.lookupOrd(routingHashDocValues.ordValue());
-
- int routingHash = TimeSeriesRoutingHashFieldMapper.decode(
- Uid.decodeId(routingHashBytes.bytes, routingHashBytes.offset, routingHashBytes.length)
+ int tsIdOrdinal = docValues.docTsIdOrdinal(docID);
+ current = new SyntheticTerm(
+ docID,
+ tsIdOrdinal,
+ docValues.lookupTsIdOrd(tsIdOrdinal),
+ docValues.docTimestamp(docID),
+ docValues.docRoutingHash(docID)
);
- term = Uid.encodeId(TsidExtractingIdFieldMapper.createId(routingHash, tsId, timestamp));
- latestTimestamp = timestamp;
- latestTsId = tsId;
- return term;
+ return current.term();
}
@Override
- public SeekStatus seekCeil(BytesRef id) {
+ public SeekStatus seekCeil(BytesRef id) throws IOException {
assert id != null;
- if (term != null && term.equals(id)) {
- return SeekStatus.FOUND;
+ assert Long.BYTES + Integer.BYTES < id.length : id.length;
+ if (id == null || id.length <= Long.BYTES + Integer.BYTES) {
+ return SeekStatus.NOT_FOUND;
}
- try {
- while (next() != null) {
- if (term.equals(id)) {
- return SeekStatus.FOUND;
+
+ // Extract the _tsid
+ final BytesRef tsId = TsidExtractingIdFieldMapper.extractTimeSeriesIdFromSyntheticId(id.bytes);
+ int tsIdOrd = docValues.lookupTsIdTerm(tsId);
+
+ // _tsid not found
+ if (tsIdOrd < 0) {
+ tsIdOrd = -tsIdOrd - 1;
+ // set the terms enum on the first non-matching document
+ if (tsIdOrd < docValues.getTsIdValueCount()) {
+ int docID = docValues.slowScanToFirstDocWithTsIdOrdinalEqualOrGreaterThan(tsIdOrd);
+ if (docID != DocIdSetIterator.NO_MORE_DOCS) {
+ current = new SyntheticTerm(
+ docID,
+ tsIdOrd,
+ docValues.lookupTsIdOrd(tsIdOrd),
+ docValues.docTimestamp(docID),
+ docValues.docRoutingHash(docID)
+ );
+ return SeekStatus.NOT_FOUND;
}
}
- } catch (IOException e) {
- throw new UncheckedIOException(e);
+ // no docs/terms to iterate on
+ current = NO_MORE_DOCS;
+ return SeekStatus.END;
}
- return SeekStatus.END;
+
+ // _tsid found, extract the timestamp
+ final long timestamp = TsidExtractingIdFieldMapper.extractTimestampFromSyntheticId(id.bytes);
+
+ // Slow scan to the first document matching the _tsid
+ final int startDocID = docValues.slowScanToFirstDocWithTsIdOrdinalEqualTo(tsIdOrd);
+ assert 0 <= startDocID : startDocID;
+
+ int docID = startDocID;
+ int docTsIdOrd = tsIdOrd;
+ long docTimestamp = -1;
+
+ // Iterate over documents to find the first one matching the timestamp
+ for (; docID < maxDocs; docID++) {
+ docTimestamp = docValues.docTimestamp(docID);
+ if (startDocID < docID) {
+ // After the first doc, we need to check again if _tsid matches
+ docTsIdOrd = docValues.docTsIdOrdinal(docID);
+ }
+ if (docTsIdOrd == tsIdOrd && docTimestamp == timestamp) {
+ // It's a match!
+ current = new SyntheticTerm(docID, tsIdOrd, tsId, docTimestamp, docValues.docRoutingHash(docID));
+ return SeekStatus.FOUND;
+ }
+ // Remaining docs don't match, stop here
+ if (tsIdOrd < docTsIdOrd || docTimestamp < timestamp) {
+ break;
+ }
+ }
+
+ // set the terms enum on the first non-matching document
+ current = new SyntheticTerm(
+ docID,
+ docTsIdOrd,
+ docValues.lookupTsIdOrd(docTsIdOrd),
+ docTimestamp,
+ docValues.docRoutingHash(docID)
+ );
+ return SeekStatus.NOT_FOUND;
}
@Override
public BytesRef term() {
- return term;
+ ensurePositioned();
+ return current.term();
}
@Override
public PostingsEnum postings(PostingsEnum reuse, int flags) {
- return new FakePostingsEnum(docID, latestTsId, latestTimestamp, maxDocs);
+ ensurePositioned();
+ return new SyntheticIdPostingsEnum(current);
}
/**
@@ -258,23 +538,19 @@ public ImpactsEnum impacts(int flags) throws IOException {
}
}
- /**
- * Do not use in production. See {@link FakeTermsEnum}.
- */
- private class FakePostingsEnum extends PostingsEnum {
+ private class SyntheticIdPostingsEnum extends PostingsEnum {
- private final int startDocID;
- private final BytesRef latestTsId;
- private final long latestTimestamp;
- private final int maxDocs;
- private int docID;
+ private final DocValuesHolder docValues;
- private FakePostingsEnum(int docID, BytesRef latestTsId, long latestTimestamp, int maxDocs) {
- this.startDocID = docID;
- this.latestTsId = latestTsId;
- this.latestTimestamp = latestTimestamp;
- this.maxDocs = maxDocs;
- this.docID = -1;
+ /**
+ * Current synthetic term the postings is pinned on.
+ */
+ private final SyntheticTerm term;
+ private int docID = -1;
+
+ private SyntheticIdPostingsEnum(SyntheticTerm term) {
+ this.docValues = new DocValuesHolder(fieldInfos, docValuesProducer);
+ this.term = Objects.requireNonNull(term);
}
@Override
@@ -286,61 +562,27 @@ public int docID() {
public int nextDoc() throws IOException {
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
return docID;
- } else if (docID == -1) {
- docID = startDocID;
- } else {
- docID = docID + 1;
- if (maxDocs <= docID) {
- docID = DocIdSetIterator.NO_MORE_DOCS;
- return docID;
- }
- }
-
- // Retrieve _tsid
- SortedDocValues tsIdDocValues = docValuesProducer.getSorted(fieldInfos.fieldInfo(TS_ID));
- boolean found = tsIdDocValues.advanceExact(docID);
- assert found;
- int tsIdOrd = tsIdDocValues.ordValue();
- BytesRef tsId = tsIdDocValues.lookupOrd(tsIdOrd);
- assert tsId != null;
-
- if (latestTsId != null && latestTsId.equals(tsId) == false) {
- // Different _tsid, stop here
- docID = DocIdSetIterator.NO_MORE_DOCS;
- return docID;
}
-
- // Retrieve timestamp
- SortedNumericDocValues timestampDocValues = docValuesProducer.getSortedNumeric(fieldInfos.fieldInfo(TIMESTAMP));
- found = timestampDocValues.advanceExact(docID);
- assert found;
- assert timestampDocValues.docValueCount() == 1;
- long timestamp = timestampDocValues.nextValue();
-
- if (latestTimestamp != -1L && latestTimestamp != timestamp) {
- // Different @timestamp, stop here
- docID = DocIdSetIterator.NO_MORE_DOCS;
- return docID;
+ int nextDocID = (docID == -1) ? term.docID() : docID + 1;
+ if (nextDocID < maxDocs) {
+ int tsIdOrd = docValues.docTsIdOrdinal(nextDocID);
+ if (tsIdOrd == term.tsIdOrd()) {
+ long timestamp = docValues.docTimestamp(nextDocID);
+ if (timestamp == term.timestamp()) {
+ assert Objects.equals(docValues.docRoutingHash(nextDocID), term.routingHash());
+ assert Objects.equals(docValues.lookupTsIdOrd(tsIdOrd), term.tsId());
+ docID = nextDocID;
+ return docID;
+ }
+ }
}
-
- // Retrieve routing hash
- var tsRoutingHash = fieldInfos.fieldInfo(TimeSeriesRoutingHashFieldMapper.NAME);
- assert tsRoutingHash != null;
- SortedDocValues routingHashDocValues = docValuesProducer.getSorted(tsRoutingHash);
- found = routingHashDocValues.advanceExact(docID);
- assert found;
- BytesRef routingHashBytes = routingHashDocValues.lookupOrd(routingHashDocValues.ordValue());
- assert routingHashBytes != null;
+ docID = DocIdSetIterator.NO_MORE_DOCS;
return docID;
}
@Override
public int advance(int target) throws IOException {
- int doc;
- while ((doc = nextDoc()) < target) {
- // Continue
- }
- return doc;
+ return slowAdvance(target);
}
@Override
@@ -374,6 +616,15 @@ public BytesRef getPayload() throws IOException {
}
}
+ private static BytesRef syntheticId(BytesRef tsId, long timestamp, BytesRef routingHashBytes) {
+ assert tsId != null;
+ assert timestamp > 0L;
+ assert routingHashBytes != null;
+ String routingHashString = Uid.decodeId(routingHashBytes.bytes, routingHashBytes.offset, routingHashBytes.length);
+ int routingHash = TimeSeriesRoutingHashFieldMapper.decode(routingHashString);
+ return TsidExtractingIdFieldMapper.createSyntheticIdBytesRef(tsId, timestamp, routingHash);
+ }
+
private static boolean assertFieldInfosExist(FieldInfos fieldInfos, String... fieldNames) {
assert fieldNames != null && fieldNames.length > 0 : "fieldNames should be > 0";
for (var fieldName : fieldNames) {
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdPostingsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdPostingsFormat.java
index cfe9975f33a1b..66a6aa7151c6b 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdPostingsFormat.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdPostingsFormat.java
@@ -19,6 +19,7 @@
import org.elasticsearch.index.mapper.DataStreamTimestampFieldMapper;
import org.elasticsearch.index.mapper.SyntheticIdField;
import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
+import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper;
import java.io.IOException;
@@ -27,6 +28,7 @@ public class TSDBSyntheticIdPostingsFormat extends PostingsFormat {
public static final String SYNTHETIC_ID = SyntheticIdField.NAME;
public static final String TIMESTAMP = DataStreamTimestampFieldMapper.DEFAULT_PATH;
public static final String TS_ID = TimeSeriesIdFieldMapper.NAME;
+ public static final String TS_ROUTING_HASH = TimeSeriesRoutingHashFieldMapper.NAME;
static final String FORMAT_NAME = "TSDBSyntheticId";
static final String SUFFIX = "0";
diff --git a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java
index 47e9ab7803a84..7a114b762f952 100644
--- a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java
+++ b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java
@@ -1067,7 +1067,13 @@ private VersionValue resolveDocVersion(final Operation op, boolean loadSeqNo) th
directoryReader -> {
if (engineConfig.getIndexSettings().getMode() == IndexMode.TIME_SERIES) {
assert engineConfig.getLeafSorter() == DataStream.TIMESERIES_LEAF_READERS_SORTER;
- return VersionsAndSeqNoResolver.timeSeriesLoadDocIdAndVersion(directoryReader, op.uid(), op.id(), loadSeqNo);
+ return VersionsAndSeqNoResolver.timeSeriesLoadDocIdAndVersion(
+ directoryReader,
+ op.uid(),
+ op.id(),
+ loadSeqNo,
+ useTsdbSyntheticId
+ );
} else {
return VersionsAndSeqNoResolver.timeSeriesLoadDocIdAndVersion(directoryReader, op.uid(), loadSeqNo);
}
@@ -1859,8 +1865,10 @@ private DeleteResult deleteInLucene(Delete delete, DeletionStrategy plan) throws
try {
final ParsedDocument tombstone = ParsedDocument.deleteTombstone(
engineConfig.getIndexSettings().seqNoIndexOptions(),
+ engineConfig.getIndexSettings().useDocValuesSkipper(),
useTsdbSyntheticId,
- delete.id()
+ delete.id(),
+ delete.uid()
);
assert tombstone.docs().size() == 1 : "Tombstone doc should have single doc [" + tombstone + "]";
tombstone.updateSeqID(delete.seqNo(), delete.primaryTerm());
@@ -1869,6 +1877,7 @@ private DeleteResult deleteInLucene(Delete delete, DeletionStrategy plan) throws
assert doc.getField(SeqNoFieldMapper.TOMBSTONE_NAME) != null
: "Delete tombstone document but _tombstone field is not set [" + doc + " ]";
doc.add(softDeletesField);
+ logDocumentsDetails(List.of(doc));
if (plan.addStaleOpToLucene || plan.currentlyDeleted) {
indexWriter.addDocument(doc);
} else {
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/ParsedDocument.java b/server/src/main/java/org/elasticsearch/index/mapper/ParsedDocument.java
index 61b26ca33b1ef..17708a2d2ad82 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/ParsedDocument.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/ParsedDocument.java
@@ -10,6 +10,9 @@
package org.elasticsearch.index.mapper;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.LongField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.BytesArray;
@@ -73,7 +76,7 @@ public static ParsedDocument noopTombstone(SeqNoFieldMapper.SeqNoIndexOptions se
* @param id the id of the deleted document
*/
public static ParsedDocument deleteTombstone(SeqNoFieldMapper.SeqNoIndexOptions seqNoIndexOptions, String id) {
- return deleteTombstone(seqNoIndexOptions, false, id);
+ return deleteTombstone(seqNoIndexOptions, false /* ignored */, false, id, null /* ignored */);
}
/**
@@ -82,7 +85,13 @@ public static ParsedDocument deleteTombstone(SeqNoFieldMapper.SeqNoIndexOptions
* @param useSyntheticId whether the id is synthetic or not
* @param id the id of the deleted document
*/
- public static ParsedDocument deleteTombstone(SeqNoFieldMapper.SeqNoIndexOptions seqNoIndexOptions, boolean useSyntheticId, String id) {
+ public static ParsedDocument deleteTombstone(
+ SeqNoFieldMapper.SeqNoIndexOptions seqNoIndexOptions,
+ boolean useDocValuesSkipper,
+ boolean useSyntheticId,
+ String id,
+ BytesRef uid
+ ) {
LuceneDocument document = new LuceneDocument();
SeqNoFieldMapper.SequenceIDFields seqIdFields = SeqNoFieldMapper.SequenceIDFields.tombstone(seqNoIndexOptions);
seqIdFields.addFields(document);
@@ -91,7 +100,21 @@ public static ParsedDocument deleteTombstone(SeqNoFieldMapper.SeqNoIndexOptions
if (useSyntheticId) {
// Use a synthetic _id field which is not indexed nor stored
document.add(IdFieldMapper.syntheticIdField(id));
- // TODO I think we also need to add the fields that compose the synthetic _id.
+
+ var timeSeriesId = TsidExtractingIdFieldMapper.extractTimeSeriesIdFromSyntheticId(uid.bytes);
+ var timestamp = TsidExtractingIdFieldMapper.extractTimestampFromSyntheticId(uid.bytes);
+ var routingHash = TsidExtractingIdFieldMapper.extractRoutingHashBytesFromSyntheticId(uid.bytes);
+
+ if (useDocValuesSkipper) {
+ document.add(SortedDocValuesField.indexedField(TimeSeriesIdFieldMapper.NAME, timeSeriesId));
+ document.add(SortedNumericDocValuesField.indexedField("@timestamp", timestamp));
+ } else {
+ document.add(new SortedDocValuesField(TimeSeriesIdFieldMapper.NAME, timeSeriesId));
+ document.add(new LongField("@timestamp", timestamp, Field.Store.NO));
+ }
+ var field = new SortedDocValuesField(TimeSeriesRoutingHashFieldMapper.NAME, routingHash);
+ document.add(field);
+
} else {
// Use standard _id field (indexed and stored, some indices also trim the stored field at some point)
document.add(IdFieldMapper.standardIdField(id));
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
index 1bb7001b29890..07605b0603f82 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
@@ -67,7 +67,11 @@ public static BytesRef createField(DocumentParserContext context, RoutingHashBui
|| id.equals(indexRouting.createId(context.sourceToParse().getXContentType(), context.sourceToParse().source(), suffix));
} else if (context.sourceToParse().routing() != null) {
int routingHash = TimeSeriesRoutingHashFieldMapper.decode(context.sourceToParse().routing());
- id = createId(routingHash, tsid, timestamp);
+ if (context.indexSettings().useTsdbSyntheticId()) {
+ id = createSyntheticId(tsid, timestamp, routingHash);
+ } else {
+ id = createId(routingHash, tsid, timestamp);
+ }
} else {
if (context.sourceToParse().id() == null) {
throw new IllegalArgumentException(
@@ -118,6 +122,13 @@ public static String createId(int routingHash, BytesRef tsid, long timestamp) {
return Strings.BASE_64_NO_PADDING_URL_ENCODER.encodeToString(bytes);
}
+ public static long extractTimestampFromId(byte[] id) {
+ assert id.length == 20;
+ // id format: [4 bytes (basic hash routing fields), 8 bytes prefix of 128 murmurhash dimension fields, 8 bytes
+        // @timestamp]
+ return ByteUtils.readLongBE(id, 12);
+ }
+
public static String createId(
boolean dynamicMappersExists,
RoutingHashBuilder routingBuilder,
@@ -141,6 +152,49 @@ public static String createId(
return id;
}
+    public static BytesRef createSyntheticIdBytesRef(BytesRef tsid, long timestamp, int routingHash) {
+        // A synthetic _id is the concatenation of [_tsid (non-fixed length) + timestamp (8 bytes) + routing hash (4 bytes)].
+        // We don't use hashing here because we need to be able to extract the concatenated values from the _id in various places, like
+        // when applying doc values updates in Lucene, or when routing GET or DELETE requests to the corresponding shard, or when replaying
+        // translog operations. Since the synthetic _id is not indexed and not really stored on disk we consider it fine if it is longer
+        // than standard ids.
+        byte[] bytes = new byte[tsid.length + Long.BYTES + Integer.BYTES];
+        System.arraycopy(tsid.bytes, tsid.offset, bytes, 0, tsid.length); // honor the BytesRef offset: the _tsid may be a slice
+        ByteUtils.writeLongBE(timestamp, bytes, tsid.length);
+        ByteUtils.writeIntBE(routingHash, bytes, tsid.length + Long.BYTES);
+        return new BytesRef(bytes);
+    }
+
+ public static String createSyntheticId(BytesRef tsid, long timestamp, int routingHash) {
+ BytesRef id = createSyntheticIdBytesRef(tsid, timestamp, routingHash);
+ return Strings.BASE_64_NO_PADDING_URL_ENCODER.encodeToString(id.bytes);
+ }
+
+ public static BytesRef extractTimeSeriesIdFromSyntheticId(byte[] id) {
+ assert id.length > Long.BYTES + Integer.BYTES;
+ // See #createSyntheticId
+ byte[] tsId = new byte[Math.toIntExact(id.length - Long.BYTES - Integer.BYTES)];
+ System.arraycopy(id, 0, tsId, 0, tsId.length);
+ return new BytesRef(tsId);
+ }
+
+ public static long extractTimestampFromSyntheticId(byte[] id) {
+ assert id.length > Long.BYTES + Integer.BYTES;
+ // See #createSyntheticId
+ return ByteUtils.readLongBE(id, id.length - Long.BYTES - Integer.BYTES);
+ }
+
+ public static int extractRoutingHashFromSyntheticId(byte[] id) {
+ assert id.length > Long.BYTES + Integer.BYTES;
+ // See #createSyntheticId
+ return ByteUtils.readIntBE(id, id.length - Integer.BYTES);
+ }
+
+ public static BytesRef extractRoutingHashBytesFromSyntheticId(byte[] id) {
+ int hash = extractRoutingHashFromSyntheticId(id);
+ return Uid.encodeId(TimeSeriesRoutingHashFieldMapper.encode(hash));
+ }
+
@Override
public String documentDescription(DocumentParserContext context) {
/*
From 9a9df492eb837d9ed7d3eed782308fe56a846279 Mon Sep 17 00:00:00 2001
From: tlrx
Date: Mon, 27 Oct 2025 18:03:36 +0100
Subject: [PATCH 02/20] fix bug
---
.../datastreams/TSDBSyntheticIdsIT.java | 97 +++++++++++--------
.../cluster/routing/IndexRouting.java | 4 +-
.../lucene/uid/VersionsAndSeqNoResolver.java | 5 +-
.../elasticsearch/index/IndexSortConfig.java | 1 +
.../tsdb/TSDBSyntheticIdFieldsProducer.java | 11 ++-
.../index/engine/InternalEngine.java | 10 +-
.../elasticsearch/index/mapper/IdLoader.java | 18 +++-
.../index/mapper/ParsedDocument.java | 6 +-
.../mapper/TsidExtractingIdFieldMapper.java | 14 +--
.../search/DefaultSearchContext.java | 2 +-
.../index/mapper/IdLoaderTests.java | 6 +-
11 files changed, 105 insertions(+), 69 deletions(-)
diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
index 81e99d154060d..9d672b2c84ccc 100644
--- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
+++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
@@ -22,10 +22,13 @@
import org.elasticsearch.cluster.metadata.Template;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.time.DateFormatter;
+import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.IdFieldMapper;
+import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;
import org.elasticsearch.test.junit.annotations.TestLogging;
@@ -45,8 +48,10 @@
import static org.elasticsearch.common.time.FormatNames.STRICT_DATE_OPTIONAL_TIME;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertCheckedResponse;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
+import static org.hamcrest.Matchers.arrayWithSize;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;
@@ -101,7 +106,7 @@ public void testInvalidIndexMode() {
public void testSyntheticId() throws Exception {
assumeTrue("Test should only run with feature flag", IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG);
final var dataStreamName = randomIdentifier();
- putDataStreamTemplate(dataStreamName, randomIntBetween(1, 3));
+ putDataStreamTemplate(dataStreamName, 1);
final var docs = new HashMap<String, String>();
final var unit = randomFrom(ChronoUnit.SECONDS, ChronoUnit.MINUTES);
@@ -160,7 +165,7 @@ enum Operation {
var randomDocs = randomSubsetOf(randomIntBetween(0, results.length), results);
for (var doc : randomDocs) {
boolean fetchSource = randomBoolean();
- var getResponse = client().prepareGet(doc.getIndex(), doc.getId()).setFetchSource(fetchSource).execute().actionGet();
+ var getResponse = client().prepareGet(doc.getIndex(), doc.getId()).setFetchSource(fetchSource).get();
assertThat(getResponse.isExists(), equalTo(true));
assertThat(getResponse.getVersion(), equalTo(1L));
@@ -170,20 +175,6 @@ enum Operation {
}
}
- // Update by synthetic _id
- //
- // Note: it doesn't work, is that expected? Is is blocked by IndexRouting.ExtractFromSource.updateShard
- var updateDocId = randomFrom(docs.keySet());
- var updateDocIndex = docs.get(updateDocId);
- var exception = expectThrows(IllegalArgumentException.class, () -> {
- var doc = document(timestamp, "vm-dev01", "cpu-load", 10); // update
- client().prepareUpdate(updateDocIndex, updateDocId).setDoc(doc).get();
- });
- assertThat(
- exception.getMessage(),
- containsString("update is not supported because the destination index [" + updateDocIndex + "] is in time_series mode")
- );
-
// Random flush or refresh or nothing, so that the next DELETEs are executed on flushed segments or in memory segments.
switch (randomFrom(Operation.values())) {
case FLUSH:
@@ -199,45 +190,73 @@ enum Operation {
// Delete by synthetic _id
var deletedDocs = randomSubsetOf(randomIntBetween(1, docs.size()), docs.keySet());
- for (var deletedDocId : deletedDocs) {
- var deletedDocIndex = docs.get(deletedDocId);
+ for (var docId : deletedDocs) {
+ var deletedDocIndex = docs.get(docId);
+ assertThat(deletedDocIndex, notNullValue());
// Delete
- var deleteResponse = client().prepareDelete(deletedDocIndex, deletedDocId).get();
- assertThat(deleteResponse.getId(), equalTo(deletedDocId));
+ var deleteResponse = client().prepareDelete(deletedDocIndex, docId).get();
+ assertThat(deleteResponse.getId(), equalTo(docId));
assertThat(deleteResponse.getIndex(), equalTo(deletedDocIndex));
assertThat(deleteResponse.getResult(), equalTo(DocWriteResponse.Result.DELETED));
assertThat(deleteResponse.getVersion(), equalTo(2L));
-
- // Get returns "not found"
- var getResponse = client().prepareGet(deletedDocIndex, deletedDocId).get();
- assertThat(getResponse.getId(), equalTo(deletedDocId));
- assertThat(getResponse.getIndex(), equalTo(deletedDocIndex));
- assertThat(getResponse.isExists(), equalTo(false));
}
- flushAndRefresh(dataStreamName);
+ refresh(dataStreamName);
- // Check that synthetic _id field have no postings on disk
- var indices = new HashSet<>(docs.values());
- for (var index : indices) {
- var diskUsage = diskUsage(index);
- var diskUsageIdField = AnalyzeIndexDiskUsageTestUtils.getPerFieldDiskUsage(diskUsage, IdFieldMapper.NAME);
- assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
- }
-
- /* This does not work :-(
assertCheckedResponse(
- client().prepareSearch(dataStreamName).setTrackTotalHits(true),
+ client().prepareSearch(dataStreamName).setTrackTotalHits(true).setSize(100),
searchResponse -> {
assertHitCount(searchResponse, docs.size() - deletedDocs.size());
// Verify that search response does not contain deleted docs
for (var searchHit : searchResponse.getHits()) {
- assertThat(deletedDocs.contains(searchHit.getId()), equalTo(false));
+ assertThat(
+ "Document with id [" + searchHit.getId() + "] is deleted",
+ deletedDocs.contains(searchHit.getId()),
+ equalTo(false)
+ );
}
}
- );*/
+ );
+
+ // Search by synthetic _id
+ var otherDocs = randomSubsetOf(Sets.difference(docs.keySet(), Sets.newHashSet(deletedDocs)));
+ for (var docId : otherDocs) {
+ assertCheckedResponse(
+ client().prepareSearch(docs.get(docId))
+ .setSource(new SearchSourceBuilder().query(new TermQueryBuilder(IdFieldMapper.NAME, docId))),
+ searchResponse -> {
+ assertHitCount(searchResponse, 1L);
+ assertThat(searchResponse.getHits().getHits(), arrayWithSize(1));
+ assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo(docId));
+ }
+ );
+ }
+
+ // Update by synthetic _id
+ //
+ // Note: it doesn't work, is that expected? Is it blocked by IndexRouting.ExtractFromSource.updateShard
+ var updateDocId = randomFrom(docs.keySet());
+ var updateDocIndex = docs.get(updateDocId);
+ var exception = expectThrows(IllegalArgumentException.class, () -> {
+ var doc = document(timestamp, "vm-dev01", "cpu-load", 10); // update
+ client().prepareUpdate(updateDocIndex, updateDocId).setDoc(doc).get();
+ });
+ assertThat(
+ exception.getMessage(),
+ containsString("update is not supported because the destination index [" + updateDocIndex + "] is in time_series mode")
+ );
+
+ flush(dataStreamName);
+
+ // Check that synthetic _id field have no postings on disk
+ var indices = new HashSet<>(docs.values());
+ for (var index : indices) {
+ var diskUsage = diskUsage(index);
+ var diskUsageIdField = AnalyzeIndexDiskUsageTestUtils.getPerFieldDiskUsage(diskUsage, IdFieldMapper.NAME);
+ assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
+ }
}
public void testGetFromTranslogBySyntheticId() throws Exception {
diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java
index 3c1a7c8fcbaed..9eccb663a5362 100644
--- a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java
+++ b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java
@@ -397,7 +397,6 @@ public int updateShard(String id, @Nullable String routing) {
public int deleteShard(String id, @Nullable String routing) {
checkNoRouting(routing);
int shardId = idToHash(id);
- System.out.println("id " + id + " routed to " + shardId);
return rerouteWritesIfResharding(shardId);
}
@@ -431,7 +430,7 @@ private int idToHash(String id) {
hash = ByteUtils.readIntLE(idBytes, idBytes.length - 9);
} else if (useTimeSeriesSyntheticId) {
// For TSDB with synthetic ids, the hash is stored as the id suffix.
- hash = TsidExtractingIdFieldMapper.extractRoutingHashFromSyntheticId(idBytes);
+ hash = TsidExtractingIdFieldMapper.extractRoutingHashFromSyntheticId(new BytesRef(idBytes));
} else {
// For TSDB, the hash is stored as the id prefix.
hash = ByteUtils.readIntLE(idBytes, 0);
@@ -526,7 +525,6 @@ public static class ForIndexDimensions extends ExtractFromSource {
@Override
protected int hashSource(IndexRequest indexRequest) {
- System.out.println("hashSource for tsid");
BytesRef tsid = indexRequest.tsid();
if (tsid == null) {
tsid = buildTsid(indexRequest.getContentType(), indexRequest.indexSource().bytes());
diff --git a/server/src/main/java/org/elasticsearch/common/lucene/uid/VersionsAndSeqNoResolver.java b/server/src/main/java/org/elasticsearch/common/lucene/uid/VersionsAndSeqNoResolver.java
index b5485f3cbf1f9..5307d5d933421 100644
--- a/server/src/main/java/org/elasticsearch/common/lucene/uid/VersionsAndSeqNoResolver.java
+++ b/server/src/main/java/org/elasticsearch/common/lucene/uid/VersionsAndSeqNoResolver.java
@@ -169,11 +169,12 @@ public static DocIdAndVersion timeSeriesLoadDocIdAndVersion(
boolean loadSeqNo,
boolean useSyntheticId
) throws IOException {
- byte[] idAsBytes = Base64.getUrlDecoder().decode(id);
final long timestamp;
if (useSyntheticId) {
- timestamp = TsidExtractingIdFieldMapper.extractTimestampFromSyntheticId(idAsBytes);
+ assert uid.equals(new BytesRef(Base64.getUrlDecoder().decode(id)));
+ timestamp = TsidExtractingIdFieldMapper.extractTimestampFromSyntheticId(uid);
} else {
+ byte[] idAsBytes = Base64.getUrlDecoder().decode(id);
timestamp = TsidExtractingIdFieldMapper.extractTimestampFromId(idAsBytes);
}
PerThreadIDVersionAndSeqNoLookup[] lookups = getLookupState(reader, true);
diff --git a/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java b/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
index 8235b94b4c3a9..ac2a6652f824d 100644
--- a/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
+++ b/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
@@ -22,6 +22,7 @@
import org.elasticsearch.index.mapper.DataStreamTimestampFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
+import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.sort.SortOrder;
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
index 2b70d7a9adfef..8cfb917e3cdff 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
@@ -427,7 +427,7 @@ public SeekStatus seekCeil(BytesRef id) throws IOException {
}
// Extract the _tsid
- final BytesRef tsId = TsidExtractingIdFieldMapper.extractTimeSeriesIdFromSyntheticId(id.bytes);
+ final BytesRef tsId = TsidExtractingIdFieldMapper.extractTimeSeriesIdFromSyntheticId(id);
int tsIdOrd = docValues.lookupTsIdTerm(tsId);
// _tsid not found
@@ -453,7 +453,9 @@ public SeekStatus seekCeil(BytesRef id) throws IOException {
}
// _tsid found, extract the timestamp
- final long timestamp = TsidExtractingIdFieldMapper.extractTimestampFromSyntheticId(id.bytes);
+ final long timestamp = TsidExtractingIdFieldMapper.extractTimestampFromSyntheticId(id);
+
+ Ici on doit chercher après le dernier doc.
// Slow scan to the first document matching the _tsid
final int startDocID = docValues.slowScanToFirstDocWithTsIdOrdinalEqualTo(tsIdOrd);
@@ -481,6 +483,11 @@ public SeekStatus seekCeil(BytesRef id) throws IOException {
}
}
+ if (docID == maxDocs -1) {
+ current = NO_MORE_DOCS;
+ return SeekStatus.END;
+ }
+
// set the terms enum on the first non-matching document
current = new SyntheticTerm(
docID,
diff --git a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java
index 7a114b762f952..a97cfca88e253 100644
--- a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java
+++ b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java
@@ -1448,7 +1448,7 @@ private IndexResult indexIntoLucene(Index index, IndexingStrategy plan) throws I
index.parsedDoc().updateSeqID(index.seqNo(), index.primaryTerm());
index.parsedDoc().version().setLongValue(plan.versionForIndexing);
try {
- logDocumentsDetails(index.docs());
+ logDocumentsDetails(index.docs(), index.id(), index.uid());
if (plan.addStaleOpToLucene) {
addStaleDocs(index.docs(), indexWriter);
} else if (plan.useLuceneUpdateDocument) {
@@ -1484,10 +1484,10 @@ && treatDocumentFailureAsTragicError(index) == false) {
}
}
- private void logDocumentsDetails(List<LuceneDocument> docs) {
+ private void logDocumentsDetails(List<LuceneDocument> docs, String id, BytesRef uid) {
if (useTsdbSyntheticId && logger.isTraceEnabled()) {
for (var doc : docs) {
- logger.trace("indexing document fields [{}]", doc.getFields());
+ logger.trace("indexing document [id: {}, uid: {}]:\n{}\r\n", id, uid, doc.getFields());
}
}
}
@@ -1877,7 +1877,7 @@ private DeleteResult deleteInLucene(Delete delete, DeletionStrategy plan) throws
assert doc.getField(SeqNoFieldMapper.TOMBSTONE_NAME) != null
: "Delete tombstone document but _tombstone field is not set [" + doc + " ]";
doc.add(softDeletesField);
- logDocumentsDetails(List.of(doc));
+ logDocumentsDetails(List.of(doc), delete.id(), delete.uid());
if (plan.addStaleOpToLucene || plan.currentlyDeleted) {
indexWriter.addDocument(doc);
} else {
@@ -2815,7 +2815,7 @@ private IndexWriterConfig getIndexWriterConfig() {
new SoftDeletesRetentionMergePolicy(
Lucene.SOFT_DELETES_FIELD,
() -> softDeletesPolicy.getRetentionQuery(engineConfig.getIndexSettings().seqNoIndexOptions()),
- new PrunePostingsMergePolicy(mergePolicy, IdFieldMapper.NAME)
+ useTsdbSyntheticId ? mergePolicy : new PrunePostingsMergePolicy(mergePolicy, IdFieldMapper.NAME)
)
);
if (SHUFFLE_FORCE_MERGE) {
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IdLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/IdLoader.java
index 9ceae1c750733..30407f8b4645f 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/IdLoader.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/IdLoader.java
@@ -38,8 +38,12 @@ static IdLoader fromLeafStoredFieldLoader() {
/**
* @return returns an {@link IdLoader} instance that syn synthesizes _id from routing, _tsid and @timestamp fields.
*/
- static IdLoader createTsIdLoader(IndexRouting.ExtractFromSource.ForRoutingPath indexRouting, List<String> routingPaths) {
- return new TsIdLoader(indexRouting, routingPaths);
+ static IdLoader createTsIdLoader(
+ IndexRouting.ExtractFromSource.ForRoutingPath indexRouting,
+ List<String> routingPaths,
+ boolean useSyntheticId
+ ) {
+ return new TsIdLoader(indexRouting, routingPaths, useSyntheticId);
}
Leaf leaf(LeafStoredFieldLoader loader, LeafReader reader, int[] docIdsInLeaf) throws IOException;
@@ -61,10 +65,12 @@ final class TsIdLoader implements IdLoader {
private final IndexRouting.ExtractFromSource.ForRoutingPath indexRouting;
private final List<String> routingPaths;
+ private final boolean useSyntheticId;
- TsIdLoader(IndexRouting.ExtractFromSource.ForRoutingPath indexRouting, List<String> routingPaths) {
+ TsIdLoader(IndexRouting.ExtractFromSource.ForRoutingPath indexRouting, List<String> routingPaths, boolean useSyntheticId) {
this.routingPaths = routingPaths;
this.indexRouting = indexRouting;
+ this.useSyntheticId = useSyntheticId;
}
public IdLoader.Leaf leaf(LeafStoredFieldLoader loader, LeafReader reader, int[] docIdsInLeaf) throws IOException {
@@ -119,7 +125,11 @@ public IdLoader.Leaf leaf(LeafStoredFieldLoader loader, LeafReader reader, int[]
int routingHash = TimeSeriesRoutingHashFieldMapper.decode(
Uid.decodeId(routingHashBytes.bytes, routingHashBytes.offset, routingHashBytes.length)
);
- ids[i] = TsidExtractingIdFieldMapper.createId(routingHash, tsid, timestamp);
+ if (useSyntheticId) {
+ ids[i] = TsidExtractingIdFieldMapper.createSyntheticId(tsid, timestamp, routingHash);
+ } else {
+ ids[i] = TsidExtractingIdFieldMapper.createId(routingHash, tsid, timestamp);
+ }
}
}
return new TsIdLeaf(docIdsInLeaf, ids);
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/ParsedDocument.java b/server/src/main/java/org/elasticsearch/index/mapper/ParsedDocument.java
index 17708a2d2ad82..ef56ad180e32f 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/ParsedDocument.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/ParsedDocument.java
@@ -101,9 +101,9 @@ public static ParsedDocument deleteTombstone(
// Use a synthetic _id field which is not indexed nor stored
document.add(IdFieldMapper.syntheticIdField(id));
- var timeSeriesId = TsidExtractingIdFieldMapper.extractTimeSeriesIdFromSyntheticId(uid.bytes);
- var timestamp = TsidExtractingIdFieldMapper.extractTimestampFromSyntheticId(uid.bytes);
- var routingHash = TsidExtractingIdFieldMapper.extractRoutingHashBytesFromSyntheticId(uid.bytes);
+ var timeSeriesId = TsidExtractingIdFieldMapper.extractTimeSeriesIdFromSyntheticId(uid);
+ var timestamp = TsidExtractingIdFieldMapper.extractTimestampFromSyntheticId(uid);
+ var routingHash = TsidExtractingIdFieldMapper.extractRoutingHashBytesFromSyntheticId(uid);
if (useDocValuesSkipper) {
document.add(SortedDocValuesField.indexedField(TimeSeriesIdFieldMapper.NAME, timeSeriesId));
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
index 07605b0603f82..dba74d69fa8fb 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
@@ -170,27 +170,27 @@ public static String createSyntheticId(BytesRef tsid, long timestamp, int routin
return Strings.BASE_64_NO_PADDING_URL_ENCODER.encodeToString(id.bytes);
}
- public static BytesRef extractTimeSeriesIdFromSyntheticId(byte[] id) {
+ public static BytesRef extractTimeSeriesIdFromSyntheticId(BytesRef id) {
assert id.length > Long.BYTES + Integer.BYTES;
// See #createSyntheticId
byte[] tsId = new byte[Math.toIntExact(id.length - Long.BYTES - Integer.BYTES)];
- System.arraycopy(id, 0, tsId, 0, tsId.length);
+ System.arraycopy(id.bytes, id.offset, tsId, 0, tsId.length);
return new BytesRef(tsId);
}
- public static long extractTimestampFromSyntheticId(byte[] id) {
+ public static long extractTimestampFromSyntheticId(BytesRef id) {
assert id.length > Long.BYTES + Integer.BYTES;
// See #createSyntheticId
- return ByteUtils.readLongBE(id, id.length - Long.BYTES - Integer.BYTES);
+ return ByteUtils.readLongBE(id.bytes, id.offset + id.length - Long.BYTES - Integer.BYTES);
}
- public static int extractRoutingHashFromSyntheticId(byte[] id) {
+ public static int extractRoutingHashFromSyntheticId(BytesRef id) {
assert id.length > Long.BYTES + Integer.BYTES;
// See #createSyntheticId
- return ByteUtils.readIntBE(id, id.length - Integer.BYTES);
+ return ByteUtils.readIntBE(id.bytes, id.offset + id.length - Integer.BYTES);
}
- public static BytesRef extractRoutingHashBytesFromSyntheticId(byte[] id) {
+ public static BytesRef extractRoutingHashBytesFromSyntheticId(BytesRef id) {
int hash = extractRoutingHashFromSyntheticId(id);
return Uid.encodeId(TimeSeriesRoutingHashFieldMapper.encode(hash));
}
diff --git a/server/src/main/java/org/elasticsearch/search/DefaultSearchContext.java b/server/src/main/java/org/elasticsearch/search/DefaultSearchContext.java
index 475d2d1887563..bf95bc3ccf69f 100644
--- a/server/src/main/java/org/elasticsearch/search/DefaultSearchContext.java
+++ b/server/src/main/java/org/elasticsearch/search/DefaultSearchContext.java
@@ -971,7 +971,7 @@ public IdLoader newIdLoader() {
}
}
}
- return IdLoader.createTsIdLoader(indexRouting, routingPaths);
+ return IdLoader.createTsIdLoader(indexRouting, routingPaths, indexService.getIndexSettings().useTsdbSyntheticId());
} else {
return IdLoader.fromLeafStoredFieldLoader();
}
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IdLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IdLoaderTests.java
index 083efccceec16..d75a013eead28 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/IdLoaderTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/IdLoaderTests.java
@@ -46,7 +46,7 @@ public class IdLoaderTests extends ESTestCase {
private final int routingHash = randomInt();
public void testSynthesizeIdSimple() throws Exception {
- var idLoader = IdLoader.createTsIdLoader(null, null);
+ var idLoader = IdLoader.createTsIdLoader(null, null, indexService.getIndexSettings().useTsdbSyntheticId());
long startTime = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-01-01T00:00:00Z");
List docs = List.of(
@@ -68,7 +68,7 @@ public void testSynthesizeIdSimple() throws Exception {
}
public void testSynthesizeIdMultipleSegments() throws Exception {
- var idLoader = IdLoader.createTsIdLoader(null, null);
+ var idLoader = IdLoader.createTsIdLoader(null, null, indexService.getIndexSettings().useTsdbSyntheticId());
long startTime = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-01-01T00:00:00Z");
List docs1 = List.of(
@@ -138,7 +138,7 @@ public void testSynthesizeIdMultipleSegments() throws Exception {
}
public void testSynthesizeIdRandom() throws Exception {
- var idLoader = IdLoader.createTsIdLoader(null, null);
+ var idLoader = IdLoader.createTsIdLoader(null, null, indexService.getIndexSettings().useTsdbSyntheticId());
long startTime = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-01-01T00:00:00Z");
Set<String> expectedIDs = new HashSet<>();
From f6234c34e19358d9afde1fe45927ba483958f782 Mon Sep 17 00:00:00 2001
From: tlrx
Date: Tue, 28 Oct 2025 10:16:14 +0100
Subject: [PATCH 03/20] fix remaining bug
---
.../datastreams/TSDBSyntheticIdsIT.java | 14 +++++-----
.../tsdb/TSDBSyntheticIdFieldsProducer.java | 27 +++++++------------
2 files changed, 16 insertions(+), 25 deletions(-)
diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
index 9d672b2c84ccc..1d62e2efe52b5 100644
--- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
+++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
@@ -261,8 +261,8 @@ enum Operation {
public void testGetFromTranslogBySyntheticId() throws Exception {
assumeTrue("Test should only run with feature flag", IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG);
- final var datastreamName = randomIdentifier();
- putDataStreamTemplate(datastreamName, 1);
+ final var dataStreamName = randomIdentifier();
+ putDataStreamTemplate(dataStreamName, 1);
final var docs = new HashMap<String, String>();
final var unit = randomFrom(ChronoUnit.SECONDS, ChronoUnit.MINUTES);
@@ -272,7 +272,7 @@ public void testGetFromTranslogBySyntheticId() throws Exception {
//
// For convenience, the metric value maps the index in the bulk response items
var results = createDocuments(
- datastreamName,
+ dataStreamName,
// t + 0s
document(timestamp, "vm-dev01", "cpu-load", 0),
document(timestamp, "vm-dev02", "cpu-load", 1),
@@ -295,7 +295,7 @@ public void testGetFromTranslogBySyntheticId() throws Exception {
// The documents are in memory buffers: the first GET will trigger the refresh of the internal reader
// (see InternalEngine.REAL_TIME_GET_REFRESH_SOURCE) to have an up-to-date searcher to resolve documents ids and versions. It will
// also enable the tracking of the locations of documents in the translog (see InternalEngine.trackTranslogLocation) so that next
- // GETs will be resolved with the translog.
+ // GETs will be resolved using the translog.
var randomDocs = randomSubsetOf(randomIntBetween(1, results.length), results);
for (var doc : randomDocs) {
var getResponse = client().prepareGet(doc.getIndex(), doc.getId()).setRealtime(true).setFetchSource(true).execute().actionGet();
@@ -310,7 +310,7 @@ public void testGetFromTranslogBySyntheticId() throws Exception {
// Index 5 more docs
results = createDocuments(
- datastreamName,
+ dataStreamName,
// t + 2s
document(timestamp.plus(2, unit), "vm-dev01", "cpu-load", metricOffset),
document(timestamp.plus(2, unit), "vm-dev02", "cpu-load", metricOffset + 1),
@@ -342,7 +342,7 @@ public void testGetFromTranslogBySyntheticId() throws Exception {
assertThat(asInstanceOf(Integer.class, source.get("value")), equalTo(metricOffset + doc.getItemId()));
}
- flushAndRefresh(datastreamName);
+ flushAndRefresh(dataStreamName);
// Check that synthetic _id field have no postings on disk
var indices = new HashSet<>(docs.values());
@@ -352,7 +352,7 @@ public void testGetFromTranslogBySyntheticId() throws Exception {
assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
}
- assertHitCount(client().prepareSearch(datastreamName).setSize(0), 10L);
+ assertHitCount(client().prepareSearch(dataStreamName).setSize(0), 10L);
}
private static XContentBuilder document(Instant timestamp, String hostName, String metricField, Integer metricValue)
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
index 8cfb917e3cdff..29540f61de1bb 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
@@ -37,6 +37,10 @@
import static org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdPostingsFormat.TIMESTAMP;
import static org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdPostingsFormat.TS_ID;
+/**
+ * Produces synthetic _id terms that are computed at runtime from the doc values of other fields like _tsid, @timestamp and
+ * _ts_routing_hash.
+ */
public class TSDBSyntheticIdFieldsProducer extends FieldsProducer {
private static final Set FIELDS_NAMES = Set.of(SYNTHETIC_ID);
@@ -402,6 +406,7 @@ public BytesRef next() throws IOException {
if (current == NO_MORE_DOCS) {
return null;
}
+
int docID = (current != null) ? current.docID + 1 : 0;
if (maxDocs <= docID) {
current = NO_MORE_DOCS;
@@ -420,6 +425,7 @@ public BytesRef next() throws IOException {
@Override
public SeekStatus seekCeil(BytesRef id) throws IOException {
+
assert id != null;
assert Long.BYTES + Integer.BYTES < id.length : id.length;
if (id == null || id.length <= Long.BYTES + Integer.BYTES) {
@@ -455,15 +461,13 @@ public SeekStatus seekCeil(BytesRef id) throws IOException {
// _tsid found, extract the timestamp
final long timestamp = TsidExtractingIdFieldMapper.extractTimestampFromSyntheticId(id);
- Ici on doit chercher après le dernier doc.
-
// Slow scan to the first document matching the _tsid
final int startDocID = docValues.slowScanToFirstDocWithTsIdOrdinalEqualTo(tsIdOrd);
assert 0 <= startDocID : startDocID;
int docID = startDocID;
int docTsIdOrd = tsIdOrd;
- long docTimestamp = -1;
+ long docTimestamp;
// Iterate over documents to find the first one matching the timestamp
for (; docID < maxDocs; docID++) {
@@ -482,21 +486,8 @@ public SeekStatus seekCeil(BytesRef id) throws IOException {
break;
}
}
-
- if (docID == maxDocs -1) {
- current = NO_MORE_DOCS;
- return SeekStatus.END;
- }
-
- // set the terms enum on the first non-matching document
- current = new SyntheticTerm(
- docID,
- docTsIdOrd,
- docValues.lookupTsIdOrd(docTsIdOrd),
- docTimestamp,
- docValues.docRoutingHash(docID)
- );
- return SeekStatus.NOT_FOUND;
+ current = NO_MORE_DOCS;
+ return SeekStatus.END;
}
@Override
From 51d66a312726c4db8cc2d799d54fe658d6d65407 Mon Sep 17 00:00:00 2001
From: tlrx
Date: Tue, 28 Oct 2025 16:35:56 +0100
Subject: [PATCH 04/20] fix sorting
---
.../datastreams/TSDBSyntheticIdsIT.java | 29 ++++++-------
.../org/elasticsearch/index/IndexMode.java | 42 +++++++++++--------
.../elasticsearch/index/IndexSortConfig.java | 13 +++++-
.../tsdb/TSDBSyntheticIdFieldsProducer.java | 4 +-
.../mapper/TsidExtractingIdFieldMapper.java | 8 +++-
5 files changed, 56 insertions(+), 40 deletions(-)
diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
index 1d62e2efe52b5..ca9b3c9a33ef3 100644
--- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
+++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
@@ -31,7 +31,6 @@
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;
-import org.elasticsearch.test.junit.annotations.TestLogging;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
@@ -102,11 +101,10 @@ public void testInvalidIndexMode() {
);
}
- @TestLogging(reason = "debug", value = "org.elasticsearch.index.engine.Engine:TRACE")
public void testSyntheticId() throws Exception {
assumeTrue("Test should only run with feature flag", IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG);
final var dataStreamName = randomIdentifier();
- putDataStreamTemplate(dataStreamName, 1);
+ putDataStreamTemplate(dataStreamName, randomIntBetween(1, 5));
final var docs = new HashMap<String, String>();
final var unit = randomFrom(ChronoUnit.SECONDS, ChronoUnit.MINUTES);
@@ -204,21 +202,18 @@ enum Operation {
refresh(dataStreamName);
- assertCheckedResponse(
- client().prepareSearch(dataStreamName).setTrackTotalHits(true).setSize(100),
- searchResponse -> {
- assertHitCount(searchResponse, docs.size() - deletedDocs.size());
-
- // Verify that search response does not contain deleted docs
- for (var searchHit : searchResponse.getHits()) {
- assertThat(
- "Document with id [" + searchHit.getId() + "] is deleted",
- deletedDocs.contains(searchHit.getId()),
- equalTo(false)
- );
- }
+ assertCheckedResponse(client().prepareSearch(dataStreamName).setTrackTotalHits(true).setSize(100), searchResponse -> {
+ assertHitCount(searchResponse, docs.size() - deletedDocs.size());
+
+ // Verify that search response does not contain deleted docs
+ for (var searchHit : searchResponse.getHits()) {
+ assertThat(
+ "Document with id [" + searchHit.getId() + "] is deleted",
+ deletedDocs.contains(searchHit.getId()),
+ equalTo(false)
+ );
}
- );
+ });
// Search by synthetic _id
var otherDocs = randomSubsetOf(Sets.difference(docs.keySet(), Sets.newHashSet(deletedDocs)));
diff --git a/server/src/main/java/org/elasticsearch/index/IndexMode.java b/server/src/main/java/org/elasticsearch/index/IndexMode.java
index 10e604126f934..726610efb927d 100644
--- a/server/src/main/java/org/elasticsearch/index/IndexMode.java
+++ b/server/src/main/java/org/elasticsearch/index/IndexMode.java
@@ -42,15 +42,13 @@
import java.io.IOException;
import java.time.Instant;
import java.util.Arrays;
+import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.function.BooleanSupplier;
import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-import static java.util.stream.Collectors.toSet;
/**
* "Mode" that controls which behaviors and settings an index supports.
@@ -142,6 +140,14 @@ void validateWithOtherSettings(Map<Setting<?>, Object> settings) {
}
var settingsWithIndexMode = Settings.builder().put(IndexSettings.MODE.getKey(), getName()).build();
+ if (IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG) {
+ settingsWithIndexMode = Settings.builder()
+ .put(IndexSettings.MODE.getKey(), getName())
+ .put(IndexSettings.USE_SYNTHETIC_ID.getKey(), "true")
+ .build();
+ } else {
+ settingsWithIndexMode = Settings.builder().put(IndexSettings.MODE.getKey(), getName()).build();
+ }
for (Setting<?> unsupported : TIME_SERIES_UNSUPPORTED) {
if (false == Objects.equals(unsupported.getDefault(settingsWithIndexMode), settings.get(unsupported))) {
@@ -460,20 +466,22 @@ private static CompressedXContent createDefaultMapping(boolean includeHostName)
IndexSortConfig.INDEX_SORT_MISSING_SETTING
);
- static final List> VALIDATE_WITH_SETTINGS = List.copyOf(
- Stream.concat(
- Stream.of(
- IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING,
- IndexMetadata.INDEX_ROUTING_PARTITION_SIZE_SETTING,
- IndexMetadata.INDEX_ROUTING_PATH,
- IndexMetadata.INDEX_DIMENSIONS,
- IndexSettings.LOGSDB_ROUTE_ON_SORT_FIELDS,
- IndexSettings.TIME_SERIES_START_TIME,
- IndexSettings.TIME_SERIES_END_TIME
- ),
- TIME_SERIES_UNSUPPORTED.stream()
- ).collect(toSet())
- );
+ static final List> VALIDATE_WITH_SETTINGS;
+ static {
+ var settings = new HashSet>();
+ settings.add(IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING);
+ settings.add(IndexMetadata.INDEX_ROUTING_PARTITION_SIZE_SETTING);
+ settings.add(IndexMetadata.INDEX_ROUTING_PATH);
+ settings.add(IndexMetadata.INDEX_DIMENSIONS);
+ settings.add(IndexSettings.LOGSDB_ROUTE_ON_SORT_FIELDS);
+ settings.add(IndexSettings.TIME_SERIES_START_TIME);
+ settings.add(IndexSettings.TIME_SERIES_END_TIME);
+ if (IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG) {
+ settings.add(IndexSettings.USE_SYNTHETIC_ID);
+ }
+ settings.addAll(TIME_SERIES_UNSUPPORTED);
+ VALIDATE_WITH_SETTINGS = List.copyOf(settings);
+ }
private final String name;
diff --git a/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java b/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
index ac2a6652f824d..69c171fd842c0 100644
--- a/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
+++ b/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
@@ -22,7 +22,6 @@
import org.elasticsearch.index.mapper.DataStreamTimestampFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
-import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.sort.SortOrder;
@@ -107,12 +106,16 @@ public final class IndexSortConfig {
);
public static class IndexSortConfigDefaults {
- public static final FieldSortSpec[] TIME_SERIES_SORT, TIMESTAMP_SORT, HOSTNAME_TIMESTAMP_SORT, HOSTNAME_TIMESTAMP_BWC_SORT;
+ public static final FieldSortSpec[] TIME_SERIES_SORT, TIME_SERIES_WITH_SYNTHETIC_ID_SORT, TIMESTAMP_SORT, HOSTNAME_TIMESTAMP_SORT,
+ HOSTNAME_TIMESTAMP_BWC_SORT;
static {
FieldSortSpec timeStampSpec = new FieldSortSpec(DataStreamTimestampFieldMapper.DEFAULT_PATH);
timeStampSpec.order = SortOrder.DESC;
TIME_SERIES_SORT = new FieldSortSpec[] { new FieldSortSpec(TimeSeriesIdFieldMapper.NAME), timeStampSpec };
+ TIME_SERIES_WITH_SYNTHETIC_ID_SORT = new FieldSortSpec[] {
+ new FieldSortSpec(TimeSeriesIdFieldMapper.NAME),
+ new FieldSortSpec(DataStreamTimestampFieldMapper.DEFAULT_PATH) };
TIMESTAMP_SORT = new FieldSortSpec[] { timeStampSpec };
FieldSortSpec hostnameSpec = new FieldSortSpec(IndexMode.HOST_NAME);
@@ -141,6 +144,12 @@ public static FieldSortSpec[] getDefaultSortSpecs(Settings settings) {
}
if (IndexMode.TIME_SERIES.getName().equals(indexMode)) {
+ if (IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG) {
+ var s = settings.get(IndexSettings.USE_SYNTHETIC_ID.getKey());
+ if (s != null) {
+ return TIME_SERIES_WITH_SYNTHETIC_ID_SORT;
+ }
+ }
return TIME_SERIES_SORT;
} else if (IndexMode.LOGSDB.getName().equals(indexMode)) {
var version = IndexMetadata.SETTING_INDEX_VERSION_CREATED.get(settings);
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
index 29540f61de1bb..71cd99c00440f 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
@@ -150,7 +150,7 @@ private static class DocValuesHolder {
private final FieldInfo routingHashFieldInfo;
private final DocValuesProducer docValuesProducer;
- private SortedNumericDocValues timestampDocValues; // sorted desc. order
+ private SortedNumericDocValues timestampDocValues; // sorted asc. order
private SortedDocValues routingHashDocValues; // sorted asc. order
private SortedDocValues tsIdDocValues; // sorted asc. order
// Keep around the latest tsId ordinal and value
@@ -482,7 +482,7 @@ public SeekStatus seekCeil(BytesRef id) throws IOException {
return SeekStatus.FOUND;
}
// Remaining docs don't match, stop here
- if (tsIdOrd < docTsIdOrd || docTimestamp < timestamp) {
+ if (tsIdOrd < docTsIdOrd || timestamp < docTimestamp) {
break;
}
}
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
index dba74d69fa8fb..64d834ef3a7bf 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
@@ -158,9 +158,13 @@ public static BytesRef createSyntheticIdBytesRef(BytesRef tsid, long timestamp,
// when applying doc values updates in Lucene, or when routing GET or DELETE requests to the corresponding shard, or when replaying
// translog operations. Since the synthetic _id is not indexed and not really stored on disk we consider it fine if it is longer
// that standard ids.
+ //
+ // Also, when applying doc values updates Lucene expects _id to be sorted: it stops applying updates for a term "_id:ABC" if it
+ // seeks to a term "BCD" as it knows there won't be more documents matching "_id:ABC" past the term "BCD". So it is important to
+ // generate an _id that reflects the ordering of the terms it is synthesized from, i.e. _tsid and @timestamp.
byte[] bytes = new byte[tsid.length + Long.BYTES + Integer.BYTES];
- System.arraycopy(tsid.bytes, 0, bytes, 0, tsid.length);
- ByteUtils.writeLongBE(timestamp, bytes, tsid.length);
+ System.arraycopy(tsid.bytes, tsid.offset, bytes, 0, tsid.length);
+ ByteUtils.writeLongBE(timestamp, bytes, tsid.length); // Big Endian as we want the most significant byte first
ByteUtils.writeIntBE(routingHash, bytes, tsid.length + Long.BYTES);
return new BytesRef(bytes);
}
From 8cfa2fab051d5e7604cb55ef68cf9a0340a855ff Mon Sep 17 00:00:00 2001
From: tlrx
Date: Tue, 28 Oct 2025 18:00:37 +0100
Subject: [PATCH 05/20] fix compiling and tests
---
.../common/lucene/uid/VersionLookupTests.java | 2 +-
.../common/lucene/uid/VersionsTests.java | 22 +++++++++---
.../index/mapper/IdLoaderTests.java | 35 +++++++++++--------
.../index/shard/IndexShardTests.java | 2 +-
4 files changed, 40 insertions(+), 21 deletions(-)
diff --git a/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java b/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java
index 2e69987f29180..59c8a2028d98a 100644
--- a/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java
+++ b/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java
@@ -159,7 +159,7 @@ public void testLoadTimestampRangeWithDeleteTombstone() throws Exception {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER).setMergePolicy(NoMergePolicy.INSTANCE));
var randomSeqNoIndexOptions = randomFrom(SeqNoFieldMapper.SeqNoIndexOptions.values());
- writer.addDocument(ParsedDocument.deleteTombstone(randomSeqNoIndexOptions, false, "_id").docs().get(0));
+ writer.addDocument(ParsedDocument.deleteTombstone(randomSeqNoIndexOptions, "_id").docs().get(0));
DirectoryReader reader = DirectoryReader.open(writer);
LeafReaderContext segment = reader.leaves().get(0);
PerThreadIDVersionAndSeqNoLookup lookup = new PerThreadIDVersionAndSeqNoLookup(segment.reader(), true);
diff --git a/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionsTests.java b/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionsTests.java
index 59c82195b7fce..141b865a8395b 100644
--- a/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionsTests.java
+++ b/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionsTests.java
@@ -200,7 +200,7 @@ public void testTimeSeriesLoadDocIdAndVersion() throws Exception {
DirectoryReader directoryReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "_na_", 1));
String id = createTSDBId(1000L);
assertThat(
- VersionsAndSeqNoResolver.timeSeriesLoadDocIdAndVersion(directoryReader, new BytesRef("1"), id, randomBoolean()),
+ VersionsAndSeqNoResolver.timeSeriesLoadDocIdAndVersion(directoryReader, new BytesRef("1"), id, randomBoolean(), false),
nullValue()
);
@@ -222,11 +222,23 @@ public void testTimeSeriesLoadDocIdAndVersion() throws Exception {
directoryReader = reopen(directoryReader);
id = createTSDBId(randomLongBetween(1000, 10000));
- assertThat(VersionsAndSeqNoResolver.timeSeriesLoadDocIdAndVersion(directoryReader, new BytesRef("1"), id, true), notNullValue());
- assertThat(VersionsAndSeqNoResolver.timeSeriesLoadDocIdAndVersion(directoryReader, new BytesRef("2"), id, true), notNullValue());
+ assertThat(
+ VersionsAndSeqNoResolver.timeSeriesLoadDocIdAndVersion(directoryReader, new BytesRef("1"), id, true, false),
+ notNullValue()
+ );
+ assertThat(
+ VersionsAndSeqNoResolver.timeSeriesLoadDocIdAndVersion(directoryReader, new BytesRef("2"), id, true, false),
+ notNullValue()
+ );
id = createTSDBId(randomBoolean() ? randomLongBetween(0, 999) : randomLongBetween(10001, Long.MAX_VALUE));
- assertThat(VersionsAndSeqNoResolver.timeSeriesLoadDocIdAndVersion(directoryReader, new BytesRef("1"), id, true), nullValue());
- assertThat(VersionsAndSeqNoResolver.timeSeriesLoadDocIdAndVersion(directoryReader, new BytesRef("2"), id, true), nullValue());
+ assertThat(
+ VersionsAndSeqNoResolver.timeSeriesLoadDocIdAndVersion(directoryReader, new BytesRef("1"), id, true, false),
+ nullValue()
+ );
+ assertThat(
+ VersionsAndSeqNoResolver.timeSeriesLoadDocIdAndVersion(directoryReader, new BytesRef("2"), id, true, false),
+ nullValue()
+ );
directoryReader.close();
writer.close();
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IdLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IdLoaderTests.java
index d75a013eead28..bf595392108c3 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/IdLoaderTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/IdLoaderTests.java
@@ -46,7 +46,8 @@ public class IdLoaderTests extends ESTestCase {
private final int routingHash = randomInt();
public void testSynthesizeIdSimple() throws Exception {
- var idLoader = IdLoader.createTsIdLoader(null, null, indexService.getIndexSettings().useTsdbSyntheticId());
+ final boolean useSyntheticIds = randomBoolean();
+ var idLoader = IdLoader.createTsIdLoader(null, null, useSyntheticIds);
long startTime = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-01-01T00:00:00Z");
List docs = List.of(
@@ -60,15 +61,16 @@ public void testSynthesizeIdSimple() throws Exception {
assertThat(leafReader.numDocs(), equalTo(3));
var leaf = idLoader.leaf(null, leafReader, new int[] { 0, 1, 2 });
// NOTE: time series data is ordered by (tsid, timestamp)
- assertThat(leaf.getId(0), equalTo(expectedId(docs.get(2), routingHash)));
- assertThat(leaf.getId(1), equalTo(expectedId(docs.get(0), routingHash)));
- assertThat(leaf.getId(2), equalTo(expectedId(docs.get(1), routingHash)));
+ assertThat(leaf.getId(0), equalTo(expectedId(docs.get(2), routingHash, useSyntheticIds)));
+ assertThat(leaf.getId(1), equalTo(expectedId(docs.get(0), routingHash, useSyntheticIds)));
+ assertThat(leaf.getId(2), equalTo(expectedId(docs.get(1), routingHash, useSyntheticIds)));
};
prepareIndexReader(indexAndForceMerge(docs, routingHash), verify, false);
}
public void testSynthesizeIdMultipleSegments() throws Exception {
- var idLoader = IdLoader.createTsIdLoader(null, null, indexService.getIndexSettings().useTsdbSyntheticId());
+ final boolean useSyntheticIds = randomBoolean();
+ var idLoader = IdLoader.createTsIdLoader(null, null, useSyntheticIds);
long startTime = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-01-01T00:00:00Z");
List docs1 = List.of(
@@ -110,22 +112,22 @@ public void testSynthesizeIdMultipleSegments() throws Exception {
assertThat(leafReader.numDocs(), equalTo(docs1.size()));
var leaf = idLoader.leaf(null, leafReader, IntStream.range(0, docs1.size()).toArray());
for (int i = 0; i < docs1.size(); i++) {
- assertThat(leaf.getId(i), equalTo(expectedId(docs1.get(i), routingHash)));
+ assertThat(leaf.getId(i), equalTo(expectedId(docs1.get(i), routingHash, useSyntheticIds)));
}
}
{
LeafReader leafReader = indexReader.leaves().get(1).reader();
assertThat(leafReader.numDocs(), equalTo(docs2.size()));
var leaf = idLoader.leaf(null, leafReader, new int[] { 0, 3 });
- assertThat(leaf.getId(0), equalTo(expectedId(docs2.get(0), routingHash)));
- assertThat(leaf.getId(3), equalTo(expectedId(docs2.get(3), routingHash)));
+ assertThat(leaf.getId(0), equalTo(expectedId(docs2.get(0), routingHash, useSyntheticIds)));
+ assertThat(leaf.getId(3), equalTo(expectedId(docs2.get(3), routingHash, useSyntheticIds)));
}
{
LeafReader leafReader = indexReader.leaves().get(2).reader();
assertThat(leafReader.numDocs(), equalTo(docs3.size()));
var leaf = idLoader.leaf(null, leafReader, new int[] { 1, 2 });
- assertThat(leaf.getId(1), equalTo(expectedId(docs3.get(1), routingHash)));
- assertThat(leaf.getId(2), equalTo(expectedId(docs3.get(2), routingHash)));
+ assertThat(leaf.getId(1), equalTo(expectedId(docs3.get(1), routingHash, useSyntheticIds)));
+ assertThat(leaf.getId(2), equalTo(expectedId(docs3.get(2), routingHash, useSyntheticIds)));
}
{
LeafReader leafReader = indexReader.leaves().get(2).reader();
@@ -138,7 +140,8 @@ public void testSynthesizeIdMultipleSegments() throws Exception {
}
public void testSynthesizeIdRandom() throws Exception {
- var idLoader = IdLoader.createTsIdLoader(null, null, indexService.getIndexSettings().useTsdbSyntheticId());
+ final boolean useSyntheticIds = randomBoolean();
+ var idLoader = IdLoader.createTsIdLoader(null, null, useSyntheticIds);
long startTime = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-01-01T00:00:00Z");
Set expectedIDs = new HashSet<>();
@@ -161,7 +164,7 @@ public void testSynthesizeIdRandom() throws Exception {
for (int j = 0; j < numberOfSamples; j++) {
Doc doc = new Doc(startTime++, dimensions);
randomDocs.add(doc);
- expectedIDs.add(expectedId(doc, routingHash));
+ expectedIDs.add(expectedId(doc, routingHash, useSyntheticIds));
}
}
CheckedConsumer verify = indexReader -> {
@@ -240,7 +243,7 @@ private static void indexDoc(IndexWriter iw, Doc doc, int routingHash) throws IO
iw.addDocument(fields);
}
- private static String expectedId(Doc doc, int routingHash) throws IOException {
+ private static String expectedId(Doc doc, int routingHash, boolean useSyntheticIds) {
var routingFields = new RoutingPathFields(null);
for (Dimension dimension : doc.dimensions) {
if (dimension.value instanceof Number n) {
@@ -249,7 +252,11 @@ private static String expectedId(Doc doc, int routingHash) throws IOException {
routingFields.addString(dimension.field, dimension.value.toString());
}
}
- return TsidExtractingIdFieldMapper.createId(routingHash, routingFields.buildHash().toBytesRef(), doc.timestamp);
+ if (useSyntheticIds) {
+ return TsidExtractingIdFieldMapper.createSyntheticId(routingFields.buildHash().toBytesRef(), doc.timestamp, routingHash);
+ } else {
+ return TsidExtractingIdFieldMapper.createId(routingHash, routingFields.buildHash().toBytesRef(), doc.timestamp);
+ }
}
record Doc(long timestamp, List dimensions) {}
diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java
index 94d3548c23793..e792a0f48553b 100644
--- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java
+++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java
@@ -4568,7 +4568,7 @@ public void testOnCloseStats() throws IOException {
public void testSupplyTombstoneDoc() throws Exception {
IndexShard shard = newStartedShard();
String id = randomRealisticUnicodeOfLengthBetween(1, 10);
- ParsedDocument deleteTombstone = ParsedDocument.deleteTombstone(shard.indexSettings.seqNoIndexOptions(), randomBoolean(), id);
+ ParsedDocument deleteTombstone = ParsedDocument.deleteTombstone(shard.indexSettings.seqNoIndexOptions(), id);
assertThat(deleteTombstone.docs(), hasSize(1));
LuceneDocument deleteDoc = deleteTombstone.docs().get(0);
assertThat(
From d885dda33aceb4f18cbab5c0ab1c3e10682b8c0a Mon Sep 17 00:00:00 2001
From: tlrx
Date: Wed, 29 Oct 2025 14:00:20 +0100
Subject: [PATCH 06/20] fix sort config
---
.../main/java/org/elasticsearch/index/IndexSortConfig.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java b/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
index 69c171fd842c0..e52ed3088e671 100644
--- a/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
+++ b/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
@@ -145,8 +145,8 @@ public static FieldSortSpec[] getDefaultSortSpecs(Settings settings) {
if (IndexMode.TIME_SERIES.getName().equals(indexMode)) {
if (IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG) {
- var s = settings.get(IndexSettings.USE_SYNTHETIC_ID.getKey());
- if (s != null) {
+ var useSyntheticId = settings.get(IndexSettings.USE_SYNTHETIC_ID.getKey());
+ if (useSyntheticId != null && useSyntheticId.equalsIgnoreCase(Boolean.TRUE.toString())) {
return TIME_SERIES_WITH_SYNTHETIC_ID_SORT;
}
}
From b22f59c52695c4c2745456b60c3863da1b0f05f0 Mon Sep 17 00:00:00 2001
From: tlrx
Date: Wed, 29 Oct 2025 14:46:42 +0100
Subject: [PATCH 07/20] fix sort config
---
server/src/main/java/org/elasticsearch/index/IndexMode.java | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/index/IndexMode.java b/server/src/main/java/org/elasticsearch/index/IndexMode.java
index 726610efb927d..b3df1bfd8ea5a 100644
--- a/server/src/main/java/org/elasticsearch/index/IndexMode.java
+++ b/server/src/main/java/org/elasticsearch/index/IndexMode.java
@@ -139,11 +139,12 @@ void validateWithOtherSettings(Map, Object> settings) {
throw new IllegalArgumentException(error(IndexMetadata.INDEX_ROUTING_PARTITION_SIZE_SETTING));
}
- var settingsWithIndexMode = Settings.builder().put(IndexSettings.MODE.getKey(), getName()).build();
+ Settings settingsWithIndexMode;
if (IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG) {
settingsWithIndexMode = Settings.builder()
.put(IndexSettings.MODE.getKey(), getName())
- .put(IndexSettings.USE_SYNTHETIC_ID.getKey(), "true")
+ // Default values of some index sort settings depend on the feature flag and the USE_SYNTHETIC_ID setting
+ .put(IndexSettings.USE_SYNTHETIC_ID.getKey(), (Boolean) settings.get(IndexSettings.USE_SYNTHETIC_ID))
.build();
} else {
settingsWithIndexMode = Settings.builder().put(IndexSettings.MODE.getKey(), getName()).build();
From b50b64ccd2f21cfe54d4ad4cb944e1c9588a6148 Mon Sep 17 00:00:00 2001
From: tlrx
Date: Thu, 30 Oct 2025 12:49:26 +0100
Subject: [PATCH 08/20] fix merge
---
.../main/java/org/elasticsearch/index/IndexSortConfig.java | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java b/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
index f8962c648d336..d3248c76b14db 100644
--- a/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
+++ b/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
@@ -147,6 +147,12 @@ public static FieldSortSpec[] getDefaultSortSpecs(Settings settings) {
}
if (IndexMode.TIME_SERIES.getName().equals(indexMode)) {
+ if (IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG) {
+ var useSyntheticId = settings.get(IndexSettings.USE_SYNTHETIC_ID.getKey());
+ if (useSyntheticId != null && useSyntheticId.equalsIgnoreCase(Boolean.TRUE.toString())) {
+ return TIME_SERIES_WITH_SYNTHETIC_ID_SORT;
+ }
+ }
return TIME_SERIES_SORT;
} else if (IndexMode.LOGSDB.getName().equals(indexMode)) {
var version = IndexMetadata.SETTING_INDEX_VERSION_CREATED.get(settings);
From 933e280c0e78eed612e23f389c0d745312c6ec43 Mon Sep 17 00:00:00 2001
From: tlrx
Date: Thu, 30 Oct 2025 14:51:34 +0100
Subject: [PATCH 09/20] compute useTimeSeriesSyntheticId in metadata
---
.../cluster/metadata/IndexMetadata.java | 39 +++++++++++++---
.../cluster/routing/IndexRouting.java | 5 +--
.../elasticsearch/index/IndexSettings.java | 44 ++++++++++++++++---
.../index/codec/CodecService.java | 2 +-
.../index/engine/InternalEngine.java | 2 +-
.../mapper/TsidExtractingIdFieldMapper.java | 4 +-
.../search/DefaultSearchContext.java | 2 +-
7 files changed, 76 insertions(+), 22 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java
index 38d0fada7d866..6fee4b39dbe22 100644
--- a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java
+++ b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java
@@ -705,6 +705,8 @@ public Iterator> settings() {
@Nullable
private final IndexReshardingMetadata reshardingMetadata;
+ private final boolean useTimeSeriesSyntheticId;
+
private IndexMetadata(
final Index index,
final long version,
@@ -754,7 +756,8 @@ private IndexMetadata(
@Nullable final IndexMetadataStats stats,
@Nullable final Double writeLoadForecast,
@Nullable Long shardSizeInBytesForecast,
- @Nullable IndexReshardingMetadata reshardingMetadata
+ @Nullable IndexReshardingMetadata reshardingMetadata,
+ final boolean useTimeSeriesSyntheticId
) {
this.index = index;
this.version = version;
@@ -815,6 +818,7 @@ private IndexMetadata(
this.shardSizeInBytesForecast = shardSizeInBytesForecast;
assert numberOfShards * routingFactor == routingNumShards : routingNumShards + " must be a multiple of " + numberOfShards;
this.reshardingMetadata = reshardingMetadata;
+ this.useTimeSeriesSyntheticId = useTimeSeriesSyntheticId;
}
IndexMetadata withMappingMetadata(MappingMetadata mapping) {
@@ -870,7 +874,8 @@ IndexMetadata withMappingMetadata(MappingMetadata mapping) {
this.stats,
this.writeLoadForecast,
this.shardSizeInBytesForecast,
- this.reshardingMetadata
+ this.reshardingMetadata,
+ this.useTimeSeriesSyntheticId
);
}
@@ -933,7 +938,8 @@ public IndexMetadata withInSyncAllocationIds(int shardId, Set inSyncSet)
this.stats,
this.writeLoadForecast,
this.shardSizeInBytesForecast,
- this.reshardingMetadata
+ this.reshardingMetadata,
+ this.useTimeSeriesSyntheticId
);
}
@@ -1004,7 +1010,8 @@ public IndexMetadata withSetPrimaryTerm(int shardId, long primaryTerm) {
this.stats,
this.writeLoadForecast,
this.shardSizeInBytesForecast,
- this.reshardingMetadata
+ this.reshardingMetadata,
+ this.useTimeSeriesSyntheticId
);
}
@@ -1066,7 +1073,8 @@ public IndexMetadata withTimestampRanges(IndexLongFieldRange timestampRange, Ind
this.stats,
this.writeLoadForecast,
this.shardSizeInBytesForecast,
- this.reshardingMetadata
+ this.reshardingMetadata,
+ this.useTimeSeriesSyntheticId
);
}
@@ -1123,7 +1131,8 @@ public IndexMetadata withIncrementedVersion() {
this.stats,
this.writeLoadForecast,
this.shardSizeInBytesForecast,
- this.reshardingMetadata
+ this.reshardingMetadata,
+ this.useTimeSeriesSyntheticId
);
}
@@ -1314,6 +1323,13 @@ public Instant getTimeSeriesEnd() {
return timeSeriesEnd;
}
+ /**
+ * @return {@code true} if this is a time-series index using synthetic ids; the value is computed when the metadata is built.
+ */
+ public boolean useTimeSeriesSyntheticId() {
+ return useTimeSeriesSyntheticId;
+ }
+
/**
* Return the concrete mapping for this index or {@code null} if this index has no mappings at all.
*/
@@ -2497,6 +2513,14 @@ IndexMetadata build(boolean repair) {
String indexModeString = settings.get(IndexSettings.MODE.getKey());
final IndexMode indexMode = indexModeString != null ? IndexMode.fromString(indexModeString.toLowerCase(Locale.ROOT)) : null;
final boolean isTsdb = indexMode == IndexMode.TIME_SERIES;
+ boolean useTimeSeriesSyntheticId = false;
+ if (isTsdb && indexCreatedVersion.onOrAfter(IndexVersions.TIME_SERIES_USE_SYNTHETIC_ID)) {
+ var setting = settings.get(IndexSettings.USE_SYNTHETIC_ID.getKey());
+ if (setting != null && setting.equalsIgnoreCase(Boolean.TRUE.toString())) {
+ assert IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG;
+ useTimeSeriesSyntheticId = true;
+ }
+ }
return new IndexMetadata(
new Index(index, uuid),
version,
@@ -2546,7 +2570,8 @@ IndexMetadata build(boolean repair) {
stats,
indexWriteLoadForecast,
shardSizeInBytesForecast,
- reshardingMetadata
+ reshardingMetadata,
+ useTimeSeriesSyntheticId
);
}
diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java
index 9eccb663a5362..0cc8167727978 100644
--- a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java
+++ b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java
@@ -26,7 +26,6 @@
import org.elasticsearch.core.Nullable;
import org.elasticsearch.features.NodeFeature;
import org.elasticsearch.index.IndexMode;
-import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper;
@@ -336,9 +335,7 @@ public abstract static class ExtractFromSource extends IndexRouting {
assert indexMode != null : "Index mode must be set for ExtractFromSource routing";
this.trackTimeSeriesRoutingHash = indexMode == IndexMode.TIME_SERIES
&& metadata.getCreationVersion().onOrAfter(IndexVersions.TIME_SERIES_ROUTING_HASH_IN_ID);
- this.useTimeSeriesSyntheticId = trackTimeSeriesRoutingHash
- && metadata.getCreationVersion().onOrAfter(IndexVersions.TIME_SERIES_USE_SYNTHETIC_ID)
- && IndexSettings.USE_SYNTHETIC_ID.get(metadata.getSettings());
+ this.useTimeSeriesSyntheticId = metadata.useTimeSeriesSyntheticId();
addIdWithRoutingHash = indexMode == IndexMode.LOGSDB;
this.parserConfig = XContentParserConfiguration.EMPTY.withFiltering(null, Set.copyOf(includePaths), null, true);
}
diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java
index 26f747d2d2315..f7af6bf098761 100644
--- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java
+++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java
@@ -690,7 +690,19 @@ public boolean isES87TSDBCodecEnabled() {
false,
new Setting.Validator<>() {
@Override
- public void validate(Boolean value) {}
+ public void validate(Boolean enabled) {
+ if (enabled) {
+ if (TSDB_SYNTHETIC_ID_FEATURE_FLAG == false) {
+ throw new IllegalArgumentException(
+ String.format(
+ Locale.ROOT,
+ "The setting [%s] is only permitted when the feature flag is enabled.",
+ USE_SYNTHETIC_ID.getKey()
+ )
+ );
+ }
+ }
+ }
@Override
public void validate(Boolean enabled, Map, Object> settings) {
@@ -983,7 +995,7 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) {
private final boolean recoverySourceEnabled;
private final boolean recoverySourceSyntheticEnabled;
private final boolean useDocValuesSkipper;
- private final boolean tsdbSyntheticId;
+ private final boolean useTimeSeriesSyntheticId;
/**
* The maximum number of refresh listeners allows on this shard.
@@ -1170,8 +1182,28 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
&& scopedSettings.get(RECOVERY_USE_SYNTHETIC_SOURCE_SETTING);
useDocValuesSkipper = DOC_VALUES_SKIPPER && scopedSettings.get(USE_DOC_VALUES_SKIPPER);
seqNoIndexOptions = scopedSettings.get(SEQ_NO_INDEX_OPTIONS_SETTING);
- tsdbSyntheticId = TSDB_SYNTHETIC_ID_FEATURE_FLAG && scopedSettings.get(USE_SYNTHETIC_ID);
- assert tsdbSyntheticId == false || mode == IndexMode.TIME_SERIES : mode;
+ final var useSyntheticId = scopedSettings.get(USE_SYNTHETIC_ID);
+ if (indexMetadata.useTimeSeriesSyntheticId() != useSyntheticId) {
+ assert false;
+ throw new IllegalArgumentException(
+ String.format(
+ Locale.ROOT,
+ "The setting [%s] is set to [%s] but index metadata has a different value [%s].",
+ USE_SYNTHETIC_ID.getKey(),
+ useSyntheticId,
+ indexMetadata.useTimeSeriesSyntheticId()
+ )
+ );
+ }
+ if (useSyntheticId) {
+ assert TSDB_SYNTHETIC_ID_FEATURE_FLAG;
+ assert indexMetadata.useTimeSeriesSyntheticId();
+ assert indexMetadata.getIndexMode() == IndexMode.TIME_SERIES : indexMetadata.getIndexMode();
+ assert indexMetadata.getCreationVersion().onOrAfter(IndexVersions.TIME_SERIES_USE_SYNTHETIC_ID);
+ useTimeSeriesSyntheticId = true;
+ } else {
+ useTimeSeriesSyntheticId = false;
+ }
if (recoverySourceSyntheticEnabled) {
if (DiscoveryNode.isStateless(settings)) {
throw new IllegalArgumentException("synthetic recovery source is only allowed in stateful");
@@ -1907,8 +1939,8 @@ public boolean useDocValuesSkipper() {
/**
* @return Whether the index is a time-series index that use synthetic ids.
*/
- public boolean useTsdbSyntheticId() {
- return tsdbSyntheticId;
+ public boolean useTimeSeriesSyntheticId() {
+ return useTimeSeriesSyntheticId;
}
/**
diff --git a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java
index 1e2fed61578a5..5d6e377d57db9 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java
@@ -67,7 +67,7 @@ public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays)
for (String codec : Codec.availableCodecs()) {
codecs.put(codec, Codec.forName(codec));
}
- final boolean useTsdbSyntheticId = mapperService != null && mapperService.getIndexSettings().useTsdbSyntheticId();
+ final boolean useTsdbSyntheticId = mapperService != null && mapperService.getIndexSettings().useTimeSeriesSyntheticId();
assert useTsdbSyntheticId == false || mapperService.getIndexSettings().getMode() == IndexMode.TIME_SERIES;
this.codecs = codecs.entrySet().stream().collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, e -> {
diff --git a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java
index a97cfca88e253..d9bf37717edc5 100644
--- a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java
+++ b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java
@@ -245,7 +245,7 @@ public InternalEngine(EngineConfig engineConfig) {
InternalEngine(EngineConfig engineConfig, int maxDocs, BiFunction localCheckpointTrackerSupplier) {
super(engineConfig);
this.maxDocs = maxDocs;
- if (engineConfig.getIndexSettings().useTsdbSyntheticId()) {
+ if (engineConfig.getIndexSettings().useTimeSeriesSyntheticId()) {
logger.info("using TSDB with synthetic id");
useTsdbSyntheticId = true;
} else {
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
index 64d834ef3a7bf..bab459269ee04 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
@@ -67,7 +67,7 @@ public static BytesRef createField(DocumentParserContext context, RoutingHashBui
|| id.equals(indexRouting.createId(context.sourceToParse().getXContentType(), context.sourceToParse().source(), suffix));
} else if (context.sourceToParse().routing() != null) {
int routingHash = TimeSeriesRoutingHashFieldMapper.decode(context.sourceToParse().routing());
- if (context.indexSettings().useTsdbSyntheticId()) {
+ if (context.indexSettings().useTimeSeriesSyntheticId()) {
id = createSyntheticId(tsid, timestamp, routingHash);
} else {
id = createId(routingHash, tsid, timestamp);
@@ -98,7 +98,7 @@ public static BytesRef createField(DocumentParserContext context, RoutingHashBui
context.id(id);
final Field idField;
- if (context.indexSettings().useTsdbSyntheticId()) {
+ if (context.indexSettings().useTimeSeriesSyntheticId()) {
idField = syntheticIdField(context.id());
} else {
idField = standardIdField(context.id());
diff --git a/server/src/main/java/org/elasticsearch/search/DefaultSearchContext.java b/server/src/main/java/org/elasticsearch/search/DefaultSearchContext.java
index bf95bc3ccf69f..0b94b3be3650f 100644
--- a/server/src/main/java/org/elasticsearch/search/DefaultSearchContext.java
+++ b/server/src/main/java/org/elasticsearch/search/DefaultSearchContext.java
@@ -971,7 +971,7 @@ public IdLoader newIdLoader() {
}
}
}
- return IdLoader.createTsIdLoader(indexRouting, routingPaths, indexService.getIndexSettings().useTsdbSyntheticId());
+ return IdLoader.createTsIdLoader(indexRouting, routingPaths, indexService.getIndexSettings().useTimeSeriesSyntheticId());
} else {
return IdLoader.fromLeafStoredFieldLoader();
}
From 4662f945ea9bad25ca7f313052dc27ab1eace426 Mon Sep 17 00:00:00 2001
From: tlrx
Date: Mon, 3 Nov 2025 10:25:46 +0100
Subject: [PATCH 10/20] remove update
---
.../datastreams/TSDBSyntheticIdsIT.java | 14 --------------
1 file changed, 14 deletions(-)
diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
index ca9b3c9a33ef3..a2db96cbafeb8 100644
--- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
+++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
@@ -229,20 +229,6 @@ enum Operation {
);
}
- // Update by synthetic _id
- //
- // Note: it doesn't work, is that expected? Is is blocked by IndexRouting.ExtractFromSource.updateShard
- var updateDocId = randomFrom(docs.keySet());
- var updateDocIndex = docs.get(updateDocId);
- var exception = expectThrows(IllegalArgumentException.class, () -> {
- var doc = document(timestamp, "vm-dev01", "cpu-load", 10); // update
- client().prepareUpdate(updateDocIndex, updateDocId).setDoc(doc).get();
- });
- assertThat(
- exception.getMessage(),
- containsString("update is not supported because the destination index [" + updateDocIndex + "] is in time_series mode")
- );
-
flush(dataStreamName);
// Check that synthetic _id field have no postings on disk
From b3428c7fcfde389b57dac4aa8147b58ec54a07c5 Mon Sep 17 00:00:00 2001
From: tlrx
Date: Mon, 3 Nov 2025 10:28:52 +0100
Subject: [PATCH 11/20] startDocID >= 0
---
.../index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
index 71cd99c00440f..e94a1c3e06984 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
@@ -463,7 +463,7 @@ public SeekStatus seekCeil(BytesRef id) throws IOException {
// Slow scan to the first document matching the _tsid
final int startDocID = docValues.slowScanToFirstDocWithTsIdOrdinalEqualTo(tsIdOrd);
- assert 0 <= startDocID : startDocID;
+ assert startDocID >= 0 : startDocID;
int docID = startDocID;
int docTsIdOrd = tsIdOrd;
From 3f81d6046a48728453b7ede42e3b3271bf404667 Mon Sep 17 00:00:00 2001
From: tlrx
Date: Mon, 3 Nov 2025 10:39:03 +0100
Subject: [PATCH 12/20] get from searcher
---
.../datastreams/TSDBSyntheticIdsIT.java | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
index a2db96cbafeb8..79892d1a3dd89 100644
--- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
+++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
@@ -325,6 +325,23 @@ public void testGetFromTranslogBySyntheticId() throws Exception {
flushAndRefresh(dataStreamName);
+ // Get by synthetic _id
+ //
+ // Here we exercise the get-from-searcher and VersionsAndSeqNoResolver.timeSeriesLoadDocIdAndVersion paths.
+ randomDocs = randomSubsetOf(randomIntBetween(1, results.length), results);
+ for (var doc : randomDocs) {
+ var getResponse = client().prepareGet(doc.getIndex(), doc.getId())
+ .setRealtime(randomBoolean())
+ .setFetchSource(true)
+ .execute()
+ .actionGet();
+ assertThat(getResponse.isExists(), equalTo(true));
+ assertThat(getResponse.getVersion(), equalTo(1L));
+
+ var source = asInstanceOf(Map.class, getResponse.getSourceAsMap().get("metric"));
+ assertThat(asInstanceOf(Integer.class, source.get("value")), equalTo(metricOffset + doc.getItemId()));
+ }
+
// Check that synthetic _id field have no postings on disk
var indices = new HashSet<>(docs.values());
for (var index : indices) {
From 15a1e4c19db69e03436763944dc85842eab3b65c Mon Sep 17 00:00:00 2001
From: tlrx
Date: Mon, 3 Nov 2025 10:43:42 +0100
Subject: [PATCH 13/20] remove comment
---
.../index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
index e94a1c3e06984..f66d1e3508870 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
@@ -287,7 +287,7 @@ private BytesRef lookupTsIdOrd(int tsIdOrdinal) throws IOException {
*/
private int slowScanToFirstDocWithTsIdOrdinalEqualOrGreaterThan(int tsIdOrd) throws IOException {
// recreate even if doc values are already on the same ordinal, to ensure the method returns the first doc
- if (tsIdDocValues == null || (cachedTsIdOrd != -1 && cachedTsIdOrd >= tsIdOrd)) { // can't use tsIdDocValues.ordValue() here??
+ if (tsIdDocValues == null || (cachedTsIdOrd != -1 && cachedTsIdOrd >= tsIdOrd)) {
tsIdDocValues = docValuesProducer.getSorted(tsIdFieldInfo);
cachedTsIdOrd = -1;
cachedTsId = null;
@@ -320,7 +320,7 @@ private int slowScanToFirstDocWithTsIdOrdinalEqualOrGreaterThan(int tsIdOrd) thr
*/
private int slowScanToFirstDocWithTsIdOrdinalEqualTo(int tsIdOrd) throws IOException {
// recreate even if doc values are already on the same ordinal, to ensure the method returns the first doc
- if (tsIdDocValues == null || (cachedTsIdOrd != -1 && cachedTsIdOrd >= tsIdOrd)) { // can't use tsIdDocValues.ordValue() here??
+ if (tsIdDocValues == null || (cachedTsIdOrd != -1 && cachedTsIdOrd >= tsIdOrd)) {
tsIdDocValues = docValuesProducer.getSorted(tsIdFieldInfo);
cachedTsIdOrd = -1;
cachedTsId = null;
From d71316d39ec6553bdf487f13a03a6ee597b3b5fc Mon Sep 17 00:00:00 2001
From: tlrx
Date: Mon, 3 Nov 2025 12:56:14 +0100
Subject: [PATCH 14/20] timestamp
---
.../org/elasticsearch/index/IndexSortConfig.java | 12 +-----------
.../codec/tsdb/TSDBSyntheticIdFieldsProducer.java | 4 ++--
.../index/mapper/TsidExtractingIdFieldMapper.java | 12 ++++++++----
3 files changed, 11 insertions(+), 17 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java b/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
index d3248c76b14db..fd445d470837c 100644
--- a/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
+++ b/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
@@ -107,7 +107,7 @@ public final class IndexSortConfig {
);
public static class IndexSortConfigDefaults {
- public static final FieldSortSpec[] TIME_SERIES_SORT, TIME_SERIES_WITH_SYNTHETIC_ID_SORT, HOSTNAME_TIMESTAMP_BWC_SORT;
+ public static final FieldSortSpec[] TIME_SERIES_SORT, HOSTNAME_TIMESTAMP_BWC_SORT;
private static final FieldSortSpec HOSTNAME_SPEC, MESSAGE_PATTERN_SPEC, TIMESTAMP_SPEC;
@@ -116,10 +116,6 @@ public static class IndexSortConfigDefaults {
TIMESTAMP_SPEC.order = SortOrder.DESC;
TIME_SERIES_SORT = new FieldSortSpec[] { new FieldSortSpec(TimeSeriesIdFieldMapper.NAME), TIMESTAMP_SPEC };
- TIME_SERIES_WITH_SYNTHETIC_ID_SORT = new FieldSortSpec[] {
- new FieldSortSpec(TimeSeriesIdFieldMapper.NAME),
- new FieldSortSpec(DataStreamTimestampFieldMapper.DEFAULT_PATH) };
-
HOSTNAME_SPEC = new FieldSortSpec(IndexMode.HOST_NAME);
HOSTNAME_SPEC.order = SortOrder.ASC;
HOSTNAME_SPEC.missingValue = "_last";
@@ -147,12 +143,6 @@ public static FieldSortSpec[] getDefaultSortSpecs(Settings settings) {
}
if (IndexMode.TIME_SERIES.getName().equals(indexMode)) {
- if (IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG) {
- var useSyntheticId = settings.get(IndexSettings.USE_SYNTHETIC_ID.getKey());
- if (useSyntheticId != null && useSyntheticId.equalsIgnoreCase(Boolean.TRUE.toString())) {
- return TIME_SERIES_WITH_SYNTHETIC_ID_SORT;
- }
- }
return TIME_SERIES_SORT;
} else if (IndexMode.LOGSDB.getName().equals(indexMode)) {
var version = IndexMetadata.SETTING_INDEX_VERSION_CREATED.get(settings);
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
index f66d1e3508870..1431c10331d69 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java
@@ -150,7 +150,7 @@ private static class DocValuesHolder {
private final FieldInfo routingHashFieldInfo;
private final DocValuesProducer docValuesProducer;
- private SortedNumericDocValues timestampDocValues; // sorted asc. order
+ private SortedNumericDocValues timestampDocValues; // sorted desc. order
private SortedDocValues routingHashDocValues; // sorted asc. order
private SortedDocValues tsIdDocValues; // sorted asc. order
// Keep around the latest tsId ordinal and value
@@ -482,7 +482,7 @@ public SeekStatus seekCeil(BytesRef id) throws IOException {
return SeekStatus.FOUND;
}
// Remaining docs don't match, stop here
- if (tsIdOrd < docTsIdOrd || timestamp < docTimestamp) {
+ if (tsIdOrd < docTsIdOrd || docTimestamp < timestamp) {
break;
}
}
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
index bab459269ee04..f5cbfb5cd71cb 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
@@ -153,7 +153,7 @@ public static String createId(
}
public static BytesRef createSyntheticIdBytesRef(BytesRef tsid, long timestamp, int routingHash) {
- // A synthetic _id is the concatenation of [_tsid (non-fixed length) + timestamp (8 bytes) + routing hash (4 bytes)].
+ // A synthetic _id has the format: [_tsid (non-fixed length) + (Long.MAX_VALUE - timestamp) (8 bytes) + routing hash (4 bytes)].
// We dont' use hashing here because we need to be able to extract the concatenated values from the _id in various places, like
// when applying doc values updates in Lucene, or when routing GET or DELETE requests to the corresponding shard, or when replaying
// translog operations. Since the synthetic _id is not indexed and not really stored on disk we consider it fine if it is longer
@@ -161,10 +161,11 @@ public static BytesRef createSyntheticIdBytesRef(BytesRef tsid, long timestamp,
//
// Also, when applying doc values updates Lucene expects _id to be sorted: it stops applying updates for a term "_id:ABC" if it
// seeks to a term "BCD" as it knows there won't be more documents matching "_id:ABC" past the term "BCD". So it is important to
- // generate an _id that reflects the ordering of the terms it is synthesized from, ie _tsid and @timestamp.
+ // generate an _id as a byte array whose lexicographical order reflects the order of the documents in the segment. For this reason,
+ // the timestamp is stored in the synthetic _id as (Long.MAX_VALUE - timestamp).
byte[] bytes = new byte[tsid.length + Long.BYTES + Integer.BYTES];
System.arraycopy(tsid.bytes, tsid.offset, bytes, 0, tsid.length);
- ByteUtils.writeLongBE(timestamp, bytes, tsid.length); // Big Endian as we want to most significant byte first
+ ByteUtils.writeLongBE(Long.MAX_VALUE - timestamp, bytes, tsid.length); // Big Endian as we want to most significant byte first
ByteUtils.writeIntBE(routingHash, bytes, tsid.length + Long.BYTES);
return new BytesRef(bytes);
}
@@ -185,7 +186,10 @@ public static BytesRef extractTimeSeriesIdFromSyntheticId(BytesRef id) {
public static long extractTimestampFromSyntheticId(BytesRef id) {
assert id.length > Long.BYTES + Integer.BYTES;
// See #createSyntheticId
- return ByteUtils.readLongBE(id.bytes, id.offset + id.length - Long.BYTES - Integer.BYTES);
+ long delta = ByteUtils.readLongBE(id.bytes, id.offset + id.length - Long.BYTES - Integer.BYTES);
+ long timestamp = Long.MAX_VALUE - delta;
+ assert timestamp >= 0 : delta;
+ return timestamp;
}
public static int extractRoutingHashFromSyntheticId(BytesRef id) {
From 96eb36a70a132a75f054a3209e52be5a55901720 Mon Sep 17 00:00:00 2001
From: Tanguy Leroux
Date: Mon, 3 Nov 2025 12:58:55 +0100
Subject: [PATCH 15/20] Update docs/changelog/137274.yaml
---
docs/changelog/137274.yaml | 5 +++++
1 file changed, 5 insertions(+)
create mode 100644 docs/changelog/137274.yaml
diff --git a/docs/changelog/137274.yaml b/docs/changelog/137274.yaml
new file mode 100644
index 0000000000000..c26c0940f4a51
--- /dev/null
+++ b/docs/changelog/137274.yaml
@@ -0,0 +1,5 @@
+pr: 137274
+summary: Use a new synthetic `_id` format for time-series datastreams
+area: TSDB
+type: enhancement
+issues: []
From 136a267e626d9c3b6176bedde6007de3ad1bce09 Mon Sep 17 00:00:00 2001
From: tlrx
Date: Mon, 3 Nov 2025 13:56:15 +0100
Subject: [PATCH 16/20] ensure no postings
---
.../datastreams/TSDBSyntheticIdsIT.java | 2 +
.../codec/tsdb/TSDBSyntheticIdCodec.java | 80 ++++++++++++++++---
2 files changed, 71 insertions(+), 11 deletions(-)
diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
index 79892d1a3dd89..1de17ba893916 100644
--- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
+++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
@@ -25,6 +25,7 @@
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.engine.EngineConfig;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.plugins.Plugin;
@@ -388,6 +389,7 @@ private static void putDataStreamTemplate(String indexPattern, int shards) throw
final var settings = indexSettings(shards, 0).put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES.getName())
.put(IndexSettings.BLOOM_FILTER_ID_FIELD_ENABLED_SETTING.getKey(), false)
.put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), -1)
+ .put(EngineConfig.USE_COMPOUND_FILE, false)
.put(IndexSettings.USE_SYNTHETIC_ID.getKey(), true);
final var mappings = """
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java
index 4d885fbc88e1a..f44741c0e6c68 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java
@@ -11,12 +11,19 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.NormsProducer;
+import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.elasticsearch.index.mapper.SyntheticIdField;
@@ -36,21 +43,25 @@
* of terms and postings on the field (now called a "synthetic _id" field) as if it was backed by an in inverted index.
*
*
- * In order to do this, it enforces synthetic _id fields to be indexed with the {@link IndexOptions#NONE} option, hence preventing the
- * building of a term dictionary with postings lists. The codec also changes this {@link IndexOptions#NONE} option back to
- * {@link IndexOptions#DOCS} when reading the {@link FieldInfos} during the opening of a new segment core reader. This allows to use a
- * Lucene term dictionary on top of a synthetic _id field that does not have corresponding postings files on disk. Finally, the codec
- * injects additional {@link FieldInfos} attributes so that Lucene's {@link PerFieldPostingsFormat} correctly instantiates a
- * {@link TSDBSyntheticIdPostingsFormat} to access the term and postings of the synthetic _id field.
+ * In order to do this, it wraps the default postings format with an implementation that throws an {@link IllegalArgumentException} if
+ * a Lucene field with the name {@code _id} produces terms (ie, has postings) during indexing. It also overwrites the {@link FieldInfos}
+ * to ensure that the {@code _id} field information has the {@link IndexOptions#NONE} option when written to disk. It also changes this
+ * {@link IndexOptions#NONE} option back to {@link IndexOptions#DOCS} when reading the {@link FieldInfos} during the opening of a new
+ * segment core reader. This allows using a Lucene term dictionary on top of a synthetic _id field that does not have corresponding
+ * postings files on disk. Finally, the codec injects additional {@link FieldInfos} attributes so that Lucene's
+ * {@link PerFieldPostingsFormat} correctly instantiates a {@link TSDBSyntheticIdPostingsFormat} to access the term and postings of the
+ * synthetic _id field.
*
*/
public class TSDBSyntheticIdCodec extends FilterCodec {
- private final TSDBSyntheticIdFieldInfosFormat fieldInfosFormat;
+ private final RewriteFieldInfosFormat fieldInfosFormat;
+ private final EnsureNoPostingsFormat postingsFormat;
public TSDBSyntheticIdCodec(String name, Codec delegate) {
super(name, delegate);
- this.fieldInfosFormat = new TSDBSyntheticIdFieldInfosFormat(delegate.fieldInfosFormat());
+ this.fieldInfosFormat = new RewriteFieldInfosFormat(delegate.fieldInfosFormat());
+ this.postingsFormat = new EnsureNoPostingsFormat(delegate.postingsFormat());
}
@Override
@@ -58,14 +69,19 @@ public final FieldInfosFormat fieldInfosFormat() {
return fieldInfosFormat;
}
+ @Override
+ public PostingsFormat postingsFormat() {
+ return postingsFormat;
+ }
+
/**
- * {@link FieldInfosFormat} that ensures the _id field is synthetic
+ * {@link FieldInfosFormat} that overwrites the {@link FieldInfos}.
*/
- private static class TSDBSyntheticIdFieldInfosFormat extends FieldInfosFormat {
+ private static class RewriteFieldInfosFormat extends FieldInfosFormat {
private final FieldInfosFormat delegate;
- private TSDBSyntheticIdFieldInfosFormat(FieldInfosFormat delegate) {
+ private RewriteFieldInfosFormat(FieldInfosFormat delegate) {
this.delegate = delegate;
}
@@ -206,4 +222,46 @@ public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segm
return new FieldInfos(infos);
}
}
+
+ /**
+ * {@link PostingsFormat} that throws an {@link IllegalArgumentException} if a Lucene field with the name {@code _id} has postings
+ * produces during indexing.
+ */
+ private static class EnsureNoPostingsFormat extends PostingsFormat {
+
+ private final PostingsFormat delegate;
+
+ private EnsureNoPostingsFormat(PostingsFormat delegate) {
+ super(delegate.getName());
+ this.delegate = delegate;
+ }
+
+ @Override
+ public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+ final var consumer = delegate.fieldsConsumer(state);
+ return new FieldsConsumer() {
+ @Override
+ public void write(Fields fields, NormsProducer norms) throws IOException {
+ for (var field : fields) {
+ if (SYNTHETIC_ID.equalsIgnoreCase(field)) {
+ var message = "Field [" + SYNTHETIC_ID + "] has terms produced during indexing";
+ assert false : message;
+ throw new IllegalArgumentException(message);
+ }
+ }
+ consumer.write(fields, norms);
+ }
+
+ @Override
+ public void close() throws IOException {
+ consumer.close();
+ }
+ };
+ }
+
+ @Override
+ public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+ return delegate.fieldsProducer(state);
+ }
+ }
}
From 3b22c46861606b393b4b9c10e67f0b6b048ba517 Mon Sep 17 00:00:00 2001
From: tlrx
Date: Mon, 3 Nov 2025 15:50:13 +0100
Subject: [PATCH 17/20] remove sort
---
.../org/elasticsearch/index/IndexMode.java | 45 ++++++++-----------
1 file changed, 18 insertions(+), 27 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/index/IndexMode.java b/server/src/main/java/org/elasticsearch/index/IndexMode.java
index b3df1bfd8ea5a..10e604126f934 100644
--- a/server/src/main/java/org/elasticsearch/index/IndexMode.java
+++ b/server/src/main/java/org/elasticsearch/index/IndexMode.java
@@ -42,13 +42,15 @@
import java.io.IOException;
import java.time.Instant;
import java.util.Arrays;
-import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.function.BooleanSupplier;
import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static java.util.stream.Collectors.toSet;
/**
* "Mode" that controls which behaviors and settings an index supports.
@@ -139,16 +141,7 @@ void validateWithOtherSettings(Map, Object> settings) {
throw new IllegalArgumentException(error(IndexMetadata.INDEX_ROUTING_PARTITION_SIZE_SETTING));
}
- Settings settingsWithIndexMode;
- if (IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG) {
- settingsWithIndexMode = Settings.builder()
- .put(IndexSettings.MODE.getKey(), getName())
- // Default values of some index sort settings depend of the feature flag and USE_SYNTHETIC_ID setting
- .put(IndexSettings.USE_SYNTHETIC_ID.getKey(), (Boolean) settings.get(IndexSettings.USE_SYNTHETIC_ID))
- .build();
- } else {
- settingsWithIndexMode = Settings.builder().put(IndexSettings.MODE.getKey(), getName()).build();
- }
+ var settingsWithIndexMode = Settings.builder().put(IndexSettings.MODE.getKey(), getName()).build();
for (Setting> unsupported : TIME_SERIES_UNSUPPORTED) {
if (false == Objects.equals(unsupported.getDefault(settingsWithIndexMode), settings.get(unsupported))) {
@@ -467,22 +460,20 @@ private static CompressedXContent createDefaultMapping(boolean includeHostName)
IndexSortConfig.INDEX_SORT_MISSING_SETTING
);
- static final List> VALIDATE_WITH_SETTINGS;
- static {
- var settings = new HashSet>();
- settings.add(IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING);
- settings.add(IndexMetadata.INDEX_ROUTING_PARTITION_SIZE_SETTING);
- settings.add(IndexMetadata.INDEX_ROUTING_PATH);
- settings.add(IndexMetadata.INDEX_DIMENSIONS);
- settings.add(IndexSettings.LOGSDB_ROUTE_ON_SORT_FIELDS);
- settings.add(IndexSettings.TIME_SERIES_START_TIME);
- settings.add(IndexSettings.TIME_SERIES_END_TIME);
- if (IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG) {
- settings.add(IndexSettings.USE_SYNTHETIC_ID);
- }
- settings.addAll(TIME_SERIES_UNSUPPORTED);
- VALIDATE_WITH_SETTINGS = List.copyOf(settings);
- }
+ static final List> VALIDATE_WITH_SETTINGS = List.copyOf(
+ Stream.concat(
+ Stream.of(
+ IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING,
+ IndexMetadata.INDEX_ROUTING_PARTITION_SIZE_SETTING,
+ IndexMetadata.INDEX_ROUTING_PATH,
+ IndexMetadata.INDEX_DIMENSIONS,
+ IndexSettings.LOGSDB_ROUTE_ON_SORT_FIELDS,
+ IndexSettings.TIME_SERIES_START_TIME,
+ IndexSettings.TIME_SERIES_END_TIME
+ ),
+ TIME_SERIES_UNSUPPORTED.stream()
+ ).collect(toSet())
+ );
private final String name;
From 546e23bc0c31c3730e3af393f9f62886161b0429 Mon Sep 17 00:00:00 2001
From: tlrx
Date: Tue, 4 Nov 2025 10:33:53 +0100
Subject: [PATCH 18/20] remove compound
---
.../java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java | 2 --
1 file changed, 2 deletions(-)
diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
index 1de17ba893916..79892d1a3dd89 100644
--- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
+++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
@@ -25,7 +25,6 @@
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.index.engine.EngineConfig;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.plugins.Plugin;
@@ -389,7 +388,6 @@ private static void putDataStreamTemplate(String indexPattern, int shards) throw
final var settings = indexSettings(shards, 0).put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES.getName())
.put(IndexSettings.BLOOM_FILTER_ID_FIELD_ENABLED_SETTING.getKey(), false)
.put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), -1)
- .put(EngineConfig.USE_COMPOUND_FILE, false)
.put(IndexSettings.USE_SYNTHETIC_ID.getKey(), true);
final var mappings = """
From 3655dc314a036320367f639e237140de7dcd5b0b Mon Sep 17 00:00:00 2001
From: tlrx
Date: Wed, 5 Nov 2025 10:35:40 +0100
Subject: [PATCH 19/20] feedback
---
.../datastreams/TSDBSyntheticIdsIT.java | 29 +++++++++++++++++++
.../codec/tsdb/TSDBSyntheticIdCodec.java | 2 +-
2 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
index 79892d1a3dd89..654051b9e13f5 100644
--- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
+++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java
@@ -109,6 +109,7 @@ public void testSyntheticId() throws Exception {
final var docs = new HashMap();
final var unit = randomFrom(ChronoUnit.SECONDS, ChronoUnit.MINUTES);
final var timestamp = Instant.now();
+ logger.info("timestamp is " + timestamp);
// Index 10 docs in datastream
//
@@ -200,6 +201,34 @@ enum Operation {
assertThat(deleteResponse.getVersion(), equalTo(2L));
}
+ // Index more random docs
+ if (randomBoolean()) {
+ int nbDocs = randomIntBetween(1, 100);
+ final var arrayOfDocs = new XContentBuilder[nbDocs];
+
+ var t = timestamp.plus(4, unit); // timestamp + 4 units (seconds or minutes), no overlap with previous docs
+ while (nbDocs > 0) {
+ var hosts = randomSubsetOf(List.of("vm-dev01", "vm-dev02", "vm-dev03"));
+ for (var host : hosts) {
+ if (--nbDocs < 0) {
+ break;
+ }
+ arrayOfDocs[nbDocs] = document(t, host, "cpu-load", randomInt(10));
+ }
+ // always use seconds, otherwise the doc might fall outside of the timestamps window of the datastream
+ t = t.plus(1, ChronoUnit.SECONDS);
+ }
+
+ results = createDocuments(dataStreamName, arrayOfDocs);
+
+ // Verify that documents are created
+ for (var result : results) {
+ assertThat(result.getResponse().getResult(), equalTo(DocWriteResponse.Result.CREATED));
+ assertThat(result.getVersion(), equalTo(1L));
+ docs.put(result.getId(), result.getIndex());
+ }
+ }
+
refresh(dataStreamName);
assertCheckedResponse(client().prepareSearch(dataStreamName).setTrackTotalHits(true).setSize(100), searchResponse -> {
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java
index f44741c0e6c68..aa6936cb65df9 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java
@@ -225,7 +225,7 @@ public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segm
/**
* {@link PostingsFormat} that throws an {@link IllegalArgumentException} if a Lucene field with the name {@code _id} has postings
- * produces during indexing.
+ * produced during indexing.
*/
private static class EnsureNoPostingsFormat extends PostingsFormat {
From 608ff674cd870bd5574c62682139de356f081672 Mon Sep 17 00:00:00 2001
From: tlrx
Date: Wed, 5 Nov 2025 13:23:12 +0100
Subject: [PATCH 20/20] fix setting registration
---
.../org/elasticsearch/cluster/metadata/IndexMetadata.java | 4 +++-
.../src/main/java/org/elasticsearch/index/IndexSettings.java | 2 +-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java
index 6fee4b39dbe22..094626018d449 100644
--- a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java
+++ b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java
@@ -2514,7 +2514,9 @@ IndexMetadata build(boolean repair) {
final IndexMode indexMode = indexModeString != null ? IndexMode.fromString(indexModeString.toLowerCase(Locale.ROOT)) : null;
final boolean isTsdb = indexMode == IndexMode.TIME_SERIES;
boolean useTimeSeriesSyntheticId = false;
- if (isTsdb && indexCreatedVersion.onOrAfter(IndexVersions.TIME_SERIES_USE_SYNTHETIC_ID)) {
+ if (isTsdb
+ && IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG
+ && indexCreatedVersion.onOrAfter(IndexVersions.TIME_SERIES_USE_SYNTHETIC_ID)) {
var setting = settings.get(IndexSettings.USE_SYNTHETIC_ID.getKey());
if (setting != null && setting.equalsIgnoreCase(Boolean.TRUE.toString())) {
assert IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG;
diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java
index f7af6bf098761..81b4bbab69756 100644
--- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java
+++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java
@@ -1182,7 +1182,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
&& scopedSettings.get(RECOVERY_USE_SYNTHETIC_SOURCE_SETTING);
useDocValuesSkipper = DOC_VALUES_SKIPPER && scopedSettings.get(USE_DOC_VALUES_SKIPPER);
seqNoIndexOptions = scopedSettings.get(SEQ_NO_INDEX_OPTIONS_SETTING);
- final var useSyntheticId = scopedSettings.get(USE_SYNTHETIC_ID);
+ final var useSyntheticId = IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG && scopedSettings.get(USE_SYNTHETIC_ID);
if (indexMetadata.useTimeSeriesSyntheticId() != useSyntheticId) {
assert false;
throw new IllegalArgumentException(