From e2aaa816ddceb0664a73daa60927bd301eac41b1 Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Mon, 1 Sep 2025 16:02:05 +0200
Subject: [PATCH 1/5] Align merging defaults with Lucene's new defaults.

Lucene recently updated its merging defaults to bias a bit less towards
indexing performance and a bit more towards search performance by:

 - Increasing the floor segment size from 2MB to 16MB. Segments between
   2MB and 16MB will now be merged more aggressively. This is expected to
   result in ~10 fewer segments per shard.
 - Decreasing the number of segments per tier from 10 to 8. This is
   expected to result in 20% fewer segments between 16MB and 5GB (the min
   and max merged segment sizes).

This PR aligns Elasticsearch's defaults with these new Lucene defaults.
This should especially help queries that have a high per-segment
overhead, such as multi-term queries (e.g. fuzzy queries) and vector
search. On the other hand, indexing performance may decrease a bit due
to more merging.

Note that time-based data (indexes that have a `@timestamp` field) have
their own merge factor of 32, so they only get the bump of the floor
segment size to 16MB, not the decrease of the number of segments per
tier.

Furthermore, Lucene now allows merging up to `maxMergeAtOnce` segments
if the merged segment size is below the floor segment size (16MB by
default). When `maxMergeAtOnce` is greater than `segmentsPerTier`, this
helps tiny segments grow more quickly with less write amplification. So
to take advantage of it, I bumped `maxMergeAtOnce` from 10 to 16. This
anticipates upcoming behavior in Lucene 11 where `maxMergeAtOnce` gets
removed and Lucene will happily merge lots of segments together in a
single merge as long as the merged segment size is below the floor
segment size.

Closes #129764 Closes #130328 --- .../index/MergePolicyConfig.java | 34 ++++--------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java index 9b507b5800ba1..523ff7a84c961 100644 --- a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java +++ b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java @@ -48,12 +48,12 @@ * Segments smaller than this are "rounded up" to this size, i.e. treated as * equal (floor) size for merge selection. This is to prevent frequent * flushing of tiny segments, thus preventing a long tail in the index. Default - * is 2mb. + * is 16mb. * *
  • index.merge.policy.max_merge_at_once: * * Maximum number of segments to be merged at a time during "normal" merging. - * Default is 10. + * Default is 16. * *
  • index.merge.policy.max_merged_segment: * @@ -65,7 +65,7 @@ *
  • index.merge.policy.segments_per_tier: * * Sets the allowed number of segments per tier. Smaller values mean more - * merging but fewer segments. Default is 10. Note, this value needs to be + * merging but fewer segments. Default is 8. Note, this value needs to be * >= than the max_merge_at_once otherwise you'll force too many merges to * occur. * @@ -116,8 +116,8 @@ public final class MergePolicyConfig { private final ByteSizeValue defaultMaxTimeBasedMergedSegment; public static final double DEFAULT_EXPUNGE_DELETES_ALLOWED = 10d; - public static final ByteSizeValue DEFAULT_FLOOR_SEGMENT = ByteSizeValue.of(2, ByteSizeUnit.MB); - public static final int DEFAULT_MAX_MERGE_AT_ONCE = 10; + public static final ByteSizeValue DEFAULT_FLOOR_SEGMENT = ByteSizeValue.of(16, ByteSizeUnit.MB); + public static final int DEFAULT_MAX_MERGE_AT_ONCE = 16; public static final ByteSizeValue DEFAULT_MAX_MERGED_SEGMENT = ByteSizeValue.of(5, ByteSizeUnit.GB); public static final Setting DEFAULT_MAX_MERGED_SEGMENT_SETTING = Setting.byteSizeSetting( "indices.merge.policy.max_merged_segment", @@ -139,9 +139,9 @@ public final class MergePolicyConfig { ByteSizeValue.ofBytes(Long.MAX_VALUE), Setting.Property.NodeScope ); - public static final double DEFAULT_SEGMENTS_PER_TIER = 10.0d; + public static final double DEFAULT_SEGMENTS_PER_TIER = 8.0d; /** - * A default value for {@link LogByteSizeMergePolicy}'s merge factor: 32. This default value differs from the Lucene default of 10 in + * A default value for {@link LogByteSizeMergePolicy}'s merge factor: 32. This default value differs from the Lucene default of 8 in * order to account for the fact that Elasticsearch uses {@link LogByteSizeMergePolicy} for time-based data, where adjacent segment * merging ensures that segments have mostly non-overlapping time ranges if data gets ingested in timestamp order. 
In turn, this allows * range queries on the timestamp to remain efficient with high numbers of segments since most segments either don't match the query @@ -292,7 +292,6 @@ MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeBasedIndex) { INDEX_MERGE_ENABLED ); } - maxMergeAtOnce = adjustMaxMergeAtOnceIfNeeded(maxMergeAtOnce, segmentsPerTier); setMergePolicyType(mergePolicyType); setCompoundFormatThreshold(indexSettings.getValue(INDEX_COMPOUND_FORMAT_SETTING)); setExpungeDeletesAllowed(forceMergeDeletesPctAllowed); @@ -365,25 +364,6 @@ void setDeletesPctAllowed(Double deletesPctAllowed) { // LogByteSizeMergePolicy doesn't have a similar configuration option } - private int adjustMaxMergeAtOnceIfNeeded(int maxMergeAtOnce, double segmentsPerTier) { - // fixing maxMergeAtOnce, see TieredMergePolicy#setMaxMergeAtOnce - if (segmentsPerTier < maxMergeAtOnce) { - int newMaxMergeAtOnce = (int) segmentsPerTier; - // max merge at once should be at least 2 - if (newMaxMergeAtOnce <= 1) { - newMaxMergeAtOnce = 2; - } - logger.debug( - "changing max_merge_at_once from [{}] to [{}] because segments_per_tier [{}] has to be higher or " + "equal to it", - maxMergeAtOnce, - newMaxMergeAtOnce, - segmentsPerTier - ); - maxMergeAtOnce = newMaxMergeAtOnce; - } - return maxMergeAtOnce; - } - @SuppressForbidden(reason = "we always use an appropriate merge scheduler alongside this policy so NoMergePolic#INSTANCE is ok") MergePolicy getMergePolicy(boolean isTimeBasedIndex) { if (mergesEnabled == false) { From 9e4b054098f495d15866abac4c8fba963e823c4d Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 9 Sep 2025 08:57:39 +0200 Subject: [PATCH 2/5] Update docs/changelog/133946.yaml --- docs/changelog/133946.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 docs/changelog/133946.yaml diff --git a/docs/changelog/133946.yaml b/docs/changelog/133946.yaml new file mode 100644 index 0000000000000..4b03505f632be --- /dev/null +++ b/docs/changelog/133946.yaml @@ 
-0,0 +1,8 @@ +pr: 133946 +summary: Align merging defaults with Lucene's new defaults +area: Engine +type: enhancement +issues: + - 120624 + - 129764 + - 130328 From 6b3bb74963718c64a7d26536b99dd6a67b78712f Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 9 Sep 2025 09:08:25 +0200 Subject: [PATCH 3/5] changelog --- docs/changelog/133946.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/133946.yaml b/docs/changelog/133946.yaml index 4b03505f632be..89d45e0212d0d 100644 --- a/docs/changelog/133946.yaml +++ b/docs/changelog/133946.yaml @@ -1,5 +1,5 @@ pr: 133946 -summary: Align merging defaults with Lucene's new defaults +summary: Merging is now more aggressive by default, especially for segments under 16MB. area: Engine type: enhancement issues: From 8fb9ecaa92c67fe6bba823210cdc334aaddc1e63 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 9 Sep 2025 09:09:44 +0200 Subject: [PATCH 4/5] Fix test failure --- .../org/elasticsearch/search/SearchServiceSingleNodeTests.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/search/SearchServiceSingleNodeTests.java b/server/src/test/java/org/elasticsearch/search/SearchServiceSingleNodeTests.java index 0cd60823a22cc..7a6e04f8a2fc2 100644 --- a/server/src/test/java/org/elasticsearch/search/SearchServiceSingleNodeTests.java +++ b/server/src/test/java/org/elasticsearch/search/SearchServiceSingleNodeTests.java @@ -2713,7 +2713,8 @@ public void testSlicingBehaviourForParallelCollection() throws Exception { assert String.valueOf(SEARCH_POOL_SIZE).equals(node().settings().get("thread_pool.search.size")) : "Unexpected thread_pool.search.size"; - int numDocs = randomIntBetween(50, 100); + // Between 4 and 6 segments of 5 docs each. 
+ int numDocs = randomIntBetween(20, 30); for (int i = 0; i < numDocs; i++) { prepareIndex("index").setId(String.valueOf(i)).setSource("field", "value").get(); if (i % 5 == 0) { From 7d7dbfe5b9e8b546ebc7470d0b2af43186cffb5d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 23 Sep 2025 16:01:38 +0000 Subject: [PATCH 5/5] [CI] Update transport version definitions --- server/src/main/resources/transport/upper_bounds/9.2.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/resources/transport/upper_bounds/9.2.csv b/server/src/main/resources/transport/upper_bounds/9.2.csv index 6e7d51d3d3020..b1209b927d8a5 100644 --- a/server/src/main/resources/transport/upper_bounds/9.2.csv +++ b/server/src/main/resources/transport/upper_bounds/9.2.csv @@ -1 +1 @@ -security_stats_endpoint,9168000 +inference_api_openai_embeddings_headers,9169000