diff --git a/docs/changelog/133946.yaml b/docs/changelog/133946.yaml new file mode 100644 index 0000000000000..89d45e0212d0d --- /dev/null +++ b/docs/changelog/133946.yaml @@ -0,0 +1,8 @@ +pr: 133946 +summary: Merging is now more aggressive by default, especially for segments under 16MB. +area: Engine +type: enhancement +issues: + - 120624 + - 129764 + - 130328 diff --git a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java index 9b507b5800ba1..523ff7a84c961 100644 --- a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java +++ b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java @@ -48,12 +48,12 @@ * Segments smaller than this are "rounded up" to this size, i.e. treated as * equal (floor) size for merge selection. This is to prevent frequent * flushing of tiny segments, thus preventing a long tail in the index. Default - * is 2mb. + * is 16mb. * *
  • index.merge.policy.max_merge_at_once: * * Maximum number of segments to be merged at a time during "normal" merging. - * Default is 10. + * Default is 16. * *
  • index.merge.policy.max_merged_segment: * @@ -65,7 +65,7 @@ *
  • index.merge.policy.segments_per_tier: * * Sets the allowed number of segments per tier. Smaller values mean more - * merging but fewer segments. Default is 10. Note, this value needs to be + * merging but fewer segments. Default is 8. Note, this value needs to be * >= than the max_merge_at_once otherwise you'll force too many merges to * occur. * @@ -116,8 +116,8 @@ public final class MergePolicyConfig { private final ByteSizeValue defaultMaxTimeBasedMergedSegment; public static final double DEFAULT_EXPUNGE_DELETES_ALLOWED = 10d; - public static final ByteSizeValue DEFAULT_FLOOR_SEGMENT = ByteSizeValue.of(2, ByteSizeUnit.MB); - public static final int DEFAULT_MAX_MERGE_AT_ONCE = 10; + public static final ByteSizeValue DEFAULT_FLOOR_SEGMENT = ByteSizeValue.of(16, ByteSizeUnit.MB); + public static final int DEFAULT_MAX_MERGE_AT_ONCE = 16; public static final ByteSizeValue DEFAULT_MAX_MERGED_SEGMENT = ByteSizeValue.of(5, ByteSizeUnit.GB); public static final Setting DEFAULT_MAX_MERGED_SEGMENT_SETTING = Setting.byteSizeSetting( "indices.merge.policy.max_merged_segment", @@ -139,9 +139,9 @@ public final class MergePolicyConfig { ByteSizeValue.ofBytes(Long.MAX_VALUE), Setting.Property.NodeScope ); - public static final double DEFAULT_SEGMENTS_PER_TIER = 10.0d; + public static final double DEFAULT_SEGMENTS_PER_TIER = 8.0d; /** - * A default value for {@link LogByteSizeMergePolicy}'s merge factor: 32. This default value differs from the Lucene default of 10 in + * A default value for {@link LogByteSizeMergePolicy}'s merge factor: 32. This default value differs from the Lucene default of 10 in * order to account for the fact that Elasticsearch uses {@link LogByteSizeMergePolicy} for time-based data, where adjacent segment * merging ensures that segments have mostly non-overlapping time ranges if data gets ingested in timestamp order. 
In turn, this allows * range queries on the timestamp to remain efficient with high numbers of segments since most segments either don't match the query @@ -292,7 +292,6 @@ MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeBasedIndex) { INDEX_MERGE_ENABLED ); } - maxMergeAtOnce = adjustMaxMergeAtOnceIfNeeded(maxMergeAtOnce, segmentsPerTier); setMergePolicyType(mergePolicyType); setCompoundFormatThreshold(indexSettings.getValue(INDEX_COMPOUND_FORMAT_SETTING)); setExpungeDeletesAllowed(forceMergeDeletesPctAllowed); @@ -365,25 +364,6 @@ void setDeletesPctAllowed(Double deletesPctAllowed) { // LogByteSizeMergePolicy doesn't have a similar configuration option } - private int adjustMaxMergeAtOnceIfNeeded(int maxMergeAtOnce, double segmentsPerTier) { - // fixing maxMergeAtOnce, see TieredMergePolicy#setMaxMergeAtOnce - if (segmentsPerTier < maxMergeAtOnce) { - int newMaxMergeAtOnce = (int) segmentsPerTier; - // max merge at once should be at least 2 - if (newMaxMergeAtOnce <= 1) { - newMaxMergeAtOnce = 2; - } - logger.debug( - "changing max_merge_at_once from [{}] to [{}] because segments_per_tier [{}] has to be higher or " + "equal to it", - maxMergeAtOnce, - newMaxMergeAtOnce, - segmentsPerTier - ); - maxMergeAtOnce = newMaxMergeAtOnce; - } - return maxMergeAtOnce; - } - @SuppressForbidden(reason = "we always use an appropriate merge scheduler alongside this policy so NoMergePolic#INSTANCE is ok") MergePolicy getMergePolicy(boolean isTimeBasedIndex) { if (mergesEnabled == false) { diff --git a/server/src/test/java/org/elasticsearch/search/SearchServiceSingleNodeTests.java b/server/src/test/java/org/elasticsearch/search/SearchServiceSingleNodeTests.java index 0cd60823a22cc..7a6e04f8a2fc2 100644 --- a/server/src/test/java/org/elasticsearch/search/SearchServiceSingleNodeTests.java +++ b/server/src/test/java/org/elasticsearch/search/SearchServiceSingleNodeTests.java @@ -2713,7 +2713,8 @@ public void testSlicingBehaviourForParallelCollection() throws 
Exception { assert String.valueOf(SEARCH_POOL_SIZE).equals(node().settings().get("thread_pool.search.size")) : "Unexpected thread_pool.search.size"; - int numDocs = randomIntBetween(50, 100); + // Between 4 and 6 segments of 5 docs each. + int numDocs = randomIntBetween(20, 30); for (int i = 0; i < numDocs; i++) { prepareIndex("index").setId(String.valueOf(i)).setSource("field", "value").get(); if (i % 5 == 0) {