diff --git a/docs/changelog/133946.yaml b/docs/changelog/133946.yaml
new file mode 100644
index 0000000000000..89d45e0212d0d
--- /dev/null
+++ b/docs/changelog/133946.yaml
@@ -0,0 +1,8 @@
+pr: 133946
+summary: Merging is now more aggressive by default, especially for segments under 16MB.
+area: Engine
+type: enhancement
+issues:
+ - 120624
+ - 129764
+ - 130328
diff --git a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java
index 9b507b5800ba1..523ff7a84c961 100644
--- a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java
+++ b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java
@@ -48,12 +48,12 @@
* Segments smaller than this are "rounded up" to this size, i.e. treated as
* equal (floor) size for merge selection. This is to prevent frequent
* flushing of tiny segments, thus preventing a long tail in the index. Default
- * is 2mb.
+ * is 16mb.
*
*
index.merge.policy.max_merge_at_once:
*
* Maximum number of segments to be merged at a time during "normal" merging.
- * Default is 10.
+ * Default is 16.
*
* index.merge.policy.max_merged_segment:
*
@@ -65,7 +65,7 @@
* index.merge.policy.segments_per_tier:
*
* Sets the allowed number of segments per tier. Smaller values mean more
- * merging but fewer segments. Default is 10. Note, this value needs to be
+ * merging but fewer segments. Default is 8. Note, this value needs to be
* >= than the max_merge_at_once otherwise you'll force too many merges to
* occur.
*
@@ -116,8 +116,8 @@ public final class MergePolicyConfig {
private final ByteSizeValue defaultMaxTimeBasedMergedSegment;
public static final double DEFAULT_EXPUNGE_DELETES_ALLOWED = 10d;
- public static final ByteSizeValue DEFAULT_FLOOR_SEGMENT = ByteSizeValue.of(2, ByteSizeUnit.MB);
- public static final int DEFAULT_MAX_MERGE_AT_ONCE = 10;
+ public static final ByteSizeValue DEFAULT_FLOOR_SEGMENT = ByteSizeValue.of(16, ByteSizeUnit.MB);
+ public static final int DEFAULT_MAX_MERGE_AT_ONCE = 16;
public static final ByteSizeValue DEFAULT_MAX_MERGED_SEGMENT = ByteSizeValue.of(5, ByteSizeUnit.GB);
public static final Setting DEFAULT_MAX_MERGED_SEGMENT_SETTING = Setting.byteSizeSetting(
"indices.merge.policy.max_merged_segment",
@@ -139,9 +139,9 @@ public final class MergePolicyConfig {
ByteSizeValue.ofBytes(Long.MAX_VALUE),
Setting.Property.NodeScope
);
- public static final double DEFAULT_SEGMENTS_PER_TIER = 10.0d;
+ public static final double DEFAULT_SEGMENTS_PER_TIER = 8.0d;
/**
- * A default value for {@link LogByteSizeMergePolicy}'s merge factor: 32. This default value differs from the Lucene default of 10 in
+ * A default value for {@link LogByteSizeMergePolicy}'s merge factor: 32. This default value differs from the Lucene default of 10 in
* order to account for the fact that Elasticsearch uses {@link LogByteSizeMergePolicy} for time-based data, where adjacent segment
* merging ensures that segments have mostly non-overlapping time ranges if data gets ingested in timestamp order. In turn, this allows
* range queries on the timestamp to remain efficient with high numbers of segments since most segments either don't match the query
@@ -292,7 +292,6 @@ MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeBasedIndex) {
INDEX_MERGE_ENABLED
);
}
- maxMergeAtOnce = adjustMaxMergeAtOnceIfNeeded(maxMergeAtOnce, segmentsPerTier);
setMergePolicyType(mergePolicyType);
setCompoundFormatThreshold(indexSettings.getValue(INDEX_COMPOUND_FORMAT_SETTING));
setExpungeDeletesAllowed(forceMergeDeletesPctAllowed);
@@ -365,25 +364,6 @@ void setDeletesPctAllowed(Double deletesPctAllowed) {
// LogByteSizeMergePolicy doesn't have a similar configuration option
}
- private int adjustMaxMergeAtOnceIfNeeded(int maxMergeAtOnce, double segmentsPerTier) {
- // fixing maxMergeAtOnce, see TieredMergePolicy#setMaxMergeAtOnce
- if (segmentsPerTier < maxMergeAtOnce) {
- int newMaxMergeAtOnce = (int) segmentsPerTier;
- // max merge at once should be at least 2
- if (newMaxMergeAtOnce <= 1) {
- newMaxMergeAtOnce = 2;
- }
- logger.debug(
- "changing max_merge_at_once from [{}] to [{}] because segments_per_tier [{}] has to be higher or " + "equal to it",
- maxMergeAtOnce,
- newMaxMergeAtOnce,
- segmentsPerTier
- );
- maxMergeAtOnce = newMaxMergeAtOnce;
- }
- return maxMergeAtOnce;
- }
-
@SuppressForbidden(reason = "we always use an appropriate merge scheduler alongside this policy so NoMergePolic#INSTANCE is ok")
MergePolicy getMergePolicy(boolean isTimeBasedIndex) {
if (mergesEnabled == false) {
diff --git a/server/src/test/java/org/elasticsearch/search/SearchServiceSingleNodeTests.java b/server/src/test/java/org/elasticsearch/search/SearchServiceSingleNodeTests.java
index 0cd60823a22cc..7a6e04f8a2fc2 100644
--- a/server/src/test/java/org/elasticsearch/search/SearchServiceSingleNodeTests.java
+++ b/server/src/test/java/org/elasticsearch/search/SearchServiceSingleNodeTests.java
@@ -2713,7 +2713,8 @@ public void testSlicingBehaviourForParallelCollection() throws Exception {
assert String.valueOf(SEARCH_POOL_SIZE).equals(node().settings().get("thread_pool.search.size"))
: "Unexpected thread_pool.search.size";
- int numDocs = randomIntBetween(50, 100);
+ // Between 4 and 6 segments of 5 docs each.
+ int numDocs = randomIntBetween(20, 30);
for (int i = 0; i < numDocs; i++) {
prepareIndex("index").setId(String.valueOf(i)).setSource("field", "value").get();
if (i % 5 == 0) {