Skip to content
8 changes: 8 additions & 0 deletions docs/changelog/133946.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
pr: 133946
summary: Merging is now more aggressive by default, especially for segments under 16MB.
area: Engine
type: enhancement
issues:
- 120624
- 129764
- 130328
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,12 @@
* Segments smaller than this are "rounded up" to this size, i.e. treated as
* equal (floor) size for merge selection. This is to prevent frequent
* flushing of tiny segments, thus preventing a long tail in the index. Default
* is <code>2mb</code>.
* is <code>16mb</code>.
*
* <li><code>index.merge.policy.max_merge_at_once</code>:
*
* Maximum number of segments to be merged at a time during "normal" merging.
* Default is <code>10</code>.
* Default is <code>16</code>.
*
* <li><code>index.merge.policy.max_merged_segment</code>:
*
Expand All @@ -65,7 +65,7 @@
* <li><code>index.merge.policy.segments_per_tier</code>:
*
* Sets the allowed number of segments per tier. Smaller values mean more
* merging but fewer segments. Default is <code>10</code>. Note, this value needs to be
 * merging but fewer segments. Default is <code>8</code>. Note, this value needs to be
 * greater than or equal to <code>max_merge_at_once</code>, otherwise you'll force too
 * many merges to occur.
*
Expand Down Expand Up @@ -116,8 +116,8 @@ public final class MergePolicyConfig {
private final ByteSizeValue defaultMaxTimeBasedMergedSegment;

public static final double DEFAULT_EXPUNGE_DELETES_ALLOWED = 10d;
public static final ByteSizeValue DEFAULT_FLOOR_SEGMENT = ByteSizeValue.of(2, ByteSizeUnit.MB);
public static final int DEFAULT_MAX_MERGE_AT_ONCE = 10;
public static final ByteSizeValue DEFAULT_FLOOR_SEGMENT = ByteSizeValue.of(16, ByteSizeUnit.MB);
public static final int DEFAULT_MAX_MERGE_AT_ONCE = 16;
public static final ByteSizeValue DEFAULT_MAX_MERGED_SEGMENT = ByteSizeValue.of(5, ByteSizeUnit.GB);
public static final Setting<ByteSizeValue> DEFAULT_MAX_MERGED_SEGMENT_SETTING = Setting.byteSizeSetting(
"indices.merge.policy.max_merged_segment",
Expand All @@ -139,9 +139,9 @@ public final class MergePolicyConfig {
ByteSizeValue.ofBytes(Long.MAX_VALUE),
Setting.Property.NodeScope
);
public static final double DEFAULT_SEGMENTS_PER_TIER = 10.0d;
public static final double DEFAULT_SEGMENTS_PER_TIER = 8.0d;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can potentially cause serverless tests to fail though I'd expect the PR build to catch that (looks like it successfully ran serverless tests). We saw that when the lucene default changed. Perhaps we can run one more CI run (to get another randomized sample)?

/**
* A default value for {@link LogByteSizeMergePolicy}'s merge factor: 32. This default value differs from the Lucene default of 10 in
 * A default value for {@link LogByteSizeMergePolicy}'s merge factor: 32. This default value differs from the Lucene default of 10 in
* order to account for the fact that Elasticsearch uses {@link LogByteSizeMergePolicy} for time-based data, where adjacent segment
* merging ensures that segments have mostly non-overlapping time ranges if data gets ingested in timestamp order. In turn, this allows
* range queries on the timestamp to remain efficient with high numbers of segments since most segments either don't match the query
Expand Down Expand Up @@ -292,7 +292,6 @@ MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeBasedIndex) {
INDEX_MERGE_ENABLED
);
}
maxMergeAtOnce = adjustMaxMergeAtOnceIfNeeded(maxMergeAtOnce, segmentsPerTier);
setMergePolicyType(mergePolicyType);
setCompoundFormatThreshold(indexSettings.getValue(INDEX_COMPOUND_FORMAT_SETTING));
setExpungeDeletesAllowed(forceMergeDeletesPctAllowed);
Expand Down Expand Up @@ -365,25 +364,6 @@ void setDeletesPctAllowed(Double deletesPctAllowed) {
// LogByteSizeMergePolicy doesn't have a similar configuration option
}

/**
 * Returns a {@code max_merge_at_once} value that is consistent with the given
 * {@code segments_per_tier}, mirroring the invariant enforced by
 * {@code TieredMergePolicy#setMaxMergeAtOnce}: max_merge_at_once must not exceed
 * segments_per_tier, and must be at least 2.
 *
 * @param maxMergeAtOnce  the configured max_merge_at_once value
 * @param segmentsPerTier the configured segments_per_tier value
 * @return {@code maxMergeAtOnce} unchanged when already consistent, otherwise a
 *         value lowered to fit under {@code segmentsPerTier} (floored at 2)
 */
private int adjustMaxMergeAtOnceIfNeeded(int maxMergeAtOnce, double segmentsPerTier) {
    if (segmentsPerTier >= maxMergeAtOnce) {
        // Already satisfies the TieredMergePolicy constraint; nothing to adjust.
        return maxMergeAtOnce;
    }
    // Truncate down to the tier size, but never drop below the minimum of 2 merges at once.
    int adjusted = Math.max(2, (int) segmentsPerTier);
    logger.debug(
        "changing max_merge_at_once from [{}] to [{}] because segments_per_tier [{}] has to be higher or equal to it",
        maxMergeAtOnce,
        adjusted,
        segmentsPerTier
    );
    return adjusted;
}

@SuppressForbidden(reason = "we always use an appropriate merge scheduler alongside this policy so NoMergePolic#INSTANCE is ok")
MergePolicy getMergePolicy(boolean isTimeBasedIndex) {
if (mergesEnabled == false) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2713,7 +2713,8 @@ public void testSlicingBehaviourForParallelCollection() throws Exception {
assert String.valueOf(SEARCH_POOL_SIZE).equals(node().settings().get("thread_pool.search.size"))
: "Unexpected thread_pool.search.size";

int numDocs = randomIntBetween(50, 100);
// Between 4 and 6 segments of 5 docs each.
int numDocs = randomIntBetween(20, 30);
for (int i = 0; i < numDocs; i++) {
prepareIndex("index").setId(String.valueOf(i)).setSource("field", "value").get();
if (i % 5 == 0) {
Expand Down