From 1ea5cce3561d02182b5788dfafa115fbb0c611fb Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Thu, 20 Nov 2025 03:11:45 -0500 Subject: [PATCH 1/2] Add merge policy to block older segments from participating in merges --- .../index/LatestVersionFilterMergePolicy.java | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 solr/core/src/java/org/apache/solr/index/LatestVersionFilterMergePolicy.java diff --git a/solr/core/src/java/org/apache/solr/index/LatestVersionFilterMergePolicy.java b/solr/core/src/java/org/apache/solr/index/LatestVersionFilterMergePolicy.java new file mode 100644 index 00000000000..5063729d2d0 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/index/LatestVersionFilterMergePolicy.java @@ -0,0 +1,52 @@ +package org.apache.solr.index; + +import java.io.IOException; +import java.util.Map; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.MergeTrigger; +import org.apache.lucene.index.SegmentCommitInfo; +import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.index.TieredMergePolicy; +import org.apache.lucene.util.Version; + +/** + * Only allows latest version segments to be considered for merges. That way a snapshot of older + * segments can remain consistent + */ +public class LatestVersionFilterMergePolicy extends MergePolicy { + MergePolicy delegatePolicy = new TieredMergePolicy(); + + @Override + public MergeSpecification findMerges( + MergeTrigger mergeTrigger, SegmentInfos infos, MergeContext mergeContext) throws IOException { + /*we don't want to remove from the original SegmentInfos, else the segments may not carry forward upon a commit. + That would be catastrophic. 
Hence we clone.*/ + SegmentInfos infosClone = infos.clone(); + infosClone.clear(); + for (SegmentCommitInfo info : infos) { + if (info.info.getMinVersion() != null + && info.info.getMinVersion().major == Version.LATEST.major) { + infosClone.add(info); + } + } + + return delegatePolicy.findMerges(mergeTrigger, infosClone, mergeContext); + } + + @Override + public MergeSpecification findForcedMerges( + SegmentInfos segmentInfos, + int maxSegmentCount, + Map segmentsToMerge, + MergeContext mergeContext) + throws IOException { + return delegatePolicy.findForcedMerges( + segmentInfos, maxSegmentCount, segmentsToMerge, mergeContext); + } + + @Override + public MergeSpecification findForcedDeletesMerges( + SegmentInfos segmentInfos, MergeContext mergeContext) throws IOException { + return delegatePolicy.findForcedDeletesMerges(segmentInfos, mergeContext); + } +} From cdc9978b2581318eb22c50b36744b6fb06f98f07 Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Wed, 26 Nov 2025 01:27:41 -0500 Subject: [PATCH 2/2] extend FilterMergePolicy for delegation and modularize logic --- .../index/LatestVersionFilterMergePolicy.java | 69 ++++++++++++------- 1 file changed, 46 insertions(+), 23 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/index/LatestVersionFilterMergePolicy.java b/solr/core/src/java/org/apache/solr/index/LatestVersionFilterMergePolicy.java index 5063729d2d0..5841f99fb75 100644 --- a/solr/core/src/java/org/apache/solr/index/LatestVersionFilterMergePolicy.java +++ b/solr/core/src/java/org/apache/solr/index/LatestVersionFilterMergePolicy.java @@ -2,51 +2,74 @@ import java.io.IOException; import java.util.Map; +import org.apache.lucene.index.FilterMergePolicy; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergeTrigger; import org.apache.lucene.index.SegmentCommitInfo; import org.apache.lucene.index.SegmentInfos; -import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.util.Version; /** - * Only allows latest 
version segments to be considered for merges. That way a snapshot of older - * segments can remain consistent + * Prevents any older version segment ({@literal <} {@link Version#LATEST}), either original or one + * derived as a result of merging with an older version segment, from being considered for merges. + * That way a snapshot of older segments remains consistent. This assists in upgrading to a future + * Lucene major version if existing documents are reindexed in the current version with this merge + * policy in place. */ -public class LatestVersionFilterMergePolicy extends MergePolicy { - MergePolicy delegatePolicy = new TieredMergePolicy(); +public class LatestVersionFilterMergePolicy extends FilterMergePolicy { + + public LatestVersionFilterMergePolicy(MergePolicy in) { + super(in); + } @Override public MergeSpecification findMerges( MergeTrigger mergeTrigger, SegmentInfos infos, MergeContext mergeContext) throws IOException { - /*we don't want to remove from the original SegmentInfos, else the segments may not carry forward upon a commit. - That would be catastrophic. 
Hence we clone.*/ - SegmentInfos infosClone = infos.clone(); - infosClone.clear(); - for (SegmentCommitInfo info : infos) { - if (info.info.getMinVersion() != null - && info.info.getMinVersion().major == Version.LATEST.major) { - infosClone.add(info); - } - } - - return delegatePolicy.findMerges(mergeTrigger, infosClone, mergeContext); + return in.findMerges(mergeTrigger, getFilteredInfosClone(infos), mergeContext); } @Override public MergeSpecification findForcedMerges( - SegmentInfos segmentInfos, + SegmentInfos infos, int maxSegmentCount, Map segmentsToMerge, MergeContext mergeContext) throws IOException { - return delegatePolicy.findForcedMerges( - segmentInfos, maxSegmentCount, segmentsToMerge, mergeContext); + return in.findForcedMerges( + getFilteredInfosClone(infos), maxSegmentCount, segmentsToMerge, mergeContext); + } + + @Override + public MergeSpecification findForcedDeletesMerges(SegmentInfos infos, MergeContext mergeContext) + throws IOException { + return in.findForcedDeletesMerges(getFilteredInfosClone(infos), mergeContext); } @Override - public MergeSpecification findForcedDeletesMerges( - SegmentInfos segmentInfos, MergeContext mergeContext) throws IOException { - return delegatePolicy.findForcedDeletesMerges(segmentInfos, mergeContext); + public MergeSpecification findFullFlushMerges( + MergeTrigger mergeTrigger, SegmentInfos infos, MergeContext mergeContext) throws IOException { + return in.findFullFlushMerges(mergeTrigger, getFilteredInfosClone(infos), mergeContext); + } + + private SegmentInfos getFilteredInfosClone(SegmentInfos infos) { + // We should not remove from the original SegmentInfos. Hence we clone. + SegmentInfos infosClone = infos.clone(); + infosClone.clear(); + for (SegmentCommitInfo info : infos) { + if (allowSegmentForMerge(info)) { + infosClone.add(info); + } + } + return infosClone; + } + + /** + * Determines if a SegmentCommitInfo should be part of the candidate set of segments that will be + * considered for merges. 
By default, we only allow LATEST version segments to participate in + * merges. + */ + protected boolean allowSegmentForMerge(SegmentCommitInfo info) { + return info.info.getMinVersion() != null + && info.info.getMinVersion().major == Version.LATEST.major; } }