From 28dfa8e186d3106472efdf8cd825a76e418e3529 Mon Sep 17 00:00:00 2001 From: umi Date: Fri, 24 Apr 2026 15:57:52 +0800 Subject: [PATCH 01/48] proto batch externalSort fix add manifest sort to compact job addTest review mvMorax fix spi proto proto fix fix # Conflicts: # paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java --- .../java/org/apache/paimon/CoreOptions.java | 33 ++ .../paimon/operation/FileStoreCommitImpl.java | 17 +- .../paimon/operation/ManifestFileMerger.java | 537 +++++++++++++++++- .../operation/ManifestPickStrategy.java | 138 +++++ .../paimon/operation/ManifestSortedRun.java | 131 +++++ .../paimon/manifest/ManifestFileMetaTest.java | 75 ++- .../NoPartitionManifestFileMetaTest.java | 20 +- 7 files changed, 912 insertions(+), 39 deletions(-) create mode 100644 paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java create mode 100644 paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index 176d1e9d4d47..4217428d8097 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -469,6 +469,30 @@ public InlineElement getDescription() { "To avoid frequent manifest merges, this parameter specifies the minimum number " + "of ManifestFileMeta to merge."); + public static final ConfigOption MANIFEST_SORT_ENABLE = + key("manifest-sort.enable") + .booleanType() + .defaultValue(false) + .withDescription( + "Whether to invoke manifest sort rewrite right after manifest merge" + + " during commit. The sort rewrite implementation is provided" + + " by an external module (e.g. morax) and discovered via" + + " ServiceLoader. 
When no implementation is registered on the" + + " classpath, this flag has no effect (manifest sort is" + + " silently skipped)."); + + public static final ConfigOption MANIFEST_SORT_PARTITION_FIELD = + key("manifest-sort.partition-field") + .stringType() + .noDefaultValue() + .withDescription( + "Partition field name to sort manifest entries by. Validated by" + + " schema validation; resolved to a 0-based index by the" + + " caller (an external sort rewrite implementation). For" + + " single-partition tables, optional (defaults to the only" + + " partition field). For multi-partition tables, REQUIRED" + + " when 'manifest-sort.enable' is true."); + public static final ConfigOption UPSERT_KEY = key("upsert-key") .stringType() @@ -2564,6 +2588,15 @@ public MemorySize manifestFullCompactionThresholdSize() { return options.get(MANIFEST_FULL_COMPACTION_FILE_SIZE); } + public boolean manifestSortEnable() { + return options.get(MANIFEST_SORT_ENABLE); + } + + @Nullable + public String manifestSortPartitionField() { + return options.get(MANIFEST_SORT_PARTITION_FIELD); + } + public String partitionDefaultName() { return options.get(PARTITION_DEFAULT_NAME); } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java index 29ac8b5a3ecb..3f994947f1b7 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java @@ -54,6 +54,7 @@ import org.apache.paimon.operation.commit.SuccessCommitResult; import org.apache.paimon.operation.metrics.CommitMetrics; import org.apache.paimon.operation.metrics.CommitStats; +import org.apache.paimon.options.Options; import org.apache.paimon.partition.PartitionPredicate; import org.apache.paimon.partition.PartitionStatistics; import org.apache.paimon.predicate.Predicate; @@ -958,13 +959,7 @@ CommitResult tryCommitOnce( // 
try to merge old manifest files to create base manifest list mergeAfterManifests = ManifestFileMerger.merge( - mergeBeforeManifests, - manifestFile, - options.manifestTargetSize().getBytes(), - options.manifestMergeMinCount(), - options.manifestFullCompactionThresholdSize().getBytes(), - partitionType, - options.scanManifestParallelism()); + mergeBeforeManifests, manifestFile, partitionType, options); baseManifestList = manifestList.write(mergeAfterManifests); if (options.rowTrackingEnabled()) { @@ -1185,15 +1180,15 @@ private boolean compactManifestOnce() { List mergeAfterManifests; // the fist trial + Options tempOptions = options.toConfiguration(); + tempOptions.set("manifest.merge-min-count", "1"); + tempOptions.set("manifest.full-compaction-threshold-size", "1B"); mergeAfterManifests = ManifestFileMerger.merge( mergeBeforeManifests, manifestFile, - options.manifestTargetSize().getBytes(), - 1, - 1, partitionType, - options.scanManifestParallelism()); + CoreOptions.fromMap(tempOptions.toMap())); if (new HashSet<>(mergeBeforeManifests).equals(new HashSet<>(mergeAfterManifests))) { // no need to commit this snapshot, because no compact were happened diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index cdcad1ed3e84..3d7cccd5b34b 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -18,13 +18,17 @@ package org.apache.paimon.operation; +import org.apache.paimon.CoreOptions; import org.apache.paimon.data.BinaryRow; import org.apache.paimon.io.RollingFileWriter; import org.apache.paimon.manifest.FileEntry; import org.apache.paimon.manifest.ManifestEntry; import org.apache.paimon.manifest.ManifestFile; import org.apache.paimon.manifest.ManifestFileMeta; +import org.apache.paimon.options.Options; import 
org.apache.paimon.partition.PartitionPredicate; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.DecimalType; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.Filter; @@ -34,6 +38,7 @@ import javax.annotation.Nullable; import java.util.ArrayList; +import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; @@ -48,7 +53,7 @@ import static org.apache.paimon.utils.ManifestReadThreadPool.sequentialBatchedExecute; import static org.apache.paimon.utils.Preconditions.checkArgument; -/** Util for merging manifest files. */ +/** Manifest file merger with standard merge logic and optional sort rewrite. */ public class ManifestFileMerger { private static final Logger LOG = LoggerFactory.getLogger(ManifestFileMerger.class); @@ -62,25 +67,41 @@ public class ManifestFileMerger { public static List merge( List input, ManifestFile manifestFile, - long suggestedMetaSize, - int suggestedMinMetaCount, - long manifestFullCompactionSize, RowType partitionType, - @Nullable Integer manifestReadParallelism) { + CoreOptions options) { + // Extract configuration from options + long suggestedMetaSize = options.manifestTargetSize().getBytes(); + int suggestedMinMetaCount = options.manifestMergeMinCount(); + long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes(); + Integer manifestReadParallelism = options.scanManifestParallelism(); + Options tableOptions = options.toConfiguration(); + // these are the newly created manifest files, clean them up if exception occurs List newFilesForAbort = new ArrayList<>(); try { - Optional> fullCompacted = - tryFullCompaction( - input, - newFilesForAbort, - manifestFile, - suggestedMetaSize, - manifestFullCompactionSize, - partitionType, - manifestReadParallelism); - return fullCompacted.orElseGet( + Optional> merged; + + // If manifest-sort.enable is enabled and there are partition fields, use trySortRewrite + if 
(tableOptions.getBoolean("manifest-sort.enable", false) + && partitionType.getFieldCount() > 0) { + merged = + trySortRewrite( + input, newFilesForAbort, manifestFile, partitionType, options); + } else { + // Otherwise try full compaction first, then minor compaction if needed + merged = + tryFullCompaction( + input, + newFilesForAbort, + manifestFile, + suggestedMetaSize, + manifestFullCompactionSize, + partitionType, + manifestReadParallelism); + } + + return merged.orElseGet( () -> tryMinorCompaction( input, @@ -303,6 +324,492 @@ private static Set computeDeletePartitions(Set return partitions; } + // ==================== Manifest Sort Rewrite ==================== + + /** + * Try to sort-rewrite the merged manifest list by a configured partition field. If the sort + * field cannot be resolved or the delta file size is below the full compaction threshold, the + * input is returned as-is. + */ + private static Optional> trySortRewrite( + List input, + List newFilesForAbort, + ManifestFile manifestFile, + RowType partitionType, + CoreOptions options) + throws Exception { + // Extract configuration from options + long suggestedMetaSize = options.manifestTargetSize().getBytes(); + long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes(); + Integer manifestReadParallelism = options.scanManifestParallelism(); + Options tableOptions = options.toConfiguration(); + + // Step 1: Resolve sort field. + String sortField = resolveSortField(tableOptions.toMap(), partitionType); + if (sortField == null) { + LOG.warn( + "Cannot resolve sort field for manifest sort rewrite. " + + "Skipping sort. Configure 'manifest-sort.partition-field'" + + " for multi-partition tables."); + return Optional.of(input); + } + int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); + DataType sortFieldType = partitionType.getTypeAt(sortFieldIndex); + + // Step 2: Check full compact trigger. 
+ Filter mustChange = + file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize; + + long totalDeltaFileSize = 0; + for (ManifestFileMeta file : input) { + if (mustChange.test(file)) { + totalDeltaFileSize += file.fileSize(); + } + } + + List fullCompactionManifests = new ArrayList<>(); + List lsmFiles = new LinkedList<>(input); + Set deleteEntries = null; + if (totalDeltaFileSize >= manifestFullCompactionSize) { + // Step 3: Read delete entries and build partition predicate. + deleteEntries = + FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); + + PartitionPredicate predicate; + if (deleteEntries.isEmpty()) { + predicate = PartitionPredicate.ALWAYS_FALSE; + } else { + if (partitionType.getFieldCount() > 0) { + Set deletePartitions = computeDeletePartitions(deleteEntries); + predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions); + } else { + predicate = PartitionPredicate.ALWAYS_TRUE; + } + } + + // Step 4: Classify input into level0 runs and LSM files. 
+ Iterator iterator = lsmFiles.iterator(); + while (iterator.hasNext()) { + ManifestFileMeta file = iterator.next(); + if (mustChange.test(file)) { + iterator.remove(); + fullCompactionManifests.add(file); + } else if (predicate != null + && predicate.test( + file.numAddedFiles() + file.numDeletedFiles(), + file.partitionStats().minValues(), + file.partitionStats().maxValues(), + file.partitionStats().nullCounts())) { + iterator.remove(); + fullCompactionManifests.add(file); + } + } + } + + // Process full compaction manifests separately: sort, deduplicate, and rewrite + List fullCompactionRewritten = new ArrayList<>(); + if (!fullCompactionManifests.isEmpty()) { + fullCompactionRewritten = + sortAndRewriteFullCompaction( + fullCompactionManifests, + manifestFile, + sortFieldIndex, + sortFieldType, + suggestedMetaSize, + deleteEntries); + newFilesForAbort.addAll(fullCompactionRewritten); + } + + // Step 5: Build LSM Tree and assign levels (only for lsmFiles). + List levelRuns = + lsmFiles.isEmpty() + ? new ArrayList<>() + : buildLevelSortedRuns(lsmFiles, sortFieldIndex, sortFieldType); + + // Step 6: Pick runs to compact. + int sizeAmpThreshold = tableOptions.getInteger("manifest-sort.size-amp-threshold", 2); + int sizeRatioThreshold = tableOptions.getInteger("manifest-sort.size-ratio-threshold", 10); + ManifestPickStrategy pickStrategy = + new ManifestPickStrategy(sizeAmpThreshold, sizeRatioThreshold); + List pickedRuns = pickStrategy.pick(levelRuns); + + Set pickedSet = new HashSet<>(pickedRuns); + List reusedFiles = new ArrayList<>(); + for (ManifestSortedRun run : levelRuns) { + if (!pickedSet.contains(run)) { + reusedFiles.addAll(run.files()); + } + } + + if (pickedRuns.isEmpty()) { + return Optional.of(new ArrayList<>(input)); + } + + // Step 7: Split picked files into sections, sort and rewrite each. 
+ List pickedFiles = new ArrayList<>(); + for (ManifestSortedRun run : pickedRuns) { + pickedFiles.addAll(run.files()); + } + + List> sections = + splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType); + long maxRewriteSize = + parseLongOption(tableOptions, "manifest-sort.max-rewrite-size", Long.MAX_VALUE); + long processedSize = 0; + + List result = new ArrayList<>(reusedFiles); + List sortNewFiles = new ArrayList<>(); + for (List section : sections) { + long sectionSize = 0; + for (ManifestFileMeta m : section) { + sectionSize += m.fileSize(); + } + if (processedSize + sectionSize > maxRewriteSize) { + result.addAll(section); + continue; + } + processedSize += sectionSize; + + List merged = + sortAndRewriteSection( + section, manifestFile, sortFieldIndex, sortFieldType, deleteEntries); + sortNewFiles.addAll(merged); + result.addAll(merged); + } + newFilesForAbort.addAll(sortNewFiles); + result.addAll(fullCompactionRewritten); + return Optional.of(result); + } + + // ==================== Sort Rewrite Helpers ==================== + + /** + * Compares the value at field {@code k} of two {@link BinaryRow}s according to {@code type}. 
+ */ + static int compareField(BinaryRow a, BinaryRow b, int k, DataType type) { + switch (type.getTypeRoot()) { + case INTEGER: + case DATE: + return Integer.compare(a.getInt(k), b.getInt(k)); + case BIGINT: + return Long.compare(a.getLong(k), b.getLong(k)); + case SMALLINT: + return Short.compare(a.getShort(k), b.getShort(k)); + case TINYINT: + return Byte.compare(a.getByte(k), b.getByte(k)); + case FLOAT: + return Float.compare(a.getFloat(k), b.getFloat(k)); + case DOUBLE: + return Double.compare(a.getDouble(k), b.getDouble(k)); + case BOOLEAN: + return Boolean.compare(a.getBoolean(k), b.getBoolean(k)); + case VARCHAR: + case CHAR: + return a.getString(k).compareTo(b.getString(k)); + case TIMESTAMP_WITHOUT_TIME_ZONE: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + return a.getTimestamp(k, type.defaultSize()) + .compareTo(b.getTimestamp(k, type.defaultSize())); + case DECIMAL: + DecimalType dt = (DecimalType) type; + return a.getDecimal(k, dt.getPrecision(), dt.getScale()) + .compareTo(b.getDecimal(k, dt.getPrecision(), dt.getScale())); + default: + String errorMsg = + String.format( + "Unsupported partition field type '%s' for manifest sort rewrite. " + + "Supported types: TINYINT, SMALLINT, INTEGER, BIGINT, " + + "FLOAT, DOUBLE, BOOLEAN, CHAR, VARCHAR, DATE, TIMESTAMP, " + + "DECIMAL.", + type.getTypeRoot()); + LOG.error(errorMsg); + throw new UnsupportedOperationException(errorMsg); + } + } + + /** + * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}. + * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field + * value AND the same data file are emitted contiguously. 
+ */ + static int compareSortKey( + ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) { + int c = compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType); + if (c != 0) { + return c; + } + return a.file().fileName().compareTo(b.file().fileName()); + } + + /** + * Resolve the partition field to sort manifests by. + * + *

<p>Resolution rules: + * + * <ol> + * <li>If {@code manifest-sort.partition-field} is configured, return that value. + * <li>Otherwise, if the table has exactly one partition field, return that field name. + * <li>Otherwise return {@code null}. + * </ol>
+ */ + @Nullable + static String resolveSortField(Map tableOptions, RowType partitionType) { + String configured = tableOptions.get("manifest-sort.partition-field"); + if (configured != null && !configured.isEmpty()) { + return configured; + } + if (partitionType.getFieldCount() == 1) { + return partitionType.getFieldNames().get(0); + } + return null; + } + + /** + * Build level-sorted runs from a list of manifest files. Sorts files by min partition value, + * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 + * largest to level 1~4, rest to level 0). + */ + static List buildLevelSortedRuns( + List input, int sortFieldIndex, DataType sortFieldType) { + input.sort( + (a, b) -> { + int cmp = + compareField( + a.partitionStats().minValues(), + b.partitionStats().minValues(), + sortFieldIndex, + sortFieldType); + if (cmp != 0) { + return cmp; + } + return compareField( + a.partitionStats().maxValues(), + b.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType); + }); + + List> runFilesList = new ArrayList<>(); + List currentRun = new ArrayList<>(); + currentRun.add(input.get(0)); + for (int i = 1; i < input.size(); i++) { + ManifestFileMeta file = input.get(i); + ManifestFileMeta last = currentRun.get(currentRun.size() - 1); + if (compareField( + file.partitionStats().minValues(), + last.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType) + > 0) { + currentRun.add(file); + } else { + runFilesList.add(currentRun); + currentRun = new ArrayList<>(); + currentRun.add(file); + } + } + runFilesList.add(currentRun); + + List runs = new ArrayList<>(runFilesList.size()); + for (List rf : runFilesList) { + runs.add(ManifestSortedRun.fromSorted(rf)); + } + + runs.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); + int n = runs.size(); + for (int i = 0; i < n; i++) { + if (i >= n - 4) { + runs.get(i).setLevel(n - i); + } else { + runs.get(i).setLevel(0); + } + } + return runs; + } + + /** + * Split picked 
files into sections. Files with overlapping sort-key intervals go into the same + * section. + */ + static List> splitIntoSections( + List pickedFiles, int sortFieldIndex, DataType sortFieldType) { + pickedFiles.sort( + (a, b) -> { + int cmp = + compareField( + a.partitionStats().minValues(), + b.partitionStats().minValues(), + sortFieldIndex, + sortFieldType); + if (cmp != 0) { + return cmp; + } + return compareField( + a.partitionStats().maxValues(), + b.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType); + }); + + List> sections = new ArrayList<>(); + List currentSection = new ArrayList<>(); + currentSection.add(pickedFiles.get(0)); + BinaryRow sectionMaxBound = pickedFiles.get(0).partitionStats().maxValues(); + for (int i = 1; i < pickedFiles.size(); i++) { + ManifestFileMeta file = pickedFiles.get(i); + if (compareField( + file.partitionStats().minValues(), + sectionMaxBound, + sortFieldIndex, + sortFieldType) + > 0) { + sections.add(currentSection); + currentSection = new ArrayList<>(); + currentSection.add(file); + sectionMaxBound = file.partitionStats().maxValues(); + } else { + currentSection.add(file); + if (compareField( + file.partitionStats().maxValues(), + sectionMaxBound, + sortFieldIndex, + sortFieldType) + > 0) { + sectionMaxBound = file.partitionStats().maxValues(); + } + } + } + sections.add(currentSection); + return sections; + } + + /** + * Sort and rewrite full compaction manifests. Files are sorted by min partition value, then + * processed in batches. A batch stops when total size reaches threshold or when current max + * doesn't overlap with next min. Each batch is sorted, deduplicated (DELETE entries removed), + * and written to new manifest files. 
+ */ + private static List sortAndRewriteFullCompaction( + List fullCompactionManifests, + ManifestFile manifestFile, + int sortFieldIndex, + DataType sortFieldType, + long suggestedMetaSize, + @Nullable Set deletedIdentifiers) + throws Exception { + + // Sort by min partition value + fullCompactionManifests.sort( + (a, b) -> + compareField( + a.partitionStats().minValues(), + b.partitionStats().minValues(), + sortFieldIndex, + sortFieldType)); + + List result = new ArrayList<>(); + List batch = new ArrayList<>(); + long batchSize = 0; + + for (int i = 0; i < fullCompactionManifests.size(); i++) { + ManifestFileMeta current = fullCompactionManifests.get(i); + boolean shouldFlush = false; + + // Check if batch size reaches threshold + if (batchSize + current.fileSize() >= suggestedMetaSize && !batch.isEmpty()) { + shouldFlush = true; + } + + // Check if current max overlaps with next min + if (i < fullCompactionManifests.size() - 1 && !batch.isEmpty()) { + ManifestFileMeta next = fullCompactionManifests.get(i + 1); + int cmp = + compareField( + current.partitionStats().maxValues(), + next.partitionStats().minValues(), + sortFieldIndex, + sortFieldType); + if (cmp < 0) { + shouldFlush = true; + } + } + + batch.add(current); + batchSize += current.fileSize(); + + if (shouldFlush || i == fullCompactionManifests.size() - 1) { + // Process batch: sort entries, remove DELETE, write out + List rewritten = + sortAndRewriteSection( + batch, + manifestFile, + sortFieldIndex, + sortFieldType, + deletedIdentifiers); + result.addAll(rewritten); + batch.clear(); + batchSize = 0; + } + } + + return result; + } + + /** + * Read all entries from a section's manifest files, sort them in memory by the specified + * partition field, filter out DELETE entries and cancelled ADD entries, then write surviving + * entries to the rolling writer. 
+ */ + private static List sortAndRewriteSection( + List section, + ManifestFile manifestFile, + int sortFieldIndex, + DataType sortFieldType, + @Nullable Set deletedIdentifiers) + throws Exception { + + List allEntries = new ArrayList<>(); + for (ManifestFileMeta meta : section) { + allEntries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); + } + + allEntries.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); + + Set safeDeletedIds = + deletedIdentifiers != null ? deletedIdentifiers : new HashSet<>(); + + RollingFileWriter writer = + manifestFile.createRollingWriter(); + try { + for (ManifestEntry entry : allEntries) { + if (entry.kind() == FileKind.ADD && !safeDeletedIds.contains(entry.identifier())) { + writer.write(entry); + } + } + } finally { + writer.close(); + } + return writer.result(); + } + + /** Parse a long option from table options with a default value. */ + private static long parseLongOption(Options options, String key, long defaultValue) { + String value = options.get(key); + if (value == null || value.isEmpty()) { + return defaultValue; + } + try { + return Long.parseLong(value.trim()); + } catch (NumberFormatException e) { + LOG.warn( + "Invalid long value '{}' for option '{}', using default {}.", + value, + key, + defaultValue); + return defaultValue; + } + } + private static class FullCompactionReadResult { private final ManifestFileMeta file; diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java new file mode 100644 index 000000000000..cc88417b2765 --- /dev/null +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.operation; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Pick strategy for manifest LSM Tree compaction. + * + *

<p>Strategy priority: + * + * <ol> + * <li>SizeAmp: if all lower-level runs' total size exceeds the highest-level run's size + * times {@code sizeAmpThreshold}, trigger full compaction (pick all runs). + * <li>SizeRatio: from low to high, a run above level 1 is picked when the picked size so far + * times {@code sizeRatioThreshold} is at least that run's size. + * <li>Forced pick: level0 and level1 runs are always picked. + * <li>Delete pick: additionally pick runs containing manifest files with {@code + * numDeletedFiles > 0}. NOTE: {@code pick} does not currently apply this rule. + * </ol>
+ */ +public class ManifestPickStrategy { + + private final int sizeAmpThreshold; + private final int sizeRatioThreshold; + + public ManifestPickStrategy(int sizeAmpThreshold, int sizeRatioThreshold) { + this.sizeAmpThreshold = sizeAmpThreshold; + this.sizeRatioThreshold = sizeRatioThreshold; + } + + /** + * Pick runs that need compaction from the given level runs. + * + * @param levelRuns runs with assigned levels (level 0~4) + * @return list of picked runs to compact + */ + public List pick(List levelRuns) { + if (levelRuns.isEmpty()) { + return new ArrayList<>(); + } + + // Try SizeAmp first + List sizeAmpResult = pickForSizeAmp(levelRuns); + if (sizeAmpResult != null) { + return sizeAmpResult; + } + + // SizeRatio + forced pick + return pickForSizeRatioAndForce(levelRuns); + } + + /** + * SizeAmp check: if all lower-level (0~3) runs' total size > highest-level (level4) run's size + * * sizeAmpThreshold, pick all runs for full compaction. + */ + private List pickForSizeAmp(List levelRuns) { + int maxLevel = -1; + ManifestSortedRun highestRun = null; + long lowerLevelTotalSize = 0; + + for (ManifestSortedRun run : levelRuns) { + if (run.level() > maxLevel) { + maxLevel = run.level(); + highestRun = run; + } + } + + if (highestRun == null || maxLevel <= 0) { + return null; + } + + for (ManifestSortedRun run : levelRuns) { + if (run.level() < maxLevel) { + lowerLevelTotalSize += run.totalSize(); + } + } + + if (lowerLevelTotalSize > highestRun.totalSize() * sizeAmpThreshold) { + return new ArrayList<>(levelRuns); + } + return null; + } + + /** + * SizeRatio + forced pick. + * + *
<ul> + * <li>Level0 and level1 are always picked. + * <li>From low to high, a run above level1 is picked when the cumulative picked size times + * sizeRatioThreshold is >= its size; a failing run is skipped and the scan continues. + * </ul>
+ */ + private List pickForSizeRatioAndForce(List levelRuns) { + // Sort by level ascending for low-to-high traversal + List sorted = new ArrayList<>(levelRuns); + sorted.sort(Comparator.comparingInt(ManifestSortedRun::level)); + + Set pickedSet = new HashSet<>(); + long pickedSize = 0; + + // From low to high: forced pick level0/level1, then SizeRatio for the rest. + for (ManifestSortedRun run : sorted) { + if (run.level() <= 1) { + pickedSet.add(run); + pickedSize += run.totalSize(); + } else { + long nextRunSize = run.totalSize(); + if (pickedSize > 0 && pickedSize * sizeRatioThreshold >= nextRunSize) { + pickedSet.add(run); + pickedSize += nextRunSize; + } + } + } + + return new ArrayList<>(pickedSet); + } +} diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java new file mode 100644 index 000000000000..49baabfe7161 --- /dev/null +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.operation; + +import org.apache.paimon.data.BinaryRow; +import org.apache.paimon.manifest.ManifestFileMeta; +import org.apache.paimon.utils.Preconditions; + +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; + +/** + * A {@code ManifestSortedRun} is a list of {@link ManifestFileMeta}s sorted by a single partition + * field (the configured manifest sort field). The intervals {@code [partitionStats.minValues[k], + * partitionStats.maxValues[k]]} of these manifests do not overlap on field {@code k}, where {@code + * k} is the configured sort field index. + */ +public class ManifestSortedRun { + + private int level; + private final List files; + private final long totalSize; + + private ManifestSortedRun(List files) { + this.level = -1; + this.files = Collections.unmodifiableList(files); + long size = 0L; + for (ManifestFileMeta file : files) { + size += file.fileSize(); + } + this.totalSize = size; + } + + public static ManifestSortedRun empty() { + return new ManifestSortedRun(Collections.emptyList()); + } + + public static ManifestSortedRun fromSingle(ManifestFileMeta file) { + return new ManifestSortedRun(Collections.singletonList(file)); + } + + /** + * Build a {@code ManifestSortedRun} from an already-sorted list. The caller MUST guarantee that + * {@code sortedFiles} is sorted ascending on the configured sort field's min value, and that + * intervals do not overlap on that field. 
+ */ + public static ManifestSortedRun fromSorted(List sortedFiles) { + return new ManifestSortedRun(sortedFiles); + } + + public List files() { + return files; + } + + public boolean isEmpty() { + return files.isEmpty(); + } + + public boolean nonEmpty() { + return !isEmpty(); + } + + public long totalSize() { + return totalSize; + } + + public int level() { + return level; + } + + public void setLevel(int level) { + this.level = level; + } + + /** + * Validate that this run is monotonically non-overlapping on the sort field at {@code + * sortFieldIndex}. Used in tests and as an assertion in development. + */ + public void validate(int sortFieldIndex, Comparator partitionComparator) { + for (int i = 1; i < files.size(); i++) { + BinaryRow prevMax = files.get(i - 1).partitionStats().maxValues(); + BinaryRow currMin = files.get(i).partitionStats().minValues(); + Preconditions.checkState( + partitionComparator.compare(prevMax, currMin) <= 0, + "ManifestSortedRun is not sorted on field %s; prev.max > curr.min", + sortFieldIndex); + } + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof ManifestSortedRun)) { + return false; + } + ManifestSortedRun that = (ManifestSortedRun) o; + return level == that.level && files.equals(that.files); + } + + @Override + public int hashCode() { + return Objects.hash(level, files); + } + + @Override + public String toString() { + return "ManifestSortedRun{level=" + + level + + ", files=[" + + files.stream().map(ManifestFileMeta::fileName).collect(Collectors.joining(", ")) + + "]}"; + } +} diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index 36b0d15f114f..3b6bc379840b 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -18,12 +18,14 @@ package 
org.apache.paimon.manifest; +import org.apache.paimon.CoreOptions; import org.apache.paimon.data.BinaryRow; import org.apache.paimon.fs.Path; import org.apache.paimon.fs.SeekableInputStream; import org.apache.paimon.fs.SeekableInputStreamWrapper; import org.apache.paimon.fs.local.LocalFileIO; import org.apache.paimon.operation.ManifestFileMerger; +import org.apache.paimon.options.Options; import org.apache.paimon.partition.PartitionPredicate; import org.apache.paimon.types.IntType; import org.apache.paimon.types.RowType; @@ -84,9 +86,16 @@ public void testMergeWithoutFullCompaction(int numLastBits) { createData(numLastBits, input, expected); // no trigger Full Compaction + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "9223372036854775807B"); List actual = ManifestFileMerger.merge( - input, manifestFile, 500, 3, Long.MAX_VALUE, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); assertThat(actual).hasSameSizeAs(expected); // these two manifest files are merged from the input @@ -118,14 +127,16 @@ private void testCleanUp(List input, long fullCompactionThresh ManifestFile failingManifestFile = createManifestFile(FailingFileIO.getFailingPath(failingName, tempDir.toString())); try { + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set( + "manifest.full-compaction-threshold-size", fullCompactionThreshold + "B"); ManifestFileMerger.merge( input, failingManifestFile, - 500, - 3, - fullCompactionThreshold, getPartitionType(), - null); + CoreOptions.fromMap(testOptions.toMap())); } catch (Throwable e) { assertThat(e).hasRootCauseExactlyInstanceOf(FailingFileIO.ArtificialException.class); // old files should be kept untouched, while new 
files should be cleaned up @@ -156,9 +167,16 @@ public void testMerge() { // delta with delete apply partition 1,2 addDeltaManifests(input, true); // trigger full compaction + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); List merged = ManifestFileMerger.merge( - input, manifestFile, 500, 3, 200, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); // 1st Manifest don't need to Merge assertSameContent(input.get(0), merged.get(0), manifestFile); @@ -173,9 +191,16 @@ public void testMergeWithoutDelta() { // base List input = createBaseManifestFileMetas(true); + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); List merged = ManifestFileMerger.merge( - input, manifestFile, 500, 3, 200, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); assertEquivalentEntries(input, merged); assertThat(merged).hasSameElementsAs(input); @@ -186,9 +211,16 @@ public void testMergeWithoutDelta() { ManifestFileMeta delta = makeManifest(makeEntry(true, "A", 1), makeEntry(false, "A", 1)); input1.add(delta); + Options testOptions1 = new Options(); + testOptions1.set("manifest.target-file-size", "500B"); + testOptions1.set("manifest.merge-min-count", "3"); + testOptions1.set("manifest.full-compaction-threshold-size", "200B"); List merged1 = ManifestFileMerger.merge( - input1, manifestFile, 500, 3, 200, getPartitionType(), null); + input1, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions1.toMap())); assertThat(base).hasSameElementsAs(merged1); assertEquivalentEntries(input1, merged1); @@ -198,9 +230,16 @@ public 
void testMergeWithoutDelta() { public void testMergeWithoutBase() { List input = new ArrayList<>(); addDeltaManifests(input, true); + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); List merged = ManifestFileMerger.merge( - input, manifestFile, 500, 3, 200, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); assertEquivalentEntries(input, merged); } @@ -225,9 +264,16 @@ public void testMergeWithoutDeleteFile() { input.add(makeManifest(makeEntry(true, "F"))); input.add(makeManifest(makeEntry(true, "G"))); + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); List merged = ManifestFileMerger.merge( - input, manifestFile, 500, 3, 200, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); assertEquivalentEntries(input, merged); } @@ -489,9 +535,16 @@ public void testMergeFullCompactionWithoutDeleteFile() { input.add(makeManifest(makeEntry(true, "F"))); input.add(makeManifest(makeEntry(true, "G"))); + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", threshold + "B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); List merged = ManifestFileMerger.merge( - input, manifestFile, threshold, 3, 200, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); assertEquivalentEntries( input.stream() .filter(f -> !baseFiles.contains(f.fileName())) diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/NoPartitionManifestFileMetaTest.java 
b/paimon-core/src/test/java/org/apache/paimon/manifest/NoPartitionManifestFileMetaTest.java index 591b3206518d..66465f1e7531 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/NoPartitionManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/NoPartitionManifestFileMetaTest.java @@ -18,7 +18,9 @@ package org.apache.paimon.manifest; +import org.apache.paimon.CoreOptions; import org.apache.paimon.operation.ManifestFileMerger; +import org.apache.paimon.options.Options; import org.apache.paimon.types.RowType; import org.junit.jupiter.api.BeforeEach; @@ -49,9 +51,16 @@ public void testMerge() { List input = createBaseManifestFileMetas(false); addDeltaManifests(input, false); + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); List merged = ManifestFileMerger.merge( - input, manifestFile, 500, 3, 200, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); assertEquivalentEntries(input, merged); // the first one is not deleted, it should not be merged @@ -89,9 +98,16 @@ public void testMergeFullCompactionWithoutDeleteFile() { input.add(makeManifest(makeEntry(true, "F", null))); input.add(makeManifest(makeEntry(true, "G", null))); + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", threshold + "B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); List merged = ManifestFileMerger.merge( - input, manifestFile, threshold, 3, 200, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); assertEquivalentEntries( input.stream() .filter(f -> !baseFiles.contains(f.fileName())) From d8f515f140e9a3e39d8cfbca0775a206ab84baa7 Mon Sep 17 00:00:00 2001 
From: umi Date: Wed, 13 May 2026 19:39:24 +0800 Subject: [PATCH 02/48] fix --- .../java/org/apache/paimon/operation/ManifestFileMerger.java | 1 + 1 file changed, 1 insertion(+) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 3d7cccd5b34b..fba89c3b7626 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -22,6 +22,7 @@ import org.apache.paimon.data.BinaryRow; import org.apache.paimon.io.RollingFileWriter; import org.apache.paimon.manifest.FileEntry; +import org.apache.paimon.manifest.FileKind; import org.apache.paimon.manifest.ManifestEntry; import org.apache.paimon.manifest.ManifestFile; import org.apache.paimon.manifest.ManifestFileMeta; From 987d3d20aa85e98826af2eb0679393864ab9d18e Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 13 May 2026 20:38:25 +0800 Subject: [PATCH 03/48] addTest --- .../paimon/operation/ManifestFileMerger.java | 9 +- .../paimon/manifest/ManifestFileMetaTest.java | 398 ++++++++++++++++++ 2 files changed, 403 insertions(+), 4 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index fba89c3b7626..17c14258599d 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -441,9 +441,10 @@ private static Optional> trySortRewrite( reusedFiles.addAll(run.files()); } } - + List result = new ArrayList<>(reusedFiles); if (pickedRuns.isEmpty()) { - return Optional.of(new ArrayList<>(input)); + result.addAll(fullCompactionRewritten); + return Optional.of(new ArrayList<>(result)); } // Step 7: Split picked files into sections, sort and rewrite each. 
@@ -458,7 +459,7 @@ private static Optional> trySortRewrite( parseLongOption(tableOptions, "manifest-sort.max-rewrite-size", Long.MAX_VALUE); long processedSize = 0; - List result = new ArrayList<>(reusedFiles); + List sortNewFiles = new ArrayList<>(); for (List section : sections) { long sectionSize = 0; @@ -473,7 +474,7 @@ private static Optional> trySortRewrite( List merged = sortAndRewriteSection( - section, manifestFile, sortFieldIndex, sortFieldType, deleteEntries); + section, manifestFile, sortFieldIndex, sortFieldType, null); sortNewFiles.addAll(merged); result.addAll(merged); } diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index 3b6bc379840b..f4adf35802e4 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -872,4 +872,402 @@ private void beforeFirstRead() throws IOException { } } } + + // ==================== Manifest Sort Tests ==================== + + /** + * Test manifest sort with overlapping partition ranges. Each manifest contains entries spanning + * multiple partitions, creating overlapping intervals that require sort rewrite to resolve. + * + *

Input manifests (deliberately unordered and overlapping): + * + *

+     *   manifest-A: partitions [5, 9]  (entries in partition 5,6,7,8,9)
+     *   manifest-B: partitions [0, 4]  (entries in partition 0,1,2,3,4)
+     *   manifest-C: partitions [3, 7]  (entries in partition 3,4,5,6,7) -- overlaps A and B
+     *   manifest-D: partitions [8, 12] (entries in partition 8,9,10,11,12) -- overlaps A
+     *   manifest-E: partitions [1, 3]  (entries in partition 1,2,3) -- overlaps B and C
+     *   manifest-F: partitions [10, 14] (entries in partition 10,11,12,13,14) -- overlaps D
+     * 
+ * + *

After sort rewrite, all surviving ADD entries should be sorted by partition field. + */ + @Test + public void testManifestSortWithOverlappingPartitions() { + List input = new ArrayList<>(); + + // manifest-A: partitions [5, 9] + List entriesA = new ArrayList<>(); + for (int p = 5; p <= 9; p++) { + entriesA.add(makeEntry(true, String.format("A-p%d", p), p)); + } + input.add(makeManifest(entriesA.toArray(new ManifestEntry[0]))); + + // manifest-B: partitions [0, 4] + List entriesB = new ArrayList<>(); + for (int p = 0; p <= 4; p++) { + entriesB.add(makeEntry(true, String.format("B-p%d", p), p)); + } + input.add(makeManifest(entriesB.toArray(new ManifestEntry[0]))); + + // manifest-C: partitions [3, 7] -- overlaps with A and B + List entriesC = new ArrayList<>(); + for (int p = 3; p <= 7; p++) { + entriesC.add(makeEntry(true, String.format("C-p%d", p), p)); + } + input.add(makeManifest(entriesC.toArray(new ManifestEntry[0]))); + + // manifest-D: partitions [8, 12] -- overlaps with A + List entriesD = new ArrayList<>(); + for (int p = 8; p <= 12; p++) { + entriesD.add(makeEntry(true, String.format("D-p%d", p), p)); + } + input.add(makeManifest(entriesD.toArray(new ManifestEntry[0]))); + + // manifest-E: partitions [1, 3] -- overlaps with B and C + List entriesE = new ArrayList<>(); + for (int p = 1; p <= 3; p++) { + entriesE.add(makeEntry(true, String.format("E-p%d", p), p)); + } + input.add(makeManifest(entriesE.toArray(new ManifestEntry[0]))); + + // manifest-F: partitions [10, 14] -- overlaps with D + List entriesF = new ArrayList<>(); + for (int p = 10; p <= 14; p++) { + entriesF.add(makeEntry(true, String.format("F-p%d", p), p)); + } + input.add(makeManifest(entriesF.toArray(new ManifestEntry[0]))); + + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); + testOptions.set("manifest-sort.enable", "true"); 
+ + List merged = + ManifestFileMerger.merge( + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); + + // Verify entries are equivalent (no data loss) + assertEquivalentEntries(input, merged); + + // Verify all entries within each output manifest are sorted by partition + for (ManifestFileMeta meta : merged) { + List entries = manifestFile.read(meta.fileName(), meta.fileSize()); + for (int i = 1; i < entries.size(); i++) { + int prevPartition = entries.get(i - 1).partition().getInt(0); + int currPartition = entries.get(i).partition().getInt(0); + assertThat(currPartition) + .as("Entries within a manifest should be sorted by partition") + .isGreaterThanOrEqualTo(prevPartition); + } + } + + // Verify manifest files themselves are ordered by minValues + for (int i = 1; i < merged.size(); i++) { + int prevMin = merged.get(i - 1).partitionStats().minValues().getInt(0); + int currMin = merged.get(i).partitionStats().minValues().getInt(0); + assertThat(currMin).isGreaterThanOrEqualTo(prevMin); + } + } + + /** + * Test manifest sort with more manifests having overlapping partition ranges. Creates a larger + * number of manifests in shuffled order to stress-test the sort rewrite logic. + * + *

Input manifests (shuffled, all ADD-only): + * + *

+     *   manifest-1: partitions [6, 10]
+     *   manifest-2: partitions [0, 3]
+     *   manifest-3: partitions [4, 8]  -- overlaps 1 and 2
+     *   manifest-4: partitions [9, 14] -- overlaps 1
+     *   manifest-5: partitions [2, 5]  -- overlaps 2 and 3
+     *   manifest-6: partitions [11, 15] -- overlaps 4
+     * 
+ */ + @Test + public void testManifestSortWithShuffledOverlappingPartitions() { + List input = new ArrayList<>(); + + // manifest-1: partitions [6, 10] + List entries1 = new ArrayList<>(); + for (int p = 6; p <= 10; p++) { + entries1.add(makeEntry(true, String.format("m1-p%d", p), p)); + } + input.add(makeManifest(entries1.toArray(new ManifestEntry[0]))); + + // manifest-2: partitions [0, 3] + List entries2 = new ArrayList<>(); + for (int p = 0; p <= 3; p++) { + entries2.add(makeEntry(true, String.format("m2-p%d", p), p)); + } + input.add(makeManifest(entries2.toArray(new ManifestEntry[0]))); + + // manifest-3: partitions [4, 8] -- overlaps manifest-1 and manifest-2 + List entries3 = new ArrayList<>(); + for (int p = 4; p <= 8; p++) { + entries3.add(makeEntry(true, String.format("m3-p%d", p), p)); + } + input.add(makeManifest(entries3.toArray(new ManifestEntry[0]))); + + // manifest-4: partitions [9, 14] -- overlaps manifest-1 + List entries4 = new ArrayList<>(); + for (int p = 9; p <= 14; p++) { + entries4.add(makeEntry(true, String.format("m4-p%d", p), p)); + } + input.add(makeManifest(entries4.toArray(new ManifestEntry[0]))); + + // manifest-5: partitions [2, 5] -- overlaps manifest-2 and manifest-3 + List entries5 = new ArrayList<>(); + for (int p = 2; p <= 5; p++) { + entries5.add(makeEntry(true, String.format("m5-p%d", p), p)); + } + input.add(makeManifest(entries5.toArray(new ManifestEntry[0]))); + + // manifest-6: partitions [11, 15] -- overlaps manifest-4 + List entries6 = new ArrayList<>(); + for (int p = 11; p <= 15; p++) { + entries6.add(makeEntry(true, String.format("m6-p%d", p), p)); + } + input.add(makeManifest(entries6.toArray(new ManifestEntry[0]))); + + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "100B"); + testOptions.set("manifest-sort.enable", "true"); + + List merged = + 
ManifestFileMerger.merge( + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); + + // Verify no data loss + assertEquivalentEntries(input, merged); + + // Verify entries within each output manifest are sorted by partition + for (ManifestFileMeta meta : merged) { + List entries = manifestFile.read(meta.fileName(), meta.fileSize()); + for (int i = 1; i < entries.size(); i++) { + int prevPartition = entries.get(i - 1).partition().getInt(0); + int currPartition = entries.get(i).partition().getInt(0); + assertThat(currPartition) + .as("Entries within a manifest should be sorted by partition") + .isGreaterThanOrEqualTo(prevPartition); + } + } + + // Verify output manifests are ordered by minValues + for (int i = 1; i < merged.size(); i++) { + int prevMin = merged.get(i - 1).partitionStats().minValues().getInt(0); + int currMin = merged.get(i).partitionStats().minValues().getInt(0); + assertThat(currMin).isGreaterThanOrEqualTo(prevMin); + } + } + + /** + * Test manifest sort with heavily overlapping manifests that form multiple sorted runs. This + * exercises buildLevelSortedRuns and the LSM level assignment logic. + * + *

Creates manifests whose partition ranges overlap in various ways: + * + *

+     *   run1 (non-overlapping): [0,2], [3,5], [6,8]
+     *   run2 (overlapping with run1): [1,4], [5,7]
+     *   run3 (overlapping with both): [0,9]
+     * 
+ */ + @Test + public void testManifestSortWithMultipleOverlappingRuns() { + List input = new ArrayList<>(); + + // Run1: non-overlapping within itself [0,2], [3,5], [6,8] + input.add( + makeManifest( + makeEntry(true, "r1a-p0", 0), + makeEntry(true, "r1a-p1", 1), + makeEntry(true, "r1a-p2", 2))); + input.add( + makeManifest( + makeEntry(true, "r1b-p3", 3), + makeEntry(true, "r1b-p4", 4), + makeEntry(true, "r1b-p5", 5))); + input.add( + makeManifest( + makeEntry(true, "r1c-p6", 6), + makeEntry(true, "r1c-p7", 7), + makeEntry(true, "r1c-p8", 8))); + + // Run2: overlaps with run1 [1,4], [5,7] + input.add( + makeManifest( + makeEntry(true, "r2a-p1", 1), + makeEntry(true, "r2a-p2", 2), + makeEntry(true, "r2a-p3", 3), + makeEntry(true, "r2a-p4", 4))); + input.add( + makeManifest( + makeEntry(true, "r2b-p5", 5), + makeEntry(true, "r2b-p6", 6), + makeEntry(true, "r2b-p7", 7))); + + // Run3: a large manifest overlapping everything [0,9] + List run3Entries = new ArrayList<>(); + for (int p = 0; p <= 9; p++) { + run3Entries.add(makeEntry(true, String.format("r3-p%d", p), p)); + } + input.add(makeManifest(run3Entries.toArray(new ManifestEntry[0]))); + + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "100B"); + testOptions.set("manifest-sort.enable", "true"); + + List merged = + ManifestFileMerger.merge( + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); + + // Verify no data loss + assertEquivalentEntries(input, merged); + + // Verify entries within each output manifest are sorted by partition + for (ManifestFileMeta meta : merged) { + List entries = manifestFile.read(meta.fileName(), meta.fileSize()); + for (int i = 1; i < entries.size(); i++) { + int prevPartition = entries.get(i - 1).partition().getInt(0); + int currPartition = entries.get(i).partition().getInt(0); + 
assertThat(currPartition) + .as( + "Entries within manifest should be sorted, but found %d after %d", + currPartition, prevPartition) + .isGreaterThanOrEqualTo(prevPartition); + } + } + + // Verify output manifests are ordered by minValues + for (int i = 1; i < merged.size(); i++) { + int prevMin = merged.get(i - 1).partitionStats().minValues().getInt(0); + int currMin = merged.get(i).partitionStats().minValues().getInt(0); + assertThat(currMin).isGreaterThanOrEqualTo(prevMin); + } + } + + /** + * Test that sort rewrite correctly eliminates DELETE entries and their corresponding ADD + * entries. The key condition is that totalDeltaFileSize must reach manifestFullCompactionSize + * to trigger the full compaction path inside trySortRewrite, which reads deleteEntries and + * passes them to sortAndRewriteSection for elimination. + * + *

Design: + * + *

+     *   - Base manifests with overlapping partitions (all ADD, small enough to be "mustChange"
+     *     since fileSize < suggestedMetaSize):
+     *     manifest-A: partitions [0, 4] with entries A-p0..A-p4
+     *     manifest-B: partitions [2, 6] with entries B-p2..B-p6 (overlaps A)
+     *     manifest-C: partitions [5, 9] with entries C-p5..C-p9 (overlaps B)
+     *   - Delta manifests with DELETE entries (cancel some ADD entries):
+     *     manifest-D: DELETE A-p2, DELETE B-p4, ADD new-p2, ADD new-p4
+     *     manifest-E: DELETE C-p7, ADD new-p7
+     *   - After sort rewrite: A-p2, B-p4, C-p7 should be eliminated,
+     *     replaced by new-p2, new-p4, new-p7. Output should only contain ADD entries,
+     *     sorted by partition.
+     * 
+ */ + @Test + public void testManifestSortEliminatesDeleteEntries() { + List input = new ArrayList<>(); + + // manifest-A: partitions [0, 4] + List entriesA = new ArrayList<>(); + for (int p = 0; p <= 4; p++) { + entriesA.add(makeEntry(true, String.format("A-p%d", p), p)); + } + input.add(makeManifest(entriesA.toArray(new ManifestEntry[0]))); + + // manifest-B: partitions [2, 6] -- overlaps A + List entriesB = new ArrayList<>(); + for (int p = 2; p <= 6; p++) { + entriesB.add(makeEntry(true, String.format("B-p%d", p), p)); + } + input.add(makeManifest(entriesB.toArray(new ManifestEntry[0]))); + + // manifest-C: partitions [5, 9] -- overlaps B + List entriesC = new ArrayList<>(); + for (int p = 5; p <= 9; p++) { + entriesC.add(makeEntry(true, String.format("C-p%d", p), p)); + } + input.add(makeManifest(entriesC.toArray(new ManifestEntry[0]))); + + // manifest-D: DELETE A-p2, DELETE B-p4, ADD new-p2, ADD new-p4 + input.add( + makeManifest( + makeEntry(false, "A-p2", 2), + makeEntry(false, "B-p4", 4), + makeEntry(true, "new-p2", 2), + makeEntry(true, "new-p4", 4))); + + // manifest-E: DELETE C-p7, ADD new-p7 + input.add(makeManifest(makeEntry(false, "C-p7", 7), makeEntry(true, "new-p7", 7))); + + Options testOptions = new Options(); + // Set target file size very large so all input manifests are considered "small" + // (fileSize < suggestedMetaSize), which makes them all satisfy mustChange condition + testOptions.set("manifest.target-file-size", "16MB"); + testOptions.set("manifest.merge-min-count", "3"); + // Set full-compaction threshold very small to ensure it triggers + testOptions.set("manifest.full-compaction-threshold-size", "1B"); + testOptions.set("manifest-sort.enable", "true"); + + List merged = + ManifestFileMerger.merge( + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); + + // Collect all output entries + List allOutputEntries = new ArrayList<>(); + for (ManifestFileMeta meta : merged) { + 
allOutputEntries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); + } + + // Verify: no DELETE entries in output (all DELETE pairs eliminated) + long deleteCount = + allOutputEntries.stream().filter(e -> e.kind() == FileKind.DELETE).count(); + assertThat(deleteCount).as("Sort rewrite should eliminate all DELETE entries").isEqualTo(0); + + // Verify: the deleted ADD entries (A-p2, B-p4, C-p7) are NOT in output + Set outputFileNames = + allOutputEntries.stream().map(e -> e.file().fileName()).collect(Collectors.toSet()); + assertThat(outputFileNames).doesNotContain("A-p2", "B-p4", "C-p7"); + + // Verify: the replacement entries (new-p2, new-p4, new-p7) ARE in output + assertThat(outputFileNames).contains("new-p2", "new-p4", "new-p7"); + + // Verify: all surviving entries match what FileEntry.mergeEntries would produce + assertEquivalentEntries(input, merged); + + // Verify entries within each output manifest are sorted by partition + for (ManifestFileMeta meta : merged) { + List entries = manifestFile.read(meta.fileName(), meta.fileSize()); + for (int i = 1; i < entries.size(); i++) { + int prevPartition = entries.get(i - 1).partition().getInt(0); + int currPartition = entries.get(i).partition().getInt(0); + assertThat(currPartition) + .as("Entries within manifest should be sorted by partition") + .isGreaterThanOrEqualTo(prevPartition); + } + } + } } From c49942351b1d9aadb1fab0341cfbdc9e58d94332 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 13 May 2026 20:45:00 +0800 Subject: [PATCH 04/48] spotless --- .../java/org/apache/paimon/operation/ManifestFileMerger.java | 1 - 1 file changed, 1 deletion(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 17c14258599d..26d1a405284a 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ 
b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -459,7 +459,6 @@ private static Optional> trySortRewrite( parseLongOption(tableOptions, "manifest-sort.max-rewrite-size", Long.MAX_VALUE); long processedSize = 0; - List sortNewFiles = new ArrayList<>(); for (List section : sections) { long sectionSize = 0; From d6c38633fa6cd2ede797e8ecc82de86767489e34 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 13 May 2026 21:32:15 +0800 Subject: [PATCH 05/48] fix --- .../java/org/apache/paimon/CoreOptions.java | 28 +++++ .../paimon/operation/ManifestFileMerger.java | 114 +++++++++++++----- 2 files changed, 113 insertions(+), 29 deletions(-) diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index 4217428d8097..a67ff6ca90d1 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -493,6 +493,26 @@ public InlineElement getDescription() { + " partition field). For multi-partition tables, REQUIRED" + " when 'manifest-sort.enable' is true."); + public static final ConfigOption MANIFEST_SORT_MAX_REWRITE_SIZE = + key("manifest-sort.max-rewrite-size") + .memoryType() + .defaultValue(MemorySize.ofMebiBytes(256)) + .withDescription( + "Maximum total size of manifest files to rewrite in a single" + + " sort rewrite pass. Sections exceeding this limit are" + + " skipped. Set to a larger value to allow more aggressive" + + " sort rewriting."); + + public static final ConfigOption MANIFEST_SORT_OPEN_FILE_COST = + key("manifest-sort.open-file-cost") + .memoryType() + .defaultValue(MemorySize.ofMebiBytes(4)) + .withDescription( + "Open file cost of a manifest file during sort rewrite. 
" + + "It is added to each manifest file's size when computing " + + "section size, to avoid rewriting too many small manifest " + + "files in a single section."); + public static final ConfigOption UPSERT_KEY = key("upsert-key") .stringType() @@ -2597,6 +2617,14 @@ public String manifestSortPartitionField() { return options.get(MANIFEST_SORT_PARTITION_FIELD); } + public long manifestSortMaxRewriteSize() { + return options.get(MANIFEST_SORT_MAX_REWRITE_SIZE).getBytes(); + } + + public long manifestSortOpenFileCost() { + return options.get(MANIFEST_SORT_OPEN_FILE_COST).getBytes(); + } + public String partitionDefaultName() { return options.get(PARTITION_DEFAULT_NAME); } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 26d1a405284a..74d887285d5a 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -75,7 +75,6 @@ public static List merge( int suggestedMinMetaCount = options.manifestMergeMinCount(); long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes(); Integer manifestReadParallelism = options.scanManifestParallelism(); - Options tableOptions = options.toConfiguration(); // these are the newly created manifest files, clean them up if exception occurs List newFilesForAbort = new ArrayList<>(); @@ -84,8 +83,7 @@ public static List merge( Optional> merged; // If manifest-sort.enable is enabled and there are partition fields, use trySortRewrite - if (tableOptions.getBoolean("manifest-sort.enable", false) - && partitionType.getFieldCount() > 0) { + if (options.manifestSortEnable() && partitionType.getFieldCount() > 0) { merged = trySortRewrite( input, newFilesForAbort, manifestFile, partitionType, options); @@ -417,7 +415,8 @@ private static Optional> trySortRewrite( sortFieldIndex, 
sortFieldType, suggestedMetaSize, - deleteEntries); + deleteEntries, + manifestReadParallelism); newFilesForAbort.addAll(fullCompactionRewritten); } @@ -428,8 +427,8 @@ private static Optional> trySortRewrite( : buildLevelSortedRuns(lsmFiles, sortFieldIndex, sortFieldType); // Step 6: Pick runs to compact. - int sizeAmpThreshold = tableOptions.getInteger("manifest-sort.size-amp-threshold", 2); - int sizeRatioThreshold = tableOptions.getInteger("manifest-sort.size-ratio-threshold", 10); + int sizeAmpThreshold = options.maxSizeAmplificationPercent(); + int sizeRatioThreshold = options.sortedRunSizeRatio(); ManifestPickStrategy pickStrategy = new ManifestPickStrategy(sizeAmpThreshold, sizeRatioThreshold); List pickedRuns = pickStrategy.pick(levelRuns); @@ -455,15 +454,15 @@ private static Optional> trySortRewrite( List> sections = splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType); - long maxRewriteSize = - parseLongOption(tableOptions, "manifest-sort.max-rewrite-size", Long.MAX_VALUE); + long maxRewriteSize = options.manifestSortMaxRewriteSize(); long processedSize = 0; + long openFileCost = options.manifestSortOpenFileCost(); List sortNewFiles = new ArrayList<>(); for (List section : sections) { long sectionSize = 0; for (ManifestFileMeta m : section) { - sectionSize += m.fileSize(); + sectionSize += m.fileSize() + openFileCost; } if (processedSize + sectionSize > maxRewriteSize) { result.addAll(section); @@ -473,7 +472,12 @@ private static Optional> trySortRewrite( List merged = sortAndRewriteSection( - section, manifestFile, sortFieldIndex, sortFieldType, null); + section, + manifestFile, + sortFieldIndex, + sortFieldType, + null, + manifestReadParallelism); sortNewFiles.addAll(merged); result.addAll(merged); } @@ -696,7 +700,8 @@ private static List sortAndRewriteFullCompaction( int sortFieldIndex, DataType sortFieldType, long suggestedMetaSize, - @Nullable Set deletedIdentifiers) + @Nullable Set deletedIdentifiers, + @Nullable Integer 
manifestReadParallelism) throws Exception { // Sort by min partition value @@ -746,7 +751,8 @@ private static List sortAndRewriteFullCompaction( manifestFile, sortFieldIndex, sortFieldType, - deletedIdentifiers); + deletedIdentifiers, + manifestReadParallelism); result.addAll(rewritten); batch.clear(); batchSize = 0; @@ -759,38 +765,88 @@ private static List sortAndRewriteFullCompaction( /** * Read all entries from a section's manifest files, sort them in memory by the specified * partition field, filter out DELETE entries and cancelled ADD entries, then write surviving - * entries to the rolling writer. + * entries to the rolling writer. Manifest files without delete entries and without cancelled + * ADD entries are kept as-is. + * + *

Reading is parallelized via {@code sequentialBatchedExecute} following the same pattern as + * {@link #tryFullCompaction}. */ private static List sortAndRewriteSection( List section, ManifestFile manifestFile, int sortFieldIndex, DataType sortFieldType, - @Nullable Set deletedIdentifiers) + @Nullable Set deletedIdentifiers, + @Nullable Integer manifestReadParallelism) throws Exception { - List allEntries = new ArrayList<>(); - for (ManifestFileMeta meta : section) { - allEntries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); - } - - allEntries.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); - Set safeDeletedIds = deletedIdentifiers != null ? deletedIdentifiers : new HashSet<>(); - RollingFileWriter writer = - manifestFile.createRollingWriter(); - try { - for (ManifestEntry entry : allEntries) { - if (entry.kind() == FileKind.ADD && !safeDeletedIds.contains(entry.identifier())) { + // Parallel read: each meta is read independently + Function> reader = + meta -> singletonList(readForSortRewrite(meta, manifestFile, safeDeletedIds)); + + List result = new ArrayList<>(); + List entriesToRewrite = new ArrayList<>(); + + for (FullCompactionReadResult readResult : + sequentialBatchedExecute(reader, section, manifestReadParallelism)) { + if (readResult.requireChange) { + entriesToRewrite.addAll(readResult.entries); + } else { + result.add(readResult.file); + } + } + + if (!entriesToRewrite.isEmpty()) { + entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); + + RollingFileWriter writer = + manifestFile.createRollingWriter(); + try { + for (ManifestEntry entry : entriesToRewrite) { writer.write(entry); } + } finally { + writer.close(); } - } finally { - writer.close(); + result.addAll(writer.result()); + } + + return result; + } + + /** + * Read a single manifest file for sort rewrite. If the meta contains delete entries, only ADD + * entries not in {@code deletedIdentifiers} are returned. 
Otherwise, check if any ADD entry is + * cancelled; if not, the file is kept as-is ({@code requireChange = false}). + */ + private static FullCompactionReadResult readForSortRewrite( + ManifestFileMeta meta, + ManifestFile manifestFile, + Set deletedIdentifiers) { + if (meta.numDeletedFiles() > 0) { + List entries = new ArrayList<>(); + for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { + if (entry.kind() == FileKind.ADD + && !deletedIdentifiers.contains(entry.identifier())) { + entries.add(entry); + } + } + return new FullCompactionReadResult(meta, true, entries); + } else { + boolean requireChange = false; + List entries = new ArrayList<>(); + for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { + if (deletedIdentifiers.contains(entry.identifier())) { + requireChange = true; + } else { + entries.add(entry); + } + } + return new FullCompactionReadResult(meta, requireChange, entries); } - return writer.result(); } /** Parse a long option from table options with a default value. */ From 256f0a27fc3125c7dc793973e4d317990f01e7a4 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 00:01:24 +0800 Subject: [PATCH 06/48] fix --- .../generated/core_configuration.html | 24 +++ .../paimon/operation/ManifestFileMerger.java | 190 ++++++++---------- .../operation/ManifestPickStrategy.java | 36 ++-- .../paimon/operation/ManifestSortedRun.java | 34 ---- 4 files changed, 120 insertions(+), 164 deletions(-) diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html index 7496cbfc15c8..14363c7ffd02 100644 --- a/docs/layouts/shortcodes/generated/core_configuration.html +++ b/docs/layouts/shortcodes/generated/core_configuration.html @@ -903,6 +903,30 @@ Integer To avoid frequent manifest merges, this parameter specifies the minimum number of ManifestFileMeta to merge. + +

manifest-sort.enable
+ false + Boolean + Whether to invoke manifest sort rewrite right after manifest merge during commit. The sort rewrite implementation is provided by an external module (e.g. morax) and discovered via ServiceLoader. When no implementation is registered on the classpath, this flag has no effect (manifest sort is silently skipped). + + +
manifest-sort.max-rewrite-size
+ 256 mb + MemorySize + Maximum total size of manifest files to rewrite in a single sort rewrite pass. Sections exceeding this limit are skipped. Set to a larger value to allow more aggressive sort rewriting. + + +
manifest-sort.open-file-cost
+ 4 mb + MemorySize + Open file cost of a manifest file during sort rewrite. It is added to each manifest file's size when computing section size, to avoid rewriting too many small manifest files in a single section. + + +
manifest-sort.partition-field
+ (none) + String + Partition field name to sort manifest entries by. Validated by schema validation; resolved to a 0-based index by the caller (an external sort rewrite implementation). For single-partition tables, optional (defaults to the only partition field). For multi-partition tables, REQUIRED when 'manifest-sort.enable' is true. +
manifest.target-file-size
8 mb diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 74d887285d5a..2363595d7168 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -26,7 +26,6 @@ import org.apache.paimon.manifest.ManifestEntry; import org.apache.paimon.manifest.ManifestFile; import org.apache.paimon.manifest.ManifestFileMeta; -import org.apache.paimon.options.Options; import org.apache.paimon.partition.PartitionPredicate; import org.apache.paimon.types.DataType; import org.apache.paimon.types.DecimalType; @@ -341,16 +340,12 @@ private static Optional> trySortRewrite( long suggestedMetaSize = options.manifestTargetSize().getBytes(); long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes(); Integer manifestReadParallelism = options.scanManifestParallelism(); - Options tableOptions = options.toConfiguration(); - + String sortPartitionField = options.manifestSortPartitionField(); // Step 1: Resolve sort field. - String sortField = resolveSortField(tableOptions.toMap(), partitionType); + String sortField = resolveSortField(sortPartitionField, partitionType); if (sortField == null) { - LOG.warn( - "Cannot resolve sort field for manifest sort rewrite. " - + "Skipping sort. Configure 'manifest-sort.partition-field'" - + " for multi-partition tables."); - return Optional.of(input); + throw new IllegalArgumentException( + "Cannot resolve sort field for manifest sort rewrite. 
"); } int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); DataType sortFieldType = partitionType.getTypeAt(sortFieldIndex); @@ -488,87 +483,6 @@ private static Optional> trySortRewrite( // ==================== Sort Rewrite Helpers ==================== - /** - * Compares the value at field {@code k} of two {@link BinaryRow}s according to {@code type}. - */ - static int compareField(BinaryRow a, BinaryRow b, int k, DataType type) { - switch (type.getTypeRoot()) { - case INTEGER: - case DATE: - return Integer.compare(a.getInt(k), b.getInt(k)); - case BIGINT: - return Long.compare(a.getLong(k), b.getLong(k)); - case SMALLINT: - return Short.compare(a.getShort(k), b.getShort(k)); - case TINYINT: - return Byte.compare(a.getByte(k), b.getByte(k)); - case FLOAT: - return Float.compare(a.getFloat(k), b.getFloat(k)); - case DOUBLE: - return Double.compare(a.getDouble(k), b.getDouble(k)); - case BOOLEAN: - return Boolean.compare(a.getBoolean(k), b.getBoolean(k)); - case VARCHAR: - case CHAR: - return a.getString(k).compareTo(b.getString(k)); - case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - return a.getTimestamp(k, type.defaultSize()) - .compareTo(b.getTimestamp(k, type.defaultSize())); - case DECIMAL: - DecimalType dt = (DecimalType) type; - return a.getDecimal(k, dt.getPrecision(), dt.getScale()) - .compareTo(b.getDecimal(k, dt.getPrecision(), dt.getScale())); - default: - String errorMsg = - String.format( - "Unsupported partition field type '%s' for manifest sort rewrite. " - + "Supported types: TINYINT, SMALLINT, INTEGER, BIGINT, " - + "FLOAT, DOUBLE, BOOLEAN, CHAR, VARCHAR, DATE, TIMESTAMP, " - + "DECIMAL.", - type.getTypeRoot()); - LOG.error(errorMsg); - throw new UnsupportedOperationException(errorMsg); - } - } - - /** - * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}. 
- * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field - * value AND the same data file are emitted contiguously. - */ - static int compareSortKey( - ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) { - int c = compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType); - if (c != 0) { - return c; - } - return a.file().fileName().compareTo(b.file().fileName()); - } - - /** - * Resolve the partition field to sort manifests by. - * - *

Resolution rules: - * - *

    - *
  1. If {@code manifest-sort.partition-field} is configured, return that value. - *
  2. Otherwise, if the table has exactly one partition field, return that field name. - *
  3. Otherwise return {@code null}. - *
- */ - @Nullable - static String resolveSortField(Map tableOptions, RowType partitionType) { - String configured = tableOptions.get("manifest-sort.partition-field"); - if (configured != null && !configured.isEmpty()) { - return configured; - } - if (partitionType.getFieldCount() == 1) { - return partitionType.getFieldNames().get(0); - } - return null; - } - /** * Build level-sorted runs from a list of manifest files. Sorts files by min partition value, * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 @@ -816,7 +730,85 @@ private static List sortAndRewriteSection( return result; } + /** + * Compares the value at field {@code k} of two {@link BinaryRow}s according to {@code type}. + */ + static int compareField(BinaryRow a, BinaryRow b, int k, DataType type) { + switch (type.getTypeRoot()) { + case INTEGER: + case DATE: + return Integer.compare(a.getInt(k), b.getInt(k)); + case BIGINT: + return Long.compare(a.getLong(k), b.getLong(k)); + case SMALLINT: + return Short.compare(a.getShort(k), b.getShort(k)); + case TINYINT: + return Byte.compare(a.getByte(k), b.getByte(k)); + case FLOAT: + return Float.compare(a.getFloat(k), b.getFloat(k)); + case DOUBLE: + return Double.compare(a.getDouble(k), b.getDouble(k)); + case BOOLEAN: + return Boolean.compare(a.getBoolean(k), b.getBoolean(k)); + case VARCHAR: + case CHAR: + return a.getString(k).compareTo(b.getString(k)); + case TIMESTAMP_WITHOUT_TIME_ZONE: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + return a.getTimestamp(k, type.defaultSize()) + .compareTo(b.getTimestamp(k, type.defaultSize())); + case DECIMAL: + DecimalType dt = (DecimalType) type; + return a.getDecimal(k, dt.getPrecision(), dt.getScale()) + .compareTo(b.getDecimal(k, dt.getPrecision(), dt.getScale())); + default: + String errorMsg = + String.format( + "Unsupported partition field type '%s' for manifest sort rewrite. 
" + + "Supported types: TINYINT, SMALLINT, INTEGER, BIGINT, " + + "FLOAT, DOUBLE, BOOLEAN, CHAR, VARCHAR, DATE, TIMESTAMP, " + + "DECIMAL.", + type.getTypeRoot()); + LOG.error(errorMsg); + throw new UnsupportedOperationException(errorMsg); + } + } + + /** + * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}. + * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field + * value AND the same data file are emitted contiguously. + */ + static int compareSortKey( + ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) { + int c = compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType); + if (c != 0) { + return c; + } + return a.file().fileName().compareTo(b.file().fileName()); + } + /** + * Resolve the partition field to sort manifests by. + * + *

Resolution rules: + * + *

    + *
  1. If {@code manifest-sort.partition-field} is configured, return that value. + *
  2. Otherwise, if the table has exactly one partition field, return that field name. + *
  3. Otherwise return {@code null}. + *
+ */ + @Nullable + static String resolveSortField(String sortPartitionField, RowType partitionType) { + if (sortPartitionField != null && !sortPartitionField.isEmpty()) { + return sortPartitionField; + } + if (partitionType.getFieldCount() == 1) { + return partitionType.getFieldNames().get(0); + } + return null; + } /** * Read a single manifest file for sort rewrite. If the meta contains delete entries, only ADD * entries not in {@code deletedIdentifiers} are returned. Otherwise, check if any ADD entry is @@ -849,24 +841,6 @@ private static FullCompactionReadResult readForSortRewrite( } } - /** Parse a long option from table options with a default value. */ - private static long parseLongOption(Options options, String key, long defaultValue) { - String value = options.get(key); - if (value == null || value.isEmpty()) { - return defaultValue; - } - try { - return Long.parseLong(value.trim()); - } catch (NumberFormatException e) { - LOG.warn( - "Invalid long value '{}' for option '{}', using default {}.", - value, - key, - defaultValue); - return defaultValue; - } - } - private static class FullCompactionReadResult { private final ManifestFileMeta file; diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index cc88417b2765..6421328550c9 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -19,10 +19,7 @@ package org.apache.paimon.operation; import java.util.ArrayList; -import java.util.Comparator; -import java.util.HashSet; import java.util.List; -import java.util.Set; /** * Pick strategy for manifest LSM Tree compaction. @@ -75,21 +72,19 @@ public List pick(List levelRuns) { * * sizeAmpThreshold, pick all runs for full compaction. 
*/ private List pickForSizeAmp(List levelRuns) { - int maxLevel = -1; - ManifestSortedRun highestRun = null; - long lowerLevelTotalSize = 0; - - for (ManifestSortedRun run : levelRuns) { - if (run.level() > maxLevel) { - maxLevel = run.level(); - highestRun = run; - } + if (levelRuns.isEmpty()) { + return null; } - if (highestRun == null || maxLevel <= 0) { + // The last run has the highest level (set by buildLevelSortedRuns) + ManifestSortedRun highestRun = levelRuns.get(levelRuns.size() - 1); + int maxLevel = highestRun.level(); + + if (maxLevel <= 0) { return null; } + long lowerLevelTotalSize = 0; for (ManifestSortedRun run : levelRuns) { if (run.level() < maxLevel) { lowerLevelTotalSize += run.totalSize(); @@ -112,27 +107,24 @@ private List pickForSizeAmp(List levelRuns * */ private List pickForSizeRatioAndForce(List levelRuns) { - // Sort by level ascending for low-to-high traversal - List sorted = new ArrayList<>(levelRuns); - sorted.sort(Comparator.comparingInt(ManifestSortedRun::level)); - - Set pickedSet = new HashSet<>(); + // levelRuns is already sorted by level ascending (set by buildLevelSortedRuns) + List picked = new ArrayList<>(); long pickedSize = 0; // From low to high: forced pick level0/level1, then SizeRatio for the rest. 
- for (ManifestSortedRun run : sorted) { + for (ManifestSortedRun run : levelRuns) { if (run.level() <= 1) { - pickedSet.add(run); + picked.add(run); pickedSize += run.totalSize(); } else { long nextRunSize = run.totalSize(); if (pickedSize > 0 && pickedSize * sizeRatioThreshold >= nextRunSize) { - pickedSet.add(run); + picked.add(run); pickedSize += nextRunSize; } } } - return new ArrayList<>(pickedSet); + return picked; } } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java index 49baabfe7161..c270677e1f8d 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java @@ -18,12 +18,9 @@ package org.apache.paimon.operation; -import org.apache.paimon.data.BinaryRow; import org.apache.paimon.manifest.ManifestFileMeta; -import org.apache.paimon.utils.Preconditions; import java.util.Collections; -import java.util.Comparator; import java.util.List; import java.util.Objects; import java.util.stream.Collectors; @@ -50,14 +47,6 @@ private ManifestSortedRun(List files) { this.totalSize = size; } - public static ManifestSortedRun empty() { - return new ManifestSortedRun(Collections.emptyList()); - } - - public static ManifestSortedRun fromSingle(ManifestFileMeta file) { - return new ManifestSortedRun(Collections.singletonList(file)); - } - /** * Build a {@code ManifestSortedRun} from an already-sorted list. 
The caller MUST guarantee that * {@code sortedFiles} is sorted ascending on the configured sort field's min value, and that @@ -71,14 +60,6 @@ public List files() { return files; } - public boolean isEmpty() { - return files.isEmpty(); - } - - public boolean nonEmpty() { - return !isEmpty(); - } - public long totalSize() { return totalSize; } @@ -91,21 +72,6 @@ public void setLevel(int level) { this.level = level; } - /** - * Validate that this run is monotonically non-overlapping on the sort field at {@code - * sortFieldIndex}. Used in tests and as an assertion in development. - */ - public void validate(int sortFieldIndex, Comparator partitionComparator) { - for (int i = 1; i < files.size(); i++) { - BinaryRow prevMax = files.get(i - 1).partitionStats().maxValues(); - BinaryRow currMin = files.get(i).partitionStats().minValues(); - Preconditions.checkState( - partitionComparator.compare(prevMax, currMin) <= 0, - "ManifestSortedRun is not sorted on field %s; prev.max > curr.min", - sortFieldIndex); - } - } - @Override public boolean equals(Object o) { if (!(o instanceof ManifestSortedRun)) { From 448fc5084ee31abb7dc2e0d558614992e3c89fe7 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 14:21:38 +0800 Subject: [PATCH 07/48] fix --- .../paimon/operation/ManifestFileMerger.java | 564 +------------ .../paimon/operation/ManifestFileSorter.java | 745 ++++++++++++++++++ .../paimon/manifest/ManifestFileMetaTest.java | 10 - 3 files changed, 765 insertions(+), 554 deletions(-) create mode 100644 paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 2363595d7168..83ef75a0335a 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -22,13 +22,10 @@ 
import org.apache.paimon.data.BinaryRow; import org.apache.paimon.io.RollingFileWriter; import org.apache.paimon.manifest.FileEntry; -import org.apache.paimon.manifest.FileKind; import org.apache.paimon.manifest.ManifestEntry; import org.apache.paimon.manifest.ManifestFile; import org.apache.paimon.manifest.ManifestFileMeta; import org.apache.paimon.partition.PartitionPredicate; -import org.apache.paimon.types.DataType; -import org.apache.paimon.types.DecimalType; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.Filter; @@ -38,7 +35,6 @@ import javax.annotation.Nullable; import java.util.ArrayList; -import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; @@ -79,26 +75,25 @@ public static List merge( List newFilesForAbort = new ArrayList<>(); try { - Optional> merged; - // If manifest-sort.enable is enabled and there are partition fields, use trySortRewrite if (options.manifestSortEnable() && partitionType.getFieldCount() > 0) { - merged = - trySortRewrite( + Optional> sorted = + ManifestFileSorter.trySortRewrite( input, newFilesForAbort, manifestFile, partitionType, options); - } else { - // Otherwise try full compaction first, then minor compaction if needed - merged = - tryFullCompaction( - input, - newFilesForAbort, - manifestFile, - suggestedMetaSize, - manifestFullCompactionSize, - partitionType, - manifestReadParallelism); + return sorted.orElse(input); } + // Otherwise try full compaction first, then minor compaction if needed + Optional> merged = + tryFullCompaction( + input, + newFilesForAbort, + manifestFile, + suggestedMetaSize, + manifestFullCompactionSize, + partitionType, + manifestReadParallelism); + return merged.orElseGet( () -> tryMinorCompaction( @@ -314,7 +309,7 @@ private static FullCompactionReadResult readForFullCompaction( return new FullCompactionReadResult(file, requireChange, entries); } - private static Set computeDeletePartitions(Set deleteEntries) { + 
static Set computeDeletePartitions(Set deleteEntries) { Set partitions = new HashSet<>(); for (FileEntry.Identifier identifier : deleteEntries) { partitions.add(identifier.partition); @@ -322,532 +317,13 @@ private static Set computeDeletePartitions(Set return partitions; } - // ==================== Manifest Sort Rewrite ==================== - - /** - * Try to sort-rewrite the merged manifest list by a configured partition field. If the sort - * field cannot be resolved or the delta file size is below the full compaction threshold, the - * input is returned as-is. - */ - private static Optional> trySortRewrite( - List input, - List newFilesForAbort, - ManifestFile manifestFile, - RowType partitionType, - CoreOptions options) - throws Exception { - // Extract configuration from options - long suggestedMetaSize = options.manifestTargetSize().getBytes(); - long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes(); - Integer manifestReadParallelism = options.scanManifestParallelism(); - String sortPartitionField = options.manifestSortPartitionField(); - // Step 1: Resolve sort field. - String sortField = resolveSortField(sortPartitionField, partitionType); - if (sortField == null) { - throw new IllegalArgumentException( - "Cannot resolve sort field for manifest sort rewrite. "); - } - int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); - DataType sortFieldType = partitionType.getTypeAt(sortFieldIndex); - - // Step 2: Check full compact trigger. 
- Filter mustChange = - file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize; - - long totalDeltaFileSize = 0; - for (ManifestFileMeta file : input) { - if (mustChange.test(file)) { - totalDeltaFileSize += file.fileSize(); - } - } - - List fullCompactionManifests = new ArrayList<>(); - List lsmFiles = new LinkedList<>(input); - Set deleteEntries = null; - if (totalDeltaFileSize >= manifestFullCompactionSize) { - // Step 3: Read delete entries and build partition predicate. - deleteEntries = - FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); - - PartitionPredicate predicate; - if (deleteEntries.isEmpty()) { - predicate = PartitionPredicate.ALWAYS_FALSE; - } else { - if (partitionType.getFieldCount() > 0) { - Set deletePartitions = computeDeletePartitions(deleteEntries); - predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions); - } else { - predicate = PartitionPredicate.ALWAYS_TRUE; - } - } - - // Step 4: Classify input into level0 runs and LSM files. 
- Iterator iterator = lsmFiles.iterator(); - while (iterator.hasNext()) { - ManifestFileMeta file = iterator.next(); - if (mustChange.test(file)) { - iterator.remove(); - fullCompactionManifests.add(file); - } else if (predicate != null - && predicate.test( - file.numAddedFiles() + file.numDeletedFiles(), - file.partitionStats().minValues(), - file.partitionStats().maxValues(), - file.partitionStats().nullCounts())) { - iterator.remove(); - fullCompactionManifests.add(file); - } - } - } - - // Process full compaction manifests separately: sort, deduplicate, and rewrite - List fullCompactionRewritten = new ArrayList<>(); - if (!fullCompactionManifests.isEmpty()) { - fullCompactionRewritten = - sortAndRewriteFullCompaction( - fullCompactionManifests, - manifestFile, - sortFieldIndex, - sortFieldType, - suggestedMetaSize, - deleteEntries, - manifestReadParallelism); - newFilesForAbort.addAll(fullCompactionRewritten); - } - - // Step 5: Build LSM Tree and assign levels (only for lsmFiles). - List levelRuns = - lsmFiles.isEmpty() - ? new ArrayList<>() - : buildLevelSortedRuns(lsmFiles, sortFieldIndex, sortFieldType); - - // Step 6: Pick runs to compact. - int sizeAmpThreshold = options.maxSizeAmplificationPercent(); - int sizeRatioThreshold = options.sortedRunSizeRatio(); - ManifestPickStrategy pickStrategy = - new ManifestPickStrategy(sizeAmpThreshold, sizeRatioThreshold); - List pickedRuns = pickStrategy.pick(levelRuns); - - Set pickedSet = new HashSet<>(pickedRuns); - List reusedFiles = new ArrayList<>(); - for (ManifestSortedRun run : levelRuns) { - if (!pickedSet.contains(run)) { - reusedFiles.addAll(run.files()); - } - } - List result = new ArrayList<>(reusedFiles); - if (pickedRuns.isEmpty()) { - result.addAll(fullCompactionRewritten); - return Optional.of(new ArrayList<>(result)); - } - - // Step 7: Split picked files into sections, sort and rewrite each. 
- List pickedFiles = new ArrayList<>(); - for (ManifestSortedRun run : pickedRuns) { - pickedFiles.addAll(run.files()); - } - - List> sections = - splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType); - long maxRewriteSize = options.manifestSortMaxRewriteSize(); - long processedSize = 0; - - long openFileCost = options.manifestSortOpenFileCost(); - List sortNewFiles = new ArrayList<>(); - for (List section : sections) { - long sectionSize = 0; - for (ManifestFileMeta m : section) { - sectionSize += m.fileSize() + openFileCost; - } - if (processedSize + sectionSize > maxRewriteSize) { - result.addAll(section); - continue; - } - processedSize += sectionSize; - - List merged = - sortAndRewriteSection( - section, - manifestFile, - sortFieldIndex, - sortFieldType, - null, - manifestReadParallelism); - sortNewFiles.addAll(merged); - result.addAll(merged); - } - newFilesForAbort.addAll(sortNewFiles); - result.addAll(fullCompactionRewritten); - return Optional.of(result); - } - - // ==================== Sort Rewrite Helpers ==================== - - /** - * Build level-sorted runs from a list of manifest files. Sorts files by min partition value, - * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 - * largest to level 1~4, rest to level 0). 
- */ - static List buildLevelSortedRuns( - List input, int sortFieldIndex, DataType sortFieldType) { - input.sort( - (a, b) -> { - int cmp = - compareField( - a.partitionStats().minValues(), - b.partitionStats().minValues(), - sortFieldIndex, - sortFieldType); - if (cmp != 0) { - return cmp; - } - return compareField( - a.partitionStats().maxValues(), - b.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); - }); - - List> runFilesList = new ArrayList<>(); - List currentRun = new ArrayList<>(); - currentRun.add(input.get(0)); - for (int i = 1; i < input.size(); i++) { - ManifestFileMeta file = input.get(i); - ManifestFileMeta last = currentRun.get(currentRun.size() - 1); - if (compareField( - file.partitionStats().minValues(), - last.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType) - > 0) { - currentRun.add(file); - } else { - runFilesList.add(currentRun); - currentRun = new ArrayList<>(); - currentRun.add(file); - } - } - runFilesList.add(currentRun); - - List runs = new ArrayList<>(runFilesList.size()); - for (List rf : runFilesList) { - runs.add(ManifestSortedRun.fromSorted(rf)); - } - - runs.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); - int n = runs.size(); - for (int i = 0; i < n; i++) { - if (i >= n - 4) { - runs.get(i).setLevel(n - i); - } else { - runs.get(i).setLevel(0); - } - } - return runs; - } - - /** - * Split picked files into sections. Files with overlapping sort-key intervals go into the same - * section. 
- */ - static List> splitIntoSections( - List pickedFiles, int sortFieldIndex, DataType sortFieldType) { - pickedFiles.sort( - (a, b) -> { - int cmp = - compareField( - a.partitionStats().minValues(), - b.partitionStats().minValues(), - sortFieldIndex, - sortFieldType); - if (cmp != 0) { - return cmp; - } - return compareField( - a.partitionStats().maxValues(), - b.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); - }); - - List> sections = new ArrayList<>(); - List currentSection = new ArrayList<>(); - currentSection.add(pickedFiles.get(0)); - BinaryRow sectionMaxBound = pickedFiles.get(0).partitionStats().maxValues(); - for (int i = 1; i < pickedFiles.size(); i++) { - ManifestFileMeta file = pickedFiles.get(i); - if (compareField( - file.partitionStats().minValues(), - sectionMaxBound, - sortFieldIndex, - sortFieldType) - > 0) { - sections.add(currentSection); - currentSection = new ArrayList<>(); - currentSection.add(file); - sectionMaxBound = file.partitionStats().maxValues(); - } else { - currentSection.add(file); - if (compareField( - file.partitionStats().maxValues(), - sectionMaxBound, - sortFieldIndex, - sortFieldType) - > 0) { - sectionMaxBound = file.partitionStats().maxValues(); - } - } - } - sections.add(currentSection); - return sections; - } - - /** - * Sort and rewrite full compaction manifests. Files are sorted by min partition value, then - * processed in batches. A batch stops when total size reaches threshold or when current max - * doesn't overlap with next min. Each batch is sorted, deduplicated (DELETE entries removed), - * and written to new manifest files. 
- */ - private static List sortAndRewriteFullCompaction( - List fullCompactionManifests, - ManifestFile manifestFile, - int sortFieldIndex, - DataType sortFieldType, - long suggestedMetaSize, - @Nullable Set deletedIdentifiers, - @Nullable Integer manifestReadParallelism) - throws Exception { - - // Sort by min partition value - fullCompactionManifests.sort( - (a, b) -> - compareField( - a.partitionStats().minValues(), - b.partitionStats().minValues(), - sortFieldIndex, - sortFieldType)); - - List result = new ArrayList<>(); - List batch = new ArrayList<>(); - long batchSize = 0; - - for (int i = 0; i < fullCompactionManifests.size(); i++) { - ManifestFileMeta current = fullCompactionManifests.get(i); - boolean shouldFlush = false; - - // Check if batch size reaches threshold - if (batchSize + current.fileSize() >= suggestedMetaSize && !batch.isEmpty()) { - shouldFlush = true; - } - - // Check if current max overlaps with next min - if (i < fullCompactionManifests.size() - 1 && !batch.isEmpty()) { - ManifestFileMeta next = fullCompactionManifests.get(i + 1); - int cmp = - compareField( - current.partitionStats().maxValues(), - next.partitionStats().minValues(), - sortFieldIndex, - sortFieldType); - if (cmp < 0) { - shouldFlush = true; - } - } - - batch.add(current); - batchSize += current.fileSize(); - - if (shouldFlush || i == fullCompactionManifests.size() - 1) { - // Process batch: sort entries, remove DELETE, write out - List rewritten = - sortAndRewriteSection( - batch, - manifestFile, - sortFieldIndex, - sortFieldType, - deletedIdentifiers, - manifestReadParallelism); - result.addAll(rewritten); - batch.clear(); - batchSize = 0; - } - } - - return result; - } - - /** - * Read all entries from a section's manifest files, sort them in memory by the specified - * partition field, filter out DELETE entries and cancelled ADD entries, then write surviving - * entries to the rolling writer. 
Manifest files without delete entries and without cancelled - * ADD entries are kept as-is. - * - *

Reading is parallelized via {@code sequentialBatchedExecute} following the same pattern as - * {@link #tryFullCompaction}. - */ - private static List sortAndRewriteSection( - List section, - ManifestFile manifestFile, - int sortFieldIndex, - DataType sortFieldType, - @Nullable Set deletedIdentifiers, - @Nullable Integer manifestReadParallelism) - throws Exception { - - Set safeDeletedIds = - deletedIdentifiers != null ? deletedIdentifiers : new HashSet<>(); - - // Parallel read: each meta is read independently - Function> reader = - meta -> singletonList(readForSortRewrite(meta, manifestFile, safeDeletedIds)); - - List result = new ArrayList<>(); - List entriesToRewrite = new ArrayList<>(); - - for (FullCompactionReadResult readResult : - sequentialBatchedExecute(reader, section, manifestReadParallelism)) { - if (readResult.requireChange) { - entriesToRewrite.addAll(readResult.entries); - } else { - result.add(readResult.file); - } - } - - if (!entriesToRewrite.isEmpty()) { - entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); - - RollingFileWriter writer = - manifestFile.createRollingWriter(); - try { - for (ManifestEntry entry : entriesToRewrite) { - writer.write(entry); - } - } finally { - writer.close(); - } - result.addAll(writer.result()); - } - - return result; - } - /** - * Compares the value at field {@code k} of two {@link BinaryRow}s according to {@code type}. 
- */ - static int compareField(BinaryRow a, BinaryRow b, int k, DataType type) { - switch (type.getTypeRoot()) { - case INTEGER: - case DATE: - return Integer.compare(a.getInt(k), b.getInt(k)); - case BIGINT: - return Long.compare(a.getLong(k), b.getLong(k)); - case SMALLINT: - return Short.compare(a.getShort(k), b.getShort(k)); - case TINYINT: - return Byte.compare(a.getByte(k), b.getByte(k)); - case FLOAT: - return Float.compare(a.getFloat(k), b.getFloat(k)); - case DOUBLE: - return Double.compare(a.getDouble(k), b.getDouble(k)); - case BOOLEAN: - return Boolean.compare(a.getBoolean(k), b.getBoolean(k)); - case VARCHAR: - case CHAR: - return a.getString(k).compareTo(b.getString(k)); - case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - return a.getTimestamp(k, type.defaultSize()) - .compareTo(b.getTimestamp(k, type.defaultSize())); - case DECIMAL: - DecimalType dt = (DecimalType) type; - return a.getDecimal(k, dt.getPrecision(), dt.getScale()) - .compareTo(b.getDecimal(k, dt.getPrecision(), dt.getScale())); - default: - String errorMsg = - String.format( - "Unsupported partition field type '%s' for manifest sort rewrite. " - + "Supported types: TINYINT, SMALLINT, INTEGER, BIGINT, " - + "FLOAT, DOUBLE, BOOLEAN, CHAR, VARCHAR, DATE, TIMESTAMP, " - + "DECIMAL.", - type.getTypeRoot()); - LOG.error(errorMsg); - throw new UnsupportedOperationException(errorMsg); - } - } - - /** - * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}. - * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field - * value AND the same data file are emitted contiguously. 
- */ - static int compareSortKey( - ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) { - int c = compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType); - if (c != 0) { - return c; - } - return a.file().fileName().compareTo(b.file().fileName()); - } - - /** - * Resolve the partition field to sort manifests by. - * - *

Resolution rules: - * - *

    - *
  1. If {@code manifest-sort.partition-field} is configured, return that value. - *
  2. Otherwise, if the table has exactly one partition field, return that field name. - *
  3. Otherwise return {@code null}. - *
- */ - @Nullable - static String resolveSortField(String sortPartitionField, RowType partitionType) { - if (sortPartitionField != null && !sortPartitionField.isEmpty()) { - return sortPartitionField; - } - if (partitionType.getFieldCount() == 1) { - return partitionType.getFieldNames().get(0); - } - return null; - } - /** - * Read a single manifest file for sort rewrite. If the meta contains delete entries, only ADD - * entries not in {@code deletedIdentifiers} are returned. Otherwise, check if any ADD entry is - * cancelled; if not, the file is kept as-is ({@code requireChange = false}). - */ - private static FullCompactionReadResult readForSortRewrite( - ManifestFileMeta meta, - ManifestFile manifestFile, - Set deletedIdentifiers) { - if (meta.numDeletedFiles() > 0) { - List entries = new ArrayList<>(); - for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { - if (entry.kind() == FileKind.ADD - && !deletedIdentifiers.contains(entry.identifier())) { - entries.add(entry); - } - } - return new FullCompactionReadResult(meta, true, entries); - } else { - boolean requireChange = false; - List entries = new ArrayList<>(); - for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { - if (deletedIdentifiers.contains(entry.identifier())) { - requireChange = true; - } else { - entries.add(entry); - } - } - return new FullCompactionReadResult(meta, requireChange, entries); - } - } - - private static class FullCompactionReadResult { + static class FullCompactionReadResult { - private final ManifestFileMeta file; - private final boolean requireChange; - private final List entries; + final ManifestFileMeta file; + final boolean requireChange; + final List entries; - private FullCompactionReadResult( + FullCompactionReadResult( ManifestFileMeta file, boolean requireChange, List entries) { this.file = file; this.requireChange = requireChange; diff --git 
a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java new file mode 100644 index 000000000000..9f40d430be91 --- /dev/null +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -0,0 +1,745 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.operation; + +import org.apache.paimon.CoreOptions; +import org.apache.paimon.data.BinaryRow; +import org.apache.paimon.io.RollingFileWriter; +import org.apache.paimon.manifest.FileEntry; +import org.apache.paimon.manifest.FileKind; +import org.apache.paimon.manifest.ManifestEntry; +import org.apache.paimon.manifest.ManifestFile; +import org.apache.paimon.manifest.ManifestFileMeta; +import org.apache.paimon.operation.ManifestFileMerger.FullCompactionReadResult; +import org.apache.paimon.partition.PartitionPredicate; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.DecimalType; +import org.apache.paimon.types.RowType; +import org.apache.paimon.utils.Filter; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; + +import static java.util.Collections.singletonList; +import static org.apache.paimon.utils.ManifestReadThreadPool.sequentialBatchedExecute; + +/** Manifest file sorter that sorts and rewrites manifest files by a configured partition field. */ +public class ManifestFileSorter { + + private static final Logger LOG = LoggerFactory.getLogger(ManifestFileSorter.class); + + /** + * Try to sort-rewrite the merged manifest list by a configured partition field. If the sort + * field cannot be resolved or the delta file size is below the full compaction threshold, the + * input is returned as-is. 
+ */ + static Optional> trySortRewrite( + List input, + List newFilesForAbort, + ManifestFile manifestFile, + RowType partitionType, + CoreOptions options) + throws Exception { + // Extract configuration from options + long suggestedMetaSize = options.manifestTargetSize().getBytes(); + long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes(); + Integer manifestReadParallelism = options.scanManifestParallelism(); + String sortPartitionField = options.manifestSortPartitionField(); + int mergeMinCount = options.manifestMergeMinCount(); + // Step 1: Resolve sort field. + String sortField = resolveSortField(sortPartitionField, partitionType); + if (sortField == null) { + throw new IllegalArgumentException( + "Cannot resolve sort field for manifest sort rewrite. "); + } + int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); + DataType sortFieldType = partitionType.getTypeAt(sortFieldIndex); + + // Step 2: Classify manifests into defaultCompaction and LSM groups. + ClassifyResult classified = + classifyManifests( + input, + suggestedMetaSize, + manifestFullCompactionSize, + mergeMinCount, + manifestFile, + partitionType, + manifestReadParallelism); + List defaultCompactionManifests = classified.defaultCompactionManifests; + List lsmFiles = classified.lsmFiles; + Set deleteEntries = classified.deleteEntries; + + // Step 3: Build LSM Tree and assign levels (only for lsmFiles). + List levelRuns = + lsmFiles.isEmpty() + ? new ArrayList<>() + : buildLevelSortedRuns(lsmFiles, sortFieldIndex, sortFieldType); + + // Step 4: Pick runs to compact. 
+ int sizeAmpThreshold = options.maxSizeAmplificationPercent(); + int sizeRatioThreshold = options.sortedRunSizeRatio(); + ManifestPickStrategy pickStrategy = + new ManifestPickStrategy(sizeAmpThreshold, sizeRatioThreshold); + List pickedRuns = pickStrategy.pick(levelRuns); + + if (pickedRuns.isEmpty() && defaultCompactionManifests.isEmpty()) { + return Optional.of(input); + } + + Set pickedSet = new HashSet<>(pickedRuns); + List reusedFiles = new ArrayList<>(); + for (ManifestSortedRun run : levelRuns) { + if (!pickedSet.contains(run)) { + reusedFiles.addAll(run.files()); + } + } + List result = new ArrayList<>(reusedFiles); + + // Step 5: Split picked files into sections, sort and rewrite each. + List pickedFiles = new ArrayList<>(); + for (ManifestSortedRun run : pickedRuns) { + pickedFiles.addAll(run.files()); + } + pickedFiles.addAll(defaultCompactionManifests); + + Set defaultCompactionSet = new HashSet<>(defaultCompactionManifests); + + List
sections = + splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType, defaultCompactionSet); + sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); + long maxRewriteSize = options.manifestSortMaxRewriteSize(); + long openFileCost = options.manifestSortOpenFileCost(); + List sortNewFiles = new ArrayList<>(); + + List rewritten = + rewriteSections( + sections, + defaultCompactionSet, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + suggestedMetaSize, + maxRewriteSize, + openFileCost, + sortNewFiles, + manifestReadParallelism); + result.addAll(rewritten); + + newFilesForAbort.addAll(sortNewFiles); + return Optional.of(result); + } + + /** + * Classify manifest files into default-compaction group and LSM group. + * + *

When full compaction is triggered (totalDeltaFileSize >= threshold), files that must + * change or overlap with delete partitions go into defaultCompactionManifests; the rest stay as + * lsmFiles. + * + *

When full compaction is NOT triggered, adjacent small manifests whose cumulative size + * reaches suggestedMetaSize are grouped into defaultCompactionManifests (minor-style pick). + */ + private static ClassifyResult classifyManifests( + List input, + long suggestedMetaSize, + long manifestFullCompactionSize, + int mergeMinCount, + ManifestFile manifestFile, + RowType partitionType, + @Nullable Integer manifestReadParallelism) { + Filter mustChange = + file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize; + + long totalDeltaFileSize = 0; + for (ManifestFileMeta file : input) { + if (mustChange.test(file)) { + totalDeltaFileSize += file.fileSize(); + } + } + + List defaultCompactionManifests = new ArrayList<>(); + List lsmFiles = new LinkedList<>(input); + Set deleteEntries = null; + + if (totalDeltaFileSize >= manifestFullCompactionSize) { + // Full compact triggered: read delete entries and classify by predicate. + deleteEntries = + FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); + + PartitionPredicate predicate; + if (deleteEntries.isEmpty()) { + predicate = PartitionPredicate.ALWAYS_FALSE; + } else { + if (partitionType.getFieldCount() > 0) { + Set deletePartitions = + ManifestFileMerger.computeDeletePartitions(deleteEntries); + predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions); + } else { + predicate = PartitionPredicate.ALWAYS_TRUE; + } + } + + Iterator iterator = lsmFiles.iterator(); + while (iterator.hasNext()) { + ManifestFileMeta file = iterator.next(); + if (mustChange.test(file)) { + iterator.remove(); + defaultCompactionManifests.add(file); + } else if (predicate != null + && predicate.test( + file.numAddedFiles() + file.numDeletedFiles(), + file.partitionStats().minValues(), + file.partitionStats().maxValues(), + file.partitionStats().nullCounts())) { + iterator.remove(); + defaultCompactionManifests.add(file); + } + } + } else { + // Minor-style pick: merge adjacent small 
manifests when no full compact triggered. + List candidates = new ArrayList<>(); + long candidateSize = 0; + for (ManifestFileMeta file : input) { + candidateSize += file.fileSize(); + candidates.add(file); + if (candidateSize >= suggestedMetaSize) { + if (candidates.size() > 1) { + defaultCompactionManifests.addAll(candidates); + lsmFiles.removeAll(candidates); + } + candidates.clear(); + candidateSize = 0; + } + } + if (candidates.size() >= mergeMinCount) { + defaultCompactionManifests.addAll(candidates); + lsmFiles.removeAll(candidates); + } + } + + return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); + } + + /** + * Iterate over sections, decide whether to rewrite each section fully or partially based on the + * maxRewriteSize threshold and whether the section contains defaultCompaction files. + * + *

Within threshold: read all metas, sort and rewrite the entire section. Exceeds threshold + * but contains defaultCompaction files: only rewrite sub-segments around those files. Exceeds + * threshold with no defaultCompaction files: skip (keep as-is). + * + * @return the list of result manifest files (both rewritten and kept-as-is) + */ + private static List rewriteSections( + List

sections, + Set defaultCompactionSet, + ManifestFile manifestFile, + int sortFieldIndex, + DataType sortFieldType, + @Nullable Set deleteEntries, + long suggestedMetaSize, + long maxRewriteSize, + long openFileCost, + List sortNewFiles, + @Nullable Integer manifestReadParallelism) + throws Exception { + List result = new ArrayList<>(); + long processedSize = 0; + + for (Section section : sections) { + // Single-file section without defaultCompaction: already sorted, skip rewrite. + if (section.files.size() == 1 && !section.hasDefaultCompactMeta) { + result.addAll(section.files); + continue; + } + + long sectionSize = section.totalSize + (long) section.files.size() * openFileCost; + + boolean exceedsThreshold = processedSize + sectionSize > maxRewriteSize; + if (exceedsThreshold && !section.hasDefaultCompactMeta) { + result.addAll(section.files); + continue; + } + + if (!exceedsThreshold) { + processedSize += sectionSize; + List merged = + sortAndRewriteSection( + section.files, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + manifestReadParallelism); + sortNewFiles.addAll(merged); + result.addAll(merged); + } else { + rewriteSubSegments( + section.files, + defaultCompactionSet, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + suggestedMetaSize, + sortNewFiles, + result, + manifestReadParallelism); + } + } + return result; + } + + /** + * Rewrite sub-segments within a section that exceeds the rewrite threshold. Only sub-segments + * containing defaultCompaction files are rewritten; other files are kept as-is. 
+ */ + private static void rewriteSubSegments( + List section, + Set defaultCompactionSet, + ManifestFile manifestFile, + int sortFieldIndex, + DataType sortFieldType, + @Nullable Set deleteEntries, + long suggestedMetaSize, + List sortNewFiles, + List result, + @Nullable Integer manifestReadParallelism) + throws Exception { + List subSegment = new ArrayList<>(); + long subSegmentSize = 0; + for (ManifestFileMeta m : section) { + if (defaultCompactionSet.contains(m)) { + subSegment.add(m); + subSegmentSize += m.fileSize(); + } else if (!subSegment.isEmpty()) { + subSegment.add(m); + subSegmentSize += m.fileSize(); + if (subSegmentSize >= suggestedMetaSize) { + List merged = + sortAndRewriteSection( + subSegment, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + manifestReadParallelism); + sortNewFiles.addAll(merged); + result.addAll(merged); + subSegment = new ArrayList<>(); + subSegmentSize = 0; + } + } else { + result.add(m); + } + } + // Flush remaining sub-segment + if (!subSegment.isEmpty()) { + List merged = + sortAndRewriteSection( + subSegment, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + manifestReadParallelism); + sortNewFiles.addAll(merged); + result.addAll(merged); + } + } + + /** + * Build level-sorted runs from a list of manifest files. Sorts files by min partition value, + * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 + * largest to level 1~4, rest to level 0). 
+ */ + static List buildLevelSortedRuns( + List input, int sortFieldIndex, DataType sortFieldType) { + input.sort( + (a, b) -> { + int cmp = + compareField( + a.partitionStats().minValues(), + b.partitionStats().minValues(), + sortFieldIndex, + sortFieldType); + if (cmp != 0) { + return cmp; + } + return compareField( + a.partitionStats().maxValues(), + b.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType); + }); + + List> runFilesList = new ArrayList<>(); + List currentRun = new ArrayList<>(); + currentRun.add(input.get(0)); + for (int i = 1; i < input.size(); i++) { + ManifestFileMeta file = input.get(i); + ManifestFileMeta last = currentRun.get(currentRun.size() - 1); + if (compareField( + file.partitionStats().minValues(), + last.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType) + >= 0) { + currentRun.add(file); + } else { + runFilesList.add(currentRun); + currentRun = new ArrayList<>(); + currentRun.add(file); + } + } + runFilesList.add(currentRun); + + List runs = new ArrayList<>(runFilesList.size()); + for (List rf : runFilesList) { + runs.add(ManifestSortedRun.fromSorted(rf)); + } + + runs.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); + int n = runs.size(); + for (int i = 0; i < n; i++) { + if (i >= n - 4) { + runs.get(i).setLevel(n - i); + } else { + runs.get(i).setLevel(0); + } + } + return runs; + } + + /** + * Split picked files into sections. Files with overlapping sort-key intervals go into the same + * section. Each section is built with pre-computed totalSize and hasDefaultCompactMeta. + */ + static List
splitIntoSections( + List pickedFiles, + int sortFieldIndex, + DataType sortFieldType, + Set defaultCompactionSet) { + pickedFiles.sort( + (a, b) -> { + int cmp = + compareField( + a.partitionStats().minValues(), + b.partitionStats().minValues(), + sortFieldIndex, + sortFieldType); + if (cmp != 0) { + return cmp; + } + return compareField( + a.partitionStats().maxValues(), + b.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType); + }); + + List
sections = new ArrayList<>(); + List currentFiles = new ArrayList<>(); + long currentTotalSize = 0; + boolean currentHasDefault = false; + ManifestFileMeta first = pickedFiles.get(0); + currentFiles.add(first); + currentTotalSize += first.fileSize(); + currentHasDefault = defaultCompactionSet.contains(first); + BinaryRow sectionMaxBound = first.partitionStats().maxValues(); + + for (int i = 1; i < pickedFiles.size(); i++) { + ManifestFileMeta file = pickedFiles.get(i); + if (compareField( + file.partitionStats().minValues(), + sectionMaxBound, + sortFieldIndex, + sortFieldType) + >= 0) { + sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); + currentFiles = new ArrayList<>(); + currentTotalSize = 0; + currentFiles.add(file); + currentTotalSize += file.fileSize(); + currentHasDefault = defaultCompactionSet.contains(file); + sectionMaxBound = file.partitionStats().maxValues(); + } else { + currentFiles.add(file); + currentTotalSize += file.fileSize(); + if (!currentHasDefault && defaultCompactionSet.contains(file)) { + currentHasDefault = true; + } + if (compareField( + file.partitionStats().maxValues(), + sectionMaxBound, + sortFieldIndex, + sortFieldType) + > 0) { + sectionMaxBound = file.partitionStats().maxValues(); + } + } + } + sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); + return sections; + } + + /** + * Merge small adjacent sections to avoid producing too many small rewrite batches. If either + * the pending section or the current section total size is smaller than half of {@code + * suggestedMetaSize}, they are combined into a single section. + */ + private static List
mergeSmallAdjacentSections( + List
sections, long suggestedMetaSize) { + long smallThreshold = suggestedMetaSize / 2; + List
merged = new ArrayList<>(); + Section pending = null; + + for (Section section : sections) { + if (pending == null) { + pending = section; + } else { + if (pending.totalSize < smallThreshold || section.totalSize < smallThreshold) { + pending = Section.merge(pending, section); + } else { + merged.add(pending); + pending = section; + } + } + } + if (pending != null) { + merged.add(pending); + } + return merged; + } + + /** + * Read all entries from a section's manifest files, sort them in memory by the specified + * partition field, filter out DELETE entries and cancelled ADD entries, then write surviving + * entries to new manifest files via the rolling writer. + * + *

All files participate in sorting, enabling full sort across the entire section. + * + *

Reading is parallelized via {@code sequentialBatchedExecute} following the same pattern as + * {@link ManifestFileMerger#tryFullCompaction}. + */ + private static List sortAndRewriteSection( + List section, + ManifestFile manifestFile, + int sortFieldIndex, + DataType sortFieldType, + @Nullable Set deletedIdentifiers, + @Nullable Integer manifestReadParallelism) + throws Exception { + + Set safeDeletedIds = + deletedIdentifiers != null ? deletedIdentifiers : new HashSet<>(); + + // Parallel read: each meta is read independently + Function> reader = + meta -> singletonList(readForSortRewrite(meta, manifestFile, safeDeletedIds)); + + List entriesToRewrite = new ArrayList<>(); + for (FullCompactionReadResult readResult : + sequentialBatchedExecute(reader, section, manifestReadParallelism)) { + entriesToRewrite.addAll(readResult.entries); + } + + List result = new ArrayList<>(); + if (!entriesToRewrite.isEmpty()) { + entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); + + RollingFileWriter writer = + manifestFile.createRollingWriter(); + try { + for (ManifestEntry entry : entriesToRewrite) { + writer.write(entry); + } + } finally { + writer.close(); + } + result.addAll(writer.result()); + } + + return result; + } + + /** + * Compares the value at field {@code k} of two {@link BinaryRow}s according to {@code type}. 
+ */ + static int compareField(BinaryRow a, BinaryRow b, int k, DataType type) { + switch (type.getTypeRoot()) { + case INTEGER: + case DATE: + return Integer.compare(a.getInt(k), b.getInt(k)); + case BIGINT: + return Long.compare(a.getLong(k), b.getLong(k)); + case SMALLINT: + return Short.compare(a.getShort(k), b.getShort(k)); + case TINYINT: + return Byte.compare(a.getByte(k), b.getByte(k)); + case FLOAT: + return Float.compare(a.getFloat(k), b.getFloat(k)); + case DOUBLE: + return Double.compare(a.getDouble(k), b.getDouble(k)); + case BOOLEAN: + return Boolean.compare(a.getBoolean(k), b.getBoolean(k)); + case VARCHAR: + case CHAR: + return a.getString(k).compareTo(b.getString(k)); + case TIMESTAMP_WITHOUT_TIME_ZONE: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + return a.getTimestamp(k, type.defaultSize()) + .compareTo(b.getTimestamp(k, type.defaultSize())); + case DECIMAL: + DecimalType dt = (DecimalType) type; + return a.getDecimal(k, dt.getPrecision(), dt.getScale()) + .compareTo(b.getDecimal(k, dt.getPrecision(), dt.getScale())); + default: + String errorMsg = + String.format( + "Unsupported partition field type '%s' for manifest sort rewrite. " + + "Supported types: TINYINT, SMALLINT, INTEGER, BIGINT, " + + "FLOAT, DOUBLE, BOOLEAN, CHAR, VARCHAR, DATE, TIMESTAMP, " + + "DECIMAL.", + type.getTypeRoot()); + LOG.error(errorMsg); + throw new UnsupportedOperationException(errorMsg); + } + } + + /** + * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}. + * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field + * value AND the same data file are emitted contiguously. 
+ */ + static int compareSortKey( + ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) { + int c = compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType); + if (c != 0) { + return c; + } + return a.file().fileName().compareTo(b.file().fileName()); + } + + /** + * Resolve the partition field to sort manifests by. + * + *

Resolution rules: + * + *

    + *
  1. If {@code manifest-sort.partition-field} is configured, return that value. + *
  2. Otherwise, if the table has exactly one partition field, return that field name. + *
  3. Otherwise return {@code null}. + *
+ */ + @Nullable + static String resolveSortField(String sortPartitionField, RowType partitionType) { + if (sortPartitionField != null && !sortPartitionField.isEmpty()) { + return sortPartitionField; + } + if (partitionType.getFieldCount() == 1) { + return partitionType.getFieldNames().get(0); + } + return null; + } + + /** + * Read a single manifest file for sort rewrite. If the meta contains delete entries, only ADD + * entries not in {@code deletedIdentifiers} are returned. Otherwise, check if any ADD entry is + * cancelled; if not, the file is kept as-is ({@code requireChange = false}). + */ + private static FullCompactionReadResult readForSortRewrite( + ManifestFileMeta meta, + ManifestFile manifestFile, + Set deletedIdentifiers) { + if (meta.numDeletedFiles() > 0) { + List entries = new ArrayList<>(); + for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { + if (entry.kind() == FileKind.ADD + && !deletedIdentifiers.contains(entry.identifier())) { + entries.add(entry); + } + } + return new FullCompactionReadResult(meta, true, entries); + } else { + boolean requireChange = false; + List entries = new ArrayList<>(); + for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { + if (deletedIdentifiers.contains(entry.identifier())) { + requireChange = true; + } else { + entries.add(entry); + } + } + return new FullCompactionReadResult(meta, requireChange, entries); + } + } + + /** A section of manifest files with pre-computed metadata. */ + static class Section { + final List files; + final long totalSize; + final boolean hasDefaultCompactMeta; + + Section(List files, long totalSize, boolean hasDefaultCompactMeta) { + this.files = files; + this.totalSize = totalSize; + this.hasDefaultCompactMeta = hasDefaultCompactMeta; + } + + /** Create a merged section from two sections. 
*/ + static Section merge(Section a, Section b) { + List merged = new ArrayList<>(a.files); + merged.addAll(b.files); + return new Section( + merged, + a.totalSize + b.totalSize, + a.hasDefaultCompactMeta || b.hasDefaultCompactMeta); + } + } + + /** Result of classifying manifest files into default-compaction and LSM groups. */ + private static class ClassifyResult { + final List defaultCompactionManifests; + final List lsmFiles; + @Nullable final Set deleteEntries; + + ClassifyResult( + List defaultCompactionManifests, + List lsmFiles, + @Nullable Set deleteEntries) { + this.defaultCompactionManifests = defaultCompactionManifests; + this.lsmFiles = lsmFiles; + this.deleteEntries = deleteEntries; + } + } +} diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index f4adf35802e4..6ded1beead21 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -939,9 +939,6 @@ public void testManifestSortWithOverlappingPartitions() { input.add(makeManifest(entriesF.toArray(new ManifestEntry[0]))); Options testOptions = new Options(); - testOptions.set("manifest.target-file-size", "500B"); - testOptions.set("manifest.merge-min-count", "3"); - testOptions.set("manifest.full-compaction-threshold-size", "200B"); testOptions.set("manifest-sort.enable", "true"); List merged = @@ -1036,9 +1033,6 @@ public void testManifestSortWithShuffledOverlappingPartitions() { input.add(makeManifest(entries6.toArray(new ManifestEntry[0]))); Options testOptions = new Options(); - testOptions.set("manifest.target-file-size", "500B"); - testOptions.set("manifest.merge-min-count", "3"); - testOptions.set("manifest.full-compaction-threshold-size", "100B"); testOptions.set("manifest-sort.enable", "true"); List merged = @@ -1125,9 +1119,6 @@ public void 
testManifestSortWithMultipleOverlappingRuns() { input.add(makeManifest(run3Entries.toArray(new ManifestEntry[0]))); Options testOptions = new Options(); - testOptions.set("manifest.target-file-size", "500B"); - testOptions.set("manifest.merge-min-count", "3"); - testOptions.set("manifest.full-compaction-threshold-size", "100B"); testOptions.set("manifest-sort.enable", "true"); List merged = @@ -1224,7 +1215,6 @@ public void testManifestSortEliminatesDeleteEntries() { // Set target file size very large so all input manifests are considered "small" // (fileSize < suggestedMetaSize), which makes them all satisfy mustChange condition testOptions.set("manifest.target-file-size", "16MB"); - testOptions.set("manifest.merge-min-count", "3"); // Set full-compaction threshold very small to ensure it triggers testOptions.set("manifest.full-compaction-threshold-size", "1B"); testOptions.set("manifest-sort.enable", "true"); From f32d3b62a575d2ddb3e9619948e1e87d9dd70796 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 14:37:22 +0800 Subject: [PATCH 08/48] rm --- .../paimon/operation/ManifestFileSorter.java | 2 +- .../paimon/manifest/ManifestFileMetaTest.java | 93 ------------------- 2 files changed, 1 insertion(+), 94 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 9f40d430be91..53d04cefc5e4 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -78,7 +78,7 @@ static Optional> trySortRewrite( String sortField = resolveSortField(sortPartitionField, partitionType); if (sortField == null) { throw new IllegalArgumentException( - "Cannot resolve sort field for manifest sort rewrite. 
"); + "Cannot resolve sort field for manifest sort rewrite."); } int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); DataType sortFieldType = partitionType.getTypeAt(sortFieldIndex); diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index 6ded1beead21..9c7bb03aa204 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -971,99 +971,6 @@ public void testManifestSortWithOverlappingPartitions() { } } - /** - * Test manifest sort with more manifests having overlapping partition ranges. Creates a larger - * number of manifests in shuffled order to stress-test the sort rewrite logic. - * - *

Input manifests (shuffled, all ADD-only): - * - *

-     *   manifest-1: partitions [6, 10]
-     *   manifest-2: partitions [0, 3]
-     *   manifest-3: partitions [4, 8]  -- overlaps 1 and 2
-     *   manifest-4: partitions [9, 14] -- overlaps 1
-     *   manifest-5: partitions [2, 5]  -- overlaps 2 and 3
-     *   manifest-6: partitions [11, 15]-- overlaps 4
-     * 
- */ - @Test - public void testManifestSortWithShuffledOverlappingPartitions() { - List input = new ArrayList<>(); - - // manifest-1: partitions [6, 10] - List entries1 = new ArrayList<>(); - for (int p = 6; p <= 10; p++) { - entries1.add(makeEntry(true, String.format("m1-p%d", p), p)); - } - input.add(makeManifest(entries1.toArray(new ManifestEntry[0]))); - - // manifest-2: partitions [0, 3] - List entries2 = new ArrayList<>(); - for (int p = 0; p <= 3; p++) { - entries2.add(makeEntry(true, String.format("m2-p%d", p), p)); - } - input.add(makeManifest(entries2.toArray(new ManifestEntry[0]))); - - // manifest-3: partitions [4, 8] -- overlaps manifest-1 and manifest-2 - List entries3 = new ArrayList<>(); - for (int p = 4; p <= 8; p++) { - entries3.add(makeEntry(true, String.format("m3-p%d", p), p)); - } - input.add(makeManifest(entries3.toArray(new ManifestEntry[0]))); - - // manifest-4: partitions [9, 14] -- overlaps manifest-1 - List entries4 = new ArrayList<>(); - for (int p = 9; p <= 14; p++) { - entries4.add(makeEntry(true, String.format("m4-p%d", p), p)); - } - input.add(makeManifest(entries4.toArray(new ManifestEntry[0]))); - - // manifest-5: partitions [2, 5] -- overlaps manifest-2 and manifest-3 - List entries5 = new ArrayList<>(); - for (int p = 2; p <= 5; p++) { - entries5.add(makeEntry(true, String.format("m5-p%d", p), p)); - } - input.add(makeManifest(entries5.toArray(new ManifestEntry[0]))); - - // manifest-6: partitions [11, 15] -- overlaps manifest-4 - List entries6 = new ArrayList<>(); - for (int p = 11; p <= 15; p++) { - entries6.add(makeEntry(true, String.format("m6-p%d", p), p)); - } - input.add(makeManifest(entries6.toArray(new ManifestEntry[0]))); - - Options testOptions = new Options(); - testOptions.set("manifest-sort.enable", "true"); - - List merged = - ManifestFileMerger.merge( - input, - manifestFile, - getPartitionType(), - CoreOptions.fromMap(testOptions.toMap())); - - // Verify no data loss - assertEquivalentEntries(input, merged); - - 
// Verify entries within each output manifest are sorted by partition - for (ManifestFileMeta meta : merged) { - List entries = manifestFile.read(meta.fileName(), meta.fileSize()); - for (int i = 1; i < entries.size(); i++) { - int prevPartition = entries.get(i - 1).partition().getInt(0); - int currPartition = entries.get(i).partition().getInt(0); - assertThat(currPartition) - .as("Entries within a manifest should be sorted by partition") - .isGreaterThanOrEqualTo(prevPartition); - } - } - - // Verify output manifests are ordered by minValues - for (int i = 1; i < merged.size(); i++) { - int prevMin = merged.get(i - 1).partitionStats().minValues().getInt(0); - int currMin = merged.get(i).partitionStats().minValues().getInt(0); - assertThat(currMin).isGreaterThanOrEqualTo(prevMin); - } - } /** * Test manifest sort with heavily overlapping manifests that form multiple sorted runs. This From 39230db86acb140c0c4eed54ff969941b66f3190 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 14:37:51 +0800 Subject: [PATCH 09/48] fx --- .../java/org/apache/paimon/manifest/ManifestFileMetaTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index 9c7bb03aa204..cd3ff5180899 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -971,7 +971,6 @@ public void testManifestSortWithOverlappingPartitions() { } } - /** * Test manifest sort with heavily overlapping manifests that form multiple sorted runs. This * exercises buildLevelSortedRuns and the LSM level assignment logic. 
From 179276cec20e04cf761f1b24f868adf0404afe3d Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 15:46:08 +0800 Subject: [PATCH 10/48] fix # Conflicts: # paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java # Conflicts: # paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java --- .../generated/core_configuration.html | 2 +- .../java/org/apache/paimon/CoreOptions.java | 16 ++-- .../paimon/operation/ManifestFileMerger.java | 5 +- .../paimon/operation/ManifestFileSorter.java | 40 ++++++---- .../paimon/schema/SchemaValidation.java | 21 +++++ .../paimon/manifest/ManifestFileMetaTest.java | 6 +- .../paimon/schema/SchemaValidationTest.java | 77 +++++++++++++++++++ 7 files changed, 134 insertions(+), 33 deletions(-) diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html index 14363c7ffd02..febc8346f648 100644 --- a/docs/layouts/shortcodes/generated/core_configuration.html +++ b/docs/layouts/shortcodes/generated/core_configuration.html @@ -907,7 +907,7 @@
manifest-sort.enable
false Boolean - Whether to invoke manifest sort rewrite right after manifest merge during commit. The sort rewrite implementation is provided by an external module (e.g. morax) and discovered via ServiceLoader. When no implementation is registered on the classpath, this flag has no effect (manifest sort is silently skipped). + Whether to invoke manifest sort rewrite right after manifest merge during commit.
manifest-sort.max-rewrite-size
diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index a67ff6ca90d1..4b627122b23b 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -469,17 +469,13 @@ public InlineElement getDescription() { "To avoid frequent manifest merges, this parameter specifies the minimum number " + "of ManifestFileMeta to merge."); - public static final ConfigOption MANIFEST_SORT_ENABLE = - key("manifest-sort.enable") + public static final ConfigOption MANIFEST_SORT_ENABLED = + key("manifest-sort.enabled") .booleanType() .defaultValue(false) .withDescription( "Whether to invoke manifest sort rewrite right after manifest merge" - + " during commit. The sort rewrite implementation is provided" - + " by an external module (e.g. morax) and discovered via" - + " ServiceLoader. When no implementation is registered on the" - + " classpath, this flag has no effect (manifest sort is" - + " silently skipped)."); + + " during commit.)."); public static final ConfigOption MANIFEST_SORT_PARTITION_FIELD = key("manifest-sort.partition-field") @@ -491,7 +487,7 @@ public InlineElement getDescription() { + " caller (an external sort rewrite implementation). For" + " single-partition tables, optional (defaults to the only" + " partition field). 
For multi-partition tables, REQUIRED" - + " when 'manifest-sort.enable' is true."); + + " when 'manifest-sort.enabled' is true."); public static final ConfigOption MANIFEST_SORT_MAX_REWRITE_SIZE = key("manifest-sort.max-rewrite-size") @@ -2608,8 +2604,8 @@ public MemorySize manifestFullCompactionThresholdSize() { return options.get(MANIFEST_FULL_COMPACTION_FILE_SIZE); } - public boolean manifestSortEnable() { - return options.get(MANIFEST_SORT_ENABLE); + public boolean manifestSortEnabled() { + return options.get(MANIFEST_SORT_ENABLED); } @Nullable diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 83ef75a0335a..9c48d7d84508 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -75,8 +75,9 @@ public static List merge( List newFilesForAbort = new ArrayList<>(); try { - // If manifest-sort.enable is enabled and there are partition fields, use trySortRewrite - if (options.manifestSortEnable() && partitionType.getFieldCount() > 0) { + // If manifest-sort.enabled is enabled and there are partition fields, use + // trySortRewrite + if (options.manifestSortEnabled() && partitionType.getFieldCount() > 0) { Optional> sorted = ManifestFileSorter.trySortRewrite( input, newFilesForAbort, manifestFile, partitionType, options); diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 53d04cefc5e4..c089b561fc5d 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -244,6 +244,9 @@ private static ClassifyResult classifyManifests( defaultCompactionManifests.addAll(candidates); 
lsmFiles.removeAll(candidates); } + deleteEntries = + FileEntry.readDeletedEntries( + manifestFile, defaultCompactionManifests, manifestReadParallelism); } return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); @@ -579,11 +582,18 @@ private static List sortAndRewriteSection( RollingFileWriter writer = manifestFile.createRollingWriter(); + Exception exception = null; try { for (ManifestEntry entry : entriesToRewrite) { writer.write(entry); } + } catch (Exception e) { + exception = e; } finally { + if (exception != null) { + writer.abort(); + throw exception; + } writer.close(); } result.addAll(writer.result()); @@ -673,35 +683,31 @@ static String resolveSortField(String sortPartitionField, RowType partitionType) } /** - * Read a single manifest file for sort rewrite. If the meta contains delete entries, only ADD - * entries not in {@code deletedIdentifiers} are returned. Otherwise, check if any ADD entry is - * cancelled; if not, the file is kept as-is ({@code requireChange = false}). + * Read a single manifest file for sort rewrite. + * + *

When {@code deletedIdentifiers} is non-empty (full compaction path), only surviving ADD + * entries (not cancelled by deletedIdentifiers) are kept, and DELETE entries are dropped + * because the full compaction has already resolved them. + * + *

When {@code deletedIdentifiers} is empty (non-full-compaction path), all entries (both ADD + * and DELETE) are preserved to avoid losing unresolved DELETE entries. */ private static FullCompactionReadResult readForSortRewrite( ManifestFileMeta meta, ManifestFile manifestFile, Set deletedIdentifiers) { - if (meta.numDeletedFiles() > 0) { - List entries = new ArrayList<>(); + List entries = new ArrayList<>(); + if (deletedIdentifiers.isEmpty()) { + entries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); + } else { for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { if (entry.kind() == FileKind.ADD && !deletedIdentifiers.contains(entry.identifier())) { entries.add(entry); } } - return new FullCompactionReadResult(meta, true, entries); - } else { - boolean requireChange = false; - List entries = new ArrayList<>(); - for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { - if (deletedIdentifiers.contains(entry.identifier())) { - requireChange = true; - } else { - entries.add(entry); - } - } - return new FullCompactionReadResult(meta, requireChange, entries); } + return new FullCompactionReadResult(meta, true, entries); } /** A section of manifest files with pre-computed metadata. 
*/ diff --git a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java index 4ffc3ec0259e..7c60b81d0475 100644 --- a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java +++ b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java @@ -314,6 +314,8 @@ public static void validateTableSchema(TableSchema schema) { validateChangelogReadSequenceNumber(schema, options); validatePkClusteringOverride(options); + + validateManifestSort(schema, options); } public static void validateFallbackBranch(SchemaManager schemaManager, TableSchema schema) { @@ -1013,4 +1015,23 @@ public static void validatePkClusteringOverride(CoreOptions options) { } } } + + private static void validateManifestSort(TableSchema schema, CoreOptions options) { + if (options.manifestSortEnabled()) { + checkArgument( + !schema.partitionKeys().isEmpty(), + "Cannot enable '%s' for non-partition table.", + CoreOptions.MANIFEST_SORT_ENABLED.key()); + } + + String sortPartitionField = options.manifestSortPartitionField(); + if (sortPartitionField != null && !sortPartitionField.isEmpty()) { + checkArgument( + schema.partitionKeys().contains(sortPartitionField), + "'%s' = '%s' is not a partition field. 
Available partition fields: %s.", + CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), + sortPartitionField, + schema.partitionKeys()); + } + } } diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index cd3ff5180899..d1c15d412fad 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -939,7 +939,7 @@ public void testManifestSortWithOverlappingPartitions() { input.add(makeManifest(entriesF.toArray(new ManifestEntry[0]))); Options testOptions = new Options(); - testOptions.set("manifest-sort.enable", "true"); + testOptions.set("manifest-sort.enabled", "true"); List merged = ManifestFileMerger.merge( @@ -1025,7 +1025,7 @@ public void testManifestSortWithMultipleOverlappingRuns() { input.add(makeManifest(run3Entries.toArray(new ManifestEntry[0]))); Options testOptions = new Options(); - testOptions.set("manifest-sort.enable", "true"); + testOptions.set("manifest-sort.enabled", "true"); List merged = ManifestFileMerger.merge( @@ -1123,7 +1123,7 @@ public void testManifestSortEliminatesDeleteEntries() { testOptions.set("manifest.target-file-size", "16MB"); // Set full-compaction threshold very small to ensure it triggers testOptions.set("manifest.full-compaction-threshold-size", "1B"); - testOptions.set("manifest-sort.enable", "true"); + testOptions.set("manifest-sort.enabled", "true"); List merged = ManifestFileMerger.merge( diff --git a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java index d518f79a20f5..0aa62207f099 100644 --- a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java @@ -485,4 +485,81 @@ public void 
testFileFormatPerLevelAcceptsCompatibleSchema() { validateTableSchema( new TableSchema(1, fields, 10, emptyList(), singletonList("k"), options, "")); } + + @Test + void testManifestSortEnableOnNonPartitionTable() { + Map options = new HashMap<>(); + options.put(CoreOptions.MANIFEST_SORT_ENABLED.key(), "true"); + options.put(BUCKET.key(), String.valueOf(-1)); + + List fields = + Arrays.asList( + new DataField(0, "f0", DataTypes.INT()), + new DataField(1, "f1", DataTypes.INT())); + + assertThatThrownBy( + () -> + validateTableSchema( + new TableSchema( + 1, + fields, + 10, + emptyList(), + emptyList(), + options, + ""))) + .hasMessageContaining( + "Cannot enable 'manifest-sort.enabled' for non-partition table."); + } + + @Test + void testManifestSortPartitionFieldNotInPartitionKeys() { + Map options = new HashMap<>(); + options.put(CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), "f1"); + options.put(BUCKET.key(), String.valueOf(-1)); + + List fields = + Arrays.asList( + new DataField(0, "f0", DataTypes.INT()), + new DataField(1, "f1", DataTypes.INT())); + + assertThatThrownBy( + () -> + validateTableSchema( + new TableSchema( + 1, + fields, + 10, + singletonList("f0"), + emptyList(), + options, + ""))) + .hasMessageContaining("is not a partition field"); + } + + @Test + void testManifestSortValidConfig() { + Map options = new HashMap<>(); + options.put(CoreOptions.MANIFEST_SORT_ENABLED.key(), "true"); + options.put(CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), "f0"); + options.put(BUCKET.key(), String.valueOf(-1)); + + List fields = + Arrays.asList( + new DataField(0, "f0", DataTypes.INT()), + new DataField(1, "f1", DataTypes.INT())); + + assertThatNoException() + .isThrownBy( + () -> + validateTableSchema( + new TableSchema( + 1, + fields, + 10, + singletonList("f0"), + emptyList(), + options, + ""))); + } } From 643e0f2869cfa5a04ec41a1b18181ebbf9758842 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 16:35:01 +0800 Subject: [PATCH 11/48] fix --- 
.../generated/core_configuration.html | 4 +- .../java/org/apache/paimon/CoreOptions.java | 2 +- .../paimon/operation/FileStoreCommitImpl.java | 11 ++-- .../paimon/operation/ManifestFileSorter.java | 59 ++++++++++++++----- .../operation/ManifestPickStrategy.java | 12 ++-- 5 files changed, 60 insertions(+), 28 deletions(-) diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html index febc8346f648..71888d6ca2f2 100644 --- a/docs/layouts/shortcodes/generated/core_configuration.html +++ b/docs/layouts/shortcodes/generated/core_configuration.html @@ -904,7 +904,7 @@ To avoid frequent manifest merges, this parameter specifies the minimum number of ManifestFileMeta to merge. -

manifest-sort.enable
+
manifest-sort.enabled
false Boolean Whether to invoke manifest sort rewrite right after manifest merge during commit. @@ -925,7 +925,7 @@
manifest-sort.partition-field
(none) String - Partition field name to sort manifest entries by. Validated by schema validation; resolved to a 0-based index by the caller (an external sort rewrite implementation). For single-partition tables, optional (defaults to the only partition field). For multi-partition tables, REQUIRED when 'manifest-sort.enable' is true. + Partition field name to sort manifest entries by. Validated by schema validation; resolved to a 0-based index by the caller (an external sort rewrite implementation). For single-partition tables, optional (defaults to the only partition field). For multi-partition tables, REQUIRED when 'manifest-sort.enabled' is true.
manifest.target-file-size
diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index 4b627122b23b..312a1f1f0d87 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -475,7 +475,7 @@ public InlineElement getDescription() { .defaultValue(false) .withDescription( "Whether to invoke manifest sort rewrite right after manifest merge" - + " during commit.)."); + + " during commit."); public static final ConfigOption MANIFEST_SORT_PARTITION_FIELD = key("manifest-sort.partition-field") diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java index 3f994947f1b7..df24f019834b 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java @@ -54,6 +54,7 @@ import org.apache.paimon.operation.commit.SuccessCommitResult; import org.apache.paimon.operation.metrics.CommitMetrics; import org.apache.paimon.operation.metrics.CommitStats; +import org.apache.paimon.options.MemorySize; import org.apache.paimon.options.Options; import org.apache.paimon.partition.PartitionPredicate; import org.apache.paimon.partition.PartitionStatistics; @@ -1179,16 +1180,16 @@ private boolean compactManifestOnce() { manifestList.readDataManifests(latestSnapshot); List mergeAfterManifests; - // the fist trial - Options tempOptions = options.toConfiguration(); - tempOptions.set("manifest.merge-min-count", "1"); - tempOptions.set("manifest.full-compaction-threshold-size", "1B"); + // the fist trial: use a copied options with forced full compaction settings + Options compactOptions = Options.fromMap(options.toMap()); + compactOptions.set(CoreOptions.MANIFEST_MERGE_MIN_COUNT, 1); + 
compactOptions.set(CoreOptions.MANIFEST_FULL_COMPACTION_FILE_SIZE, MemorySize.ofBytes(1)); mergeAfterManifests = ManifestFileMerger.merge( mergeBeforeManifests, manifestFile, partitionType, - CoreOptions.fromMap(tempOptions.toMap())); + CoreOptions.fromMap(compactOptions.toMap())); if (new HashSet<>(mergeBeforeManifests).equals(new HashSet<>(mergeAfterManifests))) { // no need to commit this snapshot, because no compact were happened diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index c089b561fc5d..49a436a57ed9 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -22,7 +22,6 @@ import org.apache.paimon.data.BinaryRow; import org.apache.paimon.io.RollingFileWriter; import org.apache.paimon.manifest.FileEntry; -import org.apache.paimon.manifest.FileKind; import org.apache.paimon.manifest.ManifestEntry; import org.apache.paimon.manifest.ManifestFile; import org.apache.paimon.manifest.ManifestFileMeta; @@ -42,6 +41,7 @@ import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Optional; @@ -111,9 +111,19 @@ static Optional> trySortRewrite( List pickedRuns = pickStrategy.pick(levelRuns); if (pickedRuns.isEmpty() && defaultCompactionManifests.isEmpty()) { + LOG.debug( + "Manifest sort rewrite skipped: no runs picked and no defaultCompaction files."); return Optional.of(input); } + LOG.info( + "Manifest sort rewrite: input={} files, lsm={} runs, picked={} runs, " + + "defaultCompaction={} files.", + input.size(), + levelRuns.size(), + pickedRuns.size(), + defaultCompactionManifests.size()); + Set pickedSet = new HashSet<>(pickedRuns); List reusedFiles = new ArrayList<>(); for (ManifestSortedRun run : 
levelRuns) { @@ -155,6 +165,11 @@ static Optional> trySortRewrite( result.addAll(rewritten); newFilesForAbort.addAll(sortNewFiles); + LOG.info( + "Manifest sort rewrite completed: sections={}, newFiles={}, resultFiles={}.", + sections.size(), + sortNewFiles.size(), + result.size()); return Optional.of(result); } @@ -226,6 +241,7 @@ private static ClassifyResult classifyManifests( } } else { // Minor-style pick: merge adjacent small manifests when no full compact triggered. + Set toRemove = new HashSet<>(); List candidates = new ArrayList<>(); long candidateSize = 0; for (ManifestFileMeta file : input) { @@ -234,7 +250,7 @@ private static ClassifyResult classifyManifests( if (candidateSize >= suggestedMetaSize) { if (candidates.size() > 1) { defaultCompactionManifests.addAll(candidates); - lsmFiles.removeAll(candidates); + toRemove.addAll(candidates); } candidates.clear(); candidateSize = 0; @@ -242,11 +258,11 @@ private static ClassifyResult classifyManifests( } if (candidates.size() >= mergeMinCount) { defaultCompactionManifests.addAll(candidates); - lsmFiles.removeAll(candidates); + toRemove.addAll(candidates); + } + if (!toRemove.isEmpty()) { + lsmFiles.removeIf(toRemove::contains); } - deleteEntries = - FileEntry.readDeletedEntries( - manifestFile, defaultCompactionManifests, manifestReadParallelism); } return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); @@ -435,7 +451,8 @@ static List buildLevelSortedRuns( int n = runs.size(); for (int i = 0; i < n; i++) { if (i >= n - 4) { - runs.get(i).setLevel(n - i); + // top-4 largest runs get level 4-1 + runs.get(i).setLevel(i - (n - 4) + 1); } else { runs.get(i).setLevel(0); } @@ -580,6 +597,17 @@ private static List sortAndRewriteSection( if (!entriesToRewrite.isEmpty()) { entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); + // When non-full-compact (deletedIdentifiers is null, meaning delete entries + // were not read), entries may contain both ADD and 
DELETE. Merge them following + // FileEntry.mergeEntries logic to cancel paired ADD/DELETE and keep unresolved + // DELETE entries whose ADD is in a previous manifest file. + if (deletedIdentifiers == null) { + LinkedHashMap mergedMap = + new LinkedHashMap<>(); + FileEntry.mergeEntries(entriesToRewrite, mergedMap); + entriesToRewrite = new ArrayList<>(mergedMap.values()); + } + RollingFileWriter writer = manifestFile.createRollingWriter(); Exception exception = null; @@ -657,6 +685,11 @@ static int compareSortKey( if (c != 0) { return c; } + // ADD before DELETE, so that mergeEntries can correctly cancel pairs + int kindCmp = a.kind().compareTo(b.kind()); + if (kindCmp != 0) { + return kindCmp; + } return a.file().fileName().compareTo(b.file().fileName()); } @@ -667,19 +700,14 @@ static int compareSortKey( * *
    *
  1. If {@code manifest-sort.partition-field} is configured, return that value. - *
  2. Otherwise, if the table has exactly one partition field, return that field name. - *
  3. Otherwise return {@code null}. + *
  4. Otherwise, default to the first partition field. *
*/ - @Nullable static String resolveSortField(String sortPartitionField, RowType partitionType) { if (sortPartitionField != null && !sortPartitionField.isEmpty()) { return sortPartitionField; } - if (partitionType.getFieldCount() == 1) { - return partitionType.getFieldNames().get(0); - } - return null; + return partitionType.getFieldNames().get(0); } /** @@ -701,8 +729,7 @@ private static FullCompactionReadResult readForSortRewrite( entries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); } else { for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { - if (entry.kind() == FileKind.ADD - && !deletedIdentifiers.contains(entry.identifier())) { + if (!deletedIdentifiers.contains(entry.identifier())) { entries.add(entry); } } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index 6421328550c9..736425bb4d6f 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -109,16 +109,20 @@ private List pickForSizeAmp(List levelRuns private List pickForSizeRatioAndForce(List levelRuns) { // levelRuns is already sorted by level ascending (set by buildLevelSortedRuns) List picked = new ArrayList<>(); - long pickedSize = 0; - // From low to high: forced pick level0/level1, then SizeRatio for the rest. - for (ManifestSortedRun run : levelRuns) { + // Always pick the first run to guarantee a non-empty result. + picked.add(levelRuns.get(0)); + long pickedSize = levelRuns.get(0).totalSize(); + + // From the second run onward: forced pick level0/level1, then SizeRatio for the rest. 
+ for (int i = 1; i < levelRuns.size(); i++) { + ManifestSortedRun run = levelRuns.get(i); if (run.level() <= 1) { picked.add(run); pickedSize += run.totalSize(); } else { long nextRunSize = run.totalSize(); - if (pickedSize > 0 && pickedSize * sizeRatioThreshold >= nextRunSize) { + if (pickedSize * sizeRatioThreshold >= nextRunSize) { picked.add(run); pickedSize += nextRunSize; } From eff28657fd6bfdd547a813bcffd49568908c7016 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 16:54:39 +0800 Subject: [PATCH 12/48] fix --- .../shortcodes/generated/core_configuration.html | 2 +- .../main/java/org/apache/paimon/CoreOptions.java | 6 ++---- .../apache/paimon/operation/ManifestFileSorter.java | 13 +++++-------- .../paimon/operation/ManifestPickStrategy.java | 8 +++++--- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html index 71888d6ca2f2..52dbef3b188b 100644 --- a/docs/layouts/shortcodes/generated/core_configuration.html +++ b/docs/layouts/shortcodes/generated/core_configuration.html @@ -925,7 +925,7 @@
manifest-sort.partition-field
(none) String - Partition field name to sort manifest entries by. Validated by schema validation; resolved to a 0-based index by the caller (an external sort rewrite implementation). For single-partition tables, optional (defaults to the only partition field). For multi-partition tables, REQUIRED when 'manifest-sort.enabled' is true. + Partition field name to sort manifest entries by. Validated by schema validation; resolved to a 0-based index by the caller (an external sort rewrite implementation). If not configured, defaults to the first partition field.
manifest.target-file-size
diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index 312a1f1f0d87..4246366b0458 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -484,10 +484,8 @@ public InlineElement getDescription() { .withDescription( "Partition field name to sort manifest entries by. Validated by" + " schema validation; resolved to a 0-based index by the" - + " caller (an external sort rewrite implementation). For" - + " single-partition tables, optional (defaults to the only" - + " partition field). For multi-partition tables, REQUIRED" - + " when 'manifest-sort.enabled' is true."); + + " caller (an external sort rewrite implementation). If" + + " not configured, defaults to the first partition field."); public static final ConfigOption MANIFEST_SORT_MAX_REWRITE_SIZE = key("manifest-sort.max-rewrite-size") diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 49a436a57ed9..1475d9b13611 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -449,10 +449,10 @@ static List buildLevelSortedRuns( runs.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); int n = runs.size(); + int maxLevel = 4; for (int i = 0; i < n; i++) { - if (i >= n - 4) { - // top-4 largest runs get level 4-1 - runs.get(i).setLevel(i - (n - 4) + 1); + if (i >= n - maxLevel) { + runs.get(i).setLevel(i - (n - maxLevel) + 1); } else { runs.get(i).setLevel(0); } @@ -580,12 +580,9 @@ private static List sortAndRewriteSection( @Nullable Integer manifestReadParallelism) throws Exception { - Set safeDeletedIds = - deletedIdentifiers != null ? 
deletedIdentifiers : new HashSet<>(); - // Parallel read: each meta is read independently Function> reader = - meta -> singletonList(readForSortRewrite(meta, manifestFile, safeDeletedIds)); + meta -> singletonList(readForSortRewrite(meta, manifestFile, deletedIdentifiers)); List entriesToRewrite = new ArrayList<>(); for (FullCompactionReadResult readResult : @@ -725,7 +722,7 @@ private static FullCompactionReadResult readForSortRewrite( ManifestFile manifestFile, Set deletedIdentifiers) { List entries = new ArrayList<>(); - if (deletedIdentifiers.isEmpty()) { + if (deletedIdentifiers == null || deletedIdentifiers.isEmpty()) { entries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); } else { for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index 736425bb4d6f..ffd10fcf5473 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -18,6 +18,8 @@ package org.apache.paimon.operation; +import org.apache.paimon.utils.Preconditions; + import java.util.ArrayList; import java.util.List; @@ -32,8 +34,6 @@ *
  • SizeRatio: from low to high, pick adjacent runs whose amplification factor is less * than {@code sizeRatioThreshold}. *
  • Forced pick: level0 and level1 runs are always picked. - *
  • Delete pick: additionally pick runs containing manifest files with {@code - * numDeletedFiles > 0}. * */ public class ManifestPickStrategy { @@ -42,6 +42,8 @@ public class ManifestPickStrategy { private final int sizeRatioThreshold; public ManifestPickStrategy(int sizeAmpThreshold, int sizeRatioThreshold) { + Preconditions.checkArgument(sizeAmpThreshold > 0, "sizeAmpThreshold must be positive"); + Preconditions.checkArgument(sizeRatioThreshold > 0, "sizeRatioThreshold must be positive"); this.sizeAmpThreshold = sizeAmpThreshold; this.sizeRatioThreshold = sizeRatioThreshold; } @@ -91,7 +93,7 @@ private List pickForSizeAmp(List levelRuns } } - if (lowerLevelTotalSize > highestRun.totalSize() * sizeAmpThreshold) { + if (lowerLevelTotalSize / sizeAmpThreshold > highestRun.totalSize()) { return new ArrayList<>(levelRuns); } return null; From cd36036d03eafdb2b0c8a64c49508c893c335400 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 17:09:32 +0800 Subject: [PATCH 13/48] fix --- .../operation/ManifestPickStrategy.java | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index ffd10fcf5473..524caed50dbf 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -70,8 +70,11 @@ public List pick(List levelRuns) { } /** - * SizeAmp check: if all lower-level (0~3) runs' total size > highest-level (level4) run's size - * * sizeAmpThreshold, pick all runs for full compaction. + * SizeAmp check: if all lower-level (0~3) runs' total size exceeds the highest-level run's size + * by more than {@code sizeAmpThreshold} percent, pick all runs for full compaction. + * + *

    Formula (consistent with {@code UniversalCompaction#pickForSizeAmp}): {@code + * lowerLevelTotalSize * 100 > sizeAmpThreshold * highestRunSize} */ private List pickForSizeAmp(List levelRuns) { if (levelRuns.isEmpty()) { @@ -93,7 +96,8 @@ private List pickForSizeAmp(List levelRuns } } - if (lowerLevelTotalSize / sizeAmpThreshold > highestRun.totalSize()) { + // size amplification = percentage of additional size + if (lowerLevelTotalSize * 100 > (long) sizeAmpThreshold * highestRun.totalSize()) { return new ArrayList<>(levelRuns); } return null; @@ -104,9 +108,12 @@ private List pickForSizeAmp(List levelRuns * *

      *
    • Level0 and level1 are always picked. - *
    • From low to high, if the cumulative picked size * sizeRatioThreshold >= next run's - * size, continue picking. + *
    • From low to high, if the cumulative picked size with ratio amplification covers the + * next run's size, continue picking. *
    + * + *

    Formula (consistent with {@code UniversalCompaction#pickForSizeRatio}): {@code pickedSize + * * (100.0 + sizeRatioThreshold) / 100.0 >= nextRunSize} */ private List pickForSizeRatioAndForce(List levelRuns) { // levelRuns is already sorted by level ascending (set by buildLevelSortedRuns) @@ -124,13 +131,15 @@ private List pickForSizeRatioAndForce(List pickedSize += run.totalSize(); } else { long nextRunSize = run.totalSize(); - if (pickedSize * sizeRatioThreshold >= nextRunSize) { + if (pickedSize * (100.0 + sizeRatioThreshold) / 100.0 >= nextRunSize) { picked.add(run); pickedSize += nextRunSize; } } } - + if (picked.size() == 1) { + return new ArrayList<>(); + } return picked; } } From e472063f4149cd7d76b6100921276b2d28a5ce85 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 17:32:38 +0800 Subject: [PATCH 14/48] rmMinorComp --- .../generated/core_configuration.html | 2 +- .../java/org/apache/paimon/CoreOptions.java | 3 +- .../paimon/operation/ManifestFileSorter.java | 39 ------------------- 3 files changed, 2 insertions(+), 42 deletions(-) diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html index 52dbef3b188b..daae088f61c5 100644 --- a/docs/layouts/shortcodes/generated/core_configuration.html +++ b/docs/layouts/shortcodes/generated/core_configuration.html @@ -925,7 +925,7 @@

    manifest-sort.partition-field
    (none) String - Partition field name to sort manifest entries by. Validated by schema validation; resolved to a 0-based index by the caller (an external sort rewrite implementation). If not configured, defaults to the first partition field. + Partition field name to sort manifest entries by. Validated by schema validation; If not configured, defaults to the first partition field.
    manifest.target-file-size
    diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index 4246366b0458..c7373d38d14f 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -483,8 +483,7 @@ public InlineElement getDescription() { .noDefaultValue() .withDescription( "Partition field name to sort manifest entries by. Validated by" - + " schema validation; resolved to a 0-based index by the" - + " caller (an external sort rewrite implementation). If" + + " schema validation; If" + " not configured, defaults to the first partition field."); public static final ConfigOption MANIFEST_SORT_MAX_REWRITE_SIZE = diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 1475d9b13611..dfca7380250c 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -41,7 +41,6 @@ import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Optional; @@ -73,7 +72,6 @@ static Optional> trySortRewrite( long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes(); Integer manifestReadParallelism = options.scanManifestParallelism(); String sortPartitionField = options.manifestSortPartitionField(); - int mergeMinCount = options.manifestMergeMinCount(); // Step 1: Resolve sort field. 
String sortField = resolveSortField(sortPartitionField, partitionType); if (sortField == null) { @@ -89,7 +87,6 @@ static Optional> trySortRewrite( input, suggestedMetaSize, manifestFullCompactionSize, - mergeMinCount, manifestFile, partitionType, manifestReadParallelism); @@ -187,7 +184,6 @@ private static ClassifyResult classifyManifests( List input, long suggestedMetaSize, long manifestFullCompactionSize, - int mergeMinCount, ManifestFile manifestFile, RowType partitionType, @Nullable Integer manifestReadParallelism) { @@ -239,30 +235,6 @@ private static ClassifyResult classifyManifests( defaultCompactionManifests.add(file); } } - } else { - // Minor-style pick: merge adjacent small manifests when no full compact triggered. - Set toRemove = new HashSet<>(); - List candidates = new ArrayList<>(); - long candidateSize = 0; - for (ManifestFileMeta file : input) { - candidateSize += file.fileSize(); - candidates.add(file); - if (candidateSize >= suggestedMetaSize) { - if (candidates.size() > 1) { - defaultCompactionManifests.addAll(candidates); - toRemove.addAll(candidates); - } - candidates.clear(); - candidateSize = 0; - } - } - if (candidates.size() >= mergeMinCount) { - defaultCompactionManifests.addAll(candidates); - toRemove.addAll(candidates); - } - if (!toRemove.isEmpty()) { - lsmFiles.removeIf(toRemove::contains); - } } return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); @@ -594,17 +566,6 @@ private static List sortAndRewriteSection( if (!entriesToRewrite.isEmpty()) { entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); - // When non-full-compact (deletedIdentifiers is null, meaning delete entries - // were not read), entries may contain both ADD and DELETE. Merge them following - // FileEntry.mergeEntries logic to cancel paired ADD/DELETE and keep unresolved - // DELETE entries whose ADD is in a previous manifest file. 
- if (deletedIdentifiers == null) { - LinkedHashMap mergedMap = - new LinkedHashMap<>(); - FileEntry.mergeEntries(entriesToRewrite, mergedMap); - entriesToRewrite = new ArrayList<>(mergedMap.values()); - } - RollingFileWriter writer = manifestFile.createRollingWriter(); Exception exception = null; From 42fbcc7db5d7adead8d7919cdcabe1e031e17dc5 Mon Sep 17 00:00:00 2001 From: umi Date: Fri, 15 May 2026 16:32:34 +0800 Subject: [PATCH 15/48] test --- .../java/org/apache/paimon/CoreOptions.java | 5 +- paimon-core/pom.xml | 15 + .../paimon/operation/ManifestFileMerger.java | 1 - .../paimon/operation/ManifestFileSorter.java | 298 +++++++++++++++--- 4 files changed, 269 insertions(+), 50 deletions(-) diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index c7373d38d14f..3ce03942070a 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -483,8 +483,7 @@ public InlineElement getDescription() { .noDefaultValue() .withDescription( "Partition field name to sort manifest entries by. Validated by" - + " schema validation; If" - + " not configured, defaults to the first partition field."); + + " schema validation; If not configured, defaults to the first partition field."); public static final ConfigOption MANIFEST_SORT_MAX_REWRITE_SIZE = key("manifest-sort.max-rewrite-size") @@ -499,7 +498,7 @@ public InlineElement getDescription() { public static final ConfigOption MANIFEST_SORT_OPEN_FILE_COST = key("manifest-sort.open-file-cost") .memoryType() - .defaultValue(MemorySize.ofMebiBytes(4)) + .defaultValue(MemorySize.ofKibiBytes(40)) .withDescription( "Open file cost of a manifest file during sort rewrite. 
" + "It is added to each manifest file's size when computing " diff --git a/paimon-core/pom.xml b/paimon-core/pom.xml index 9506bdf03959..e570324ee9e3 100644 --- a/paimon-core/pom.xml +++ b/paimon-core/pom.xml @@ -36,6 +36,21 @@ under the License. + + com.aliyun.jindodata + jindo-core-macos-11_0-aarch64 + 6.9.1 + + + + + + + + org.apache.paimon + paimon-ali-jindo + 1.4-ali-SNAPSHOT + org.apache.paimon paimon-common diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 9c48d7d84508..e197868849c1 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -249,7 +249,6 @@ public static Optional> tryFullCompaction( } // 2.2. merge - if (toBeMerged.size() <= 1) { return Optional.empty(); } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index dfca7380250c..64919346256e 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -44,6 +44,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Optional; +import java.util.PriorityQueue; import java.util.Set; import java.util.function.Function; @@ -138,12 +139,24 @@ static Optional> trySortRewrite( pickedFiles.addAll(defaultCompactionManifests); Set defaultCompactionSet = new HashSet<>(defaultCompactionManifests); - - List
    sections = - splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType, defaultCompactionSet); - sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); long maxRewriteSize = options.manifestSortMaxRewriteSize(); long openFileCost = options.manifestSortOpenFileCost(); + + List
    sections = + splitIntoSections( + pickedFiles, + sortFieldIndex, + sortFieldType, + defaultCompactionSet, + openFileCost); +// sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); + System.out.println( + "After splitIntoSections: sections=" + + sections.size() + + ", pickedFiles=" + + pickedFiles.size()); + LOG.info("After mergeSmallAdjacentSections: sections={}.", sections.size()); + List sortNewFiles = new ArrayList<>(); List rewritten = @@ -236,7 +249,6 @@ private static ClassifyResult classifyManifests( } } } - return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); } @@ -266,20 +278,32 @@ private static List rewriteSections( List result = new ArrayList<>(); long processedSize = 0; - for (Section section : sections) { + boolean reachedLimit = false; + + for (int i = 0; i < sections.size(); i++) { + Section section = sections.get(i); // Single-file section without defaultCompaction: already sorted, skip rewrite. - if (section.files.size() == 1 && !section.hasDefaultCompactMeta) { - result.addAll(section.files); + if (section.files.size() == 1) { + if (!section.hasDefaultCompactMeta || deleteEntries == null) { + result.addAll(section.files); + } else { + processedSize = processedSize + section.totalSizeWithCost; + rewriteSubSegments( + section.files, + defaultCompactionSet, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + suggestedMetaSize, + sortNewFiles, + result, + manifestReadParallelism); + } continue; } - - long sectionSize = section.totalSize + (long) section.files.size() * openFileCost; - + long sectionSize = section.totalSizeWithCost; boolean exceedsThreshold = processedSize + sectionSize > maxRewriteSize; - if (exceedsThreshold && !section.hasDefaultCompactMeta) { - result.addAll(section.files); - continue; - } if (!exceedsThreshold) { processedSize += sectionSize; @@ -293,7 +317,63 @@ private static List rewriteSections( manifestReadParallelism); sortNewFiles.addAll(merged); 
result.addAll(merged); - } else { + } else if (!reachedLimit) { + // First time exceeding threshold without defaultCompaction: + // partial rewrite within remaining budget. + long remaining = maxRewriteSize - processedSize; + processedSize += sectionSize; + // Split section into two parts: files within budget and remaining files + List toRewrite = new ArrayList<>(); + List remainingFiles = new ArrayList<>(); + long rewriteSize = 0; + long remainingSize = 0; + long remainingSizeWithCost = 0; + boolean remainingHasDefault = false; + + for (ManifestFileMeta file : section.files) { + long fileCost = Math.max(file.fileSize(), openFileCost); + if (rewriteSize + fileCost <= remaining) { + toRewrite.add(file); + rewriteSize += fileCost; + } else { + remainingFiles.add(file); + remainingSize += file.fileSize(); + remainingSizeWithCost += fileCost; + if (defaultCompactionSet.contains(file)) { + remainingHasDefault = true; + } + } + } + + if (toRewrite.size() > 1) { + List merged = + sortAndRewriteSection( + toRewrite, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + manifestReadParallelism); + sortNewFiles.addAll(merged); + result.addAll(merged); + } else if (toRewrite.size() == 1) { + sortNewFiles.addAll(toRewrite); + result.addAll(toRewrite); + } + + // Create new section for remaining files and append to sections list + if (!remainingFiles.isEmpty()) { + Section remainingSection = + new Section( + remainingFiles, + remainingSize, + remainingSizeWithCost, + remainingHasDefault); + // Append remaining section to the end of sections list + sections.add(remainingSection); + } + reachedLimit = true; + } else if (section.hasDefaultCompactMeta) { rewriteSubSegments( section.files, defaultCompactionSet, @@ -305,6 +385,8 @@ private static List rewriteSections( sortNewFiles, result, manifestReadParallelism); + } else { + result.addAll(section.files); } } return result; @@ -368,6 +450,53 @@ private static void rewriteSubSegments( } } + /** + * Partial rewrite of 
a section: only rewrite files that fit within the remaining budget. Files + * beyond the budget are kept as-is. + */ + private static void partialRewriteSection( + List sectionFiles, + long remaining, + long openFileCost, + ManifestFile manifestFile, + int sortFieldIndex, + DataType sortFieldType, + @Nullable Set deleteEntries, + List sortNewFiles, + List result, + @Nullable Integer manifestReadParallelism) + throws Exception { + List toRewrite = new ArrayList<>(); + int splitIndex = 0; + long partialSize = 0; + for (int i = 0; i < sectionFiles.size(); i++) { + long fileCost = Math.max(sectionFiles.get(i).fileSize(), openFileCost); + if (partialSize + fileCost > remaining) { + break; + } + toRewrite.add(sectionFiles.get(i)); + partialSize += fileCost; + splitIndex = i + 1; + } + if (toRewrite.size() > 1) { + List merged = + sortAndRewriteSection( + toRewrite, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + manifestReadParallelism); + sortNewFiles.addAll(merged); + result.addAll(merged); + } else { + result.addAll(toRewrite); + } + for (int i = splitIndex; i < sectionFiles.size(); i++) { + result.add(sectionFiles.get(i)); + } + } + /** * Build level-sorted runs from a list of manifest files. 
Sorts files by min partition value, * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 @@ -375,6 +504,7 @@ private static void rewriteSubSegments( */ static List buildLevelSortedRuns( List input, int sortFieldIndex, DataType sortFieldType) { + // Step 1: Sort by min value (if equal, then by max value) input.sort( (a, b) -> { int cmp = @@ -393,43 +523,69 @@ static List buildLevelSortedRuns( sortFieldType); }); - List> runFilesList = new ArrayList<>(); - List currentRun = new ArrayList<>(); - currentRun.add(input.get(0)); - for (int i = 1; i < input.size(); i++) { - ManifestFileMeta file = input.get(i); - ManifestFileMeta last = currentRun.get(currentRun.size() - 1); - if (compareField( - file.partitionStats().minValues(), - last.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType) - >= 0) { - currentRun.add(file); - } else { - runFilesList.add(currentRun); - currentRun = new ArrayList<>(); - currentRun.add(file); + // Step 2: Interval graph coloring algorithm - assign files to runs + // Use priority queue to track runs by their max values + PriorityQueue> runs = + new PriorityQueue<>( + (r1, r2) -> { + ManifestFileMeta last1 = r1.get(r1.size() - 1); + ManifestFileMeta last2 = r2.get(r2.size() - 1); + return compareField( + last1.partitionStats().maxValues(), + last2.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType); + }); + + for (ManifestFileMeta file : input) { + boolean addedToExisting = false; + + // Try to find a run where current file's min >= run's max + if (!runs.isEmpty()) { + List earliestRun = runs.peek(); + ManifestFileMeta last = earliestRun.get(earliestRun.size() - 1); + + if (compareField( + file.partitionStats().minValues(), + last.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType) + >= 0) { + // Current file can be added to this run + runs.poll(); + earliestRun.add(file); + runs.offer(earliestRun); + addedToExisting = true; + } + } + + if (!addedToExisting) { + // 
Create a new run + List newRun = new ArrayList<>(); + newRun.add(file); + runs.offer(newRun); } } - runFilesList.add(currentRun); - List runs = new ArrayList<>(runFilesList.size()); - for (List rf : runFilesList) { - runs.add(ManifestSortedRun.fromSorted(rf)); + // Step 3: Convert to ManifestSortedRun list + List result = new ArrayList<>(); + while (!runs.isEmpty()) { + result.add(ManifestSortedRun.fromSorted(runs.poll())); } - runs.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); - int n = runs.size(); + // Step 4: Sort by totalSize and assign levels + result.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); + int n = result.size(); int maxLevel = 4; for (int i = 0; i < n; i++) { if (i >= n - maxLevel) { - runs.get(i).setLevel(i - (n - maxLevel) + 1); + result.get(i).setLevel(i - (n - maxLevel) + 1); } else { - runs.get(i).setLevel(0); + result.get(i).setLevel(0); } } - return runs; + System.out.println("run num: " + result.size()); + return result; } /** @@ -440,7 +596,8 @@ static List
    splitIntoSections( List pickedFiles, int sortFieldIndex, DataType sortFieldType, - Set defaultCompactionSet) { + Set defaultCompactionSet, + long openFileCost) { pickedFiles.sort( (a, b) -> { int cmp = @@ -462,10 +619,12 @@ static List
    splitIntoSections( List
    sections = new ArrayList<>(); List currentFiles = new ArrayList<>(); long currentTotalSize = 0; + long currentTotalSizeWithCost = 0; boolean currentHasDefault = false; ManifestFileMeta first = pickedFiles.get(0); currentFiles.add(first); currentTotalSize += first.fileSize(); + currentTotalSizeWithCost += Math.max(first.fileSize(), openFileCost); currentHasDefault = defaultCompactionSet.contains(first); BinaryRow sectionMaxBound = first.partitionStats().maxValues(); @@ -477,16 +636,24 @@ static List
    splitIntoSections( sortFieldIndex, sortFieldType) >= 0) { - sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); + sections.add( + new Section( + currentFiles, + currentTotalSize, + currentTotalSizeWithCost, + currentHasDefault)); currentFiles = new ArrayList<>(); currentTotalSize = 0; + currentTotalSizeWithCost = 0; currentFiles.add(file); currentTotalSize += file.fileSize(); + currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); currentHasDefault = defaultCompactionSet.contains(file); sectionMaxBound = file.partitionStats().maxValues(); } else { currentFiles.add(file); currentTotalSize += file.fileSize(); + currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); if (!currentHasDefault && defaultCompactionSet.contains(file)) { currentHasDefault = true; } @@ -500,7 +667,12 @@ static List
    splitIntoSections( } } } - sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); + sections.add( + new Section( + currentFiles, + currentTotalSize, + currentTotalSizeWithCost, + currentHasDefault)); return sections; } @@ -552,7 +724,13 @@ private static List sortAndRewriteSection( @Nullable Integer manifestReadParallelism) throws Exception { + long totalStart = System.currentTimeMillis(); + long readTime = 0; + long sortTime = 0; + long writeTime = 0; + // Parallel read: each meta is read independently + long readStart = System.currentTimeMillis(); Function> reader = meta -> singletonList(readForSortRewrite(meta, manifestFile, deletedIdentifiers)); @@ -561,11 +739,15 @@ private static List sortAndRewriteSection( sequentialBatchedExecute(reader, section, manifestReadParallelism)) { entriesToRewrite.addAll(readResult.entries); } + readTime = System.currentTimeMillis() - readStart; List result = new ArrayList<>(); if (!entriesToRewrite.isEmpty()) { + long sortStart = System.currentTimeMillis(); entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); + sortTime = System.currentTimeMillis() - sortStart; + long writeStart = System.currentTimeMillis(); RollingFileWriter writer = manifestFile.createRollingWriter(); Exception exception = null; @@ -583,6 +765,23 @@ private static List sortAndRewriteSection( writer.close(); } result.addAll(writer.result()); + writeTime = System.currentTimeMillis() - writeStart; + } + + long totalTime = System.currentTimeMillis() - totalStart; + if (totalTime > 0) { + System.out.println( + String.format( + "[sortAndRewriteSection] Total: %d ms, Read: %d ms (%.1f%%), Sort: %d ms (%.1f%%), Write: %d ms (%.1f%%), Entries: %d, Files: %d", + totalTime, + readTime, + 100.0 * readTime / totalTime, + sortTime, + 100.0 * sortTime / totalTime, + writeTime, + 100.0 * writeTime / totalTime, + entriesToRewrite.size(), + result.size())); } return result; @@ -699,11 +898,17 @@ private static 
FullCompactionReadResult readForSortRewrite( static class Section { final List files; final long totalSize; + final long totalSizeWithCost; final boolean hasDefaultCompactMeta; - Section(List files, long totalSize, boolean hasDefaultCompactMeta) { + Section( + List files, + long totalSize, + long totalSizeWithCost, + boolean hasDefaultCompactMeta) { this.files = files; this.totalSize = totalSize; + this.totalSizeWithCost = totalSizeWithCost; this.hasDefaultCompactMeta = hasDefaultCompactMeta; } @@ -714,6 +919,7 @@ static Section merge(Section a, Section b) { return new Section( merged, a.totalSize + b.totalSize, + a.totalSizeWithCost + b.totalSizeWithCost, a.hasDefaultCompactMeta || b.hasDefaultCompactMeta); } } From 4f889c96dfcc6e0eae11a5364f81bc3c750de74e Mon Sep 17 00:00:00 2001 From: umi Date: Sun, 17 May 2026 13:39:28 +0800 Subject: [PATCH 16/48] fix --- .../paimon/operation/ManifestFileMerger.java | 1 + .../paimon/operation/ManifestFileSorter.java | 49 +++++++++++-------- .../operation/ManifestPickStrategy.java | 2 +- 3 files changed, 31 insertions(+), 21 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index e197868849c1..b6bfedbfd6bf 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -207,6 +207,7 @@ public static Optional> tryFullCompaction( totalManifestSize, deltaDeleteFileNum, totalDeltaFileSize); + System.out.println("Start Manifest File Full Compaction: totalManifestSize: " + totalManifestSize + ", deltaDeleteFileNum " + deltaDeleteFileNum + ", totalDeltaFileSize " + totalDeltaFileSize); // 2.1. 
read all delete entries diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 64919346256e..c4ef6aa802cf 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -149,7 +149,7 @@ static Optional> trySortRewrite( sortFieldType, defaultCompactionSet, openFileCost); -// sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); + sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); System.out.println( "After splitIntoSections: sections=" + sections.size() @@ -167,7 +167,7 @@ static Optional> trySortRewrite( sortFieldIndex, sortFieldType, deleteEntries, - suggestedMetaSize, + manifestFullCompactionSize, maxRewriteSize, openFileCost, sortNewFiles, @@ -212,7 +212,7 @@ private static ClassifyResult classifyManifests( List defaultCompactionManifests = new ArrayList<>(); List lsmFiles = new LinkedList<>(input); - Set deleteEntries = null; + Set deleteEntries = new HashSet<>(); if (totalDeltaFileSize >= manifestFullCompactionSize) { // Full compact triggered: read delete entries and classify by predicate. @@ -268,8 +268,8 @@ private static List rewriteSections( ManifestFile manifestFile, int sortFieldIndex, DataType sortFieldType, - @Nullable Set deleteEntries, - long suggestedMetaSize, + Set deleteEntries, + long manifestFullCompactionSize, long maxRewriteSize, long openFileCost, List sortNewFiles, @@ -284,7 +284,7 @@ private static List rewriteSections( Section section = sections.get(i); // Single-file section without defaultCompaction: already sorted, skip rewrite. 
if (section.files.size() == 1) { - if (!section.hasDefaultCompactMeta || deleteEntries == null) { + if (!section.hasDefaultCompactMeta || deleteEntries.isEmpty()) { result.addAll(section.files); } else { processedSize = processedSize + section.totalSizeWithCost; @@ -295,7 +295,7 @@ private static List rewriteSections( sortFieldIndex, sortFieldType, deleteEntries, - suggestedMetaSize, + manifestFullCompactionSize, sortNewFiles, result, manifestReadParallelism); @@ -381,7 +381,7 @@ private static List rewriteSections( sortFieldIndex, sortFieldType, deleteEntries, - suggestedMetaSize, + manifestFullCompactionSize, sortNewFiles, result, manifestReadParallelism); @@ -403,7 +403,7 @@ private static void rewriteSubSegments( int sortFieldIndex, DataType sortFieldType, @Nullable Set deleteEntries, - long suggestedMetaSize, + long manifestFullCompactionSize, List sortNewFiles, List result, @Nullable Integer manifestReadParallelism) @@ -411,13 +411,19 @@ private static void rewriteSubSegments( List subSegment = new ArrayList<>(); long subSegmentSize = 0; for (ManifestFileMeta m : section) { - if (defaultCompactionSet.contains(m)) { - subSegment.add(m); - subSegmentSize += m.fileSize(); - } else if (!subSegment.isEmpty()) { + boolean shouldAccumulate = + defaultCompactionSet.contains(m) + && subSegmentSize + m.fileSize() < manifestFullCompactionSize; + + if (shouldAccumulate) { + // Continue accumulating subSegment.add(m); subSegmentSize += m.fileSize(); - if (subSegmentSize >= suggestedMetaSize) { + } else { + // Need to break the segment + if (!subSegment.isEmpty()) { + // Process accumulated subSegment first + subSegment.add(m); List merged = sortAndRewriteSection( subSegment, @@ -428,11 +434,12 @@ private static void rewriteSubSegments( manifestReadParallelism); sortNewFiles.addAll(merged); result.addAll(merged); - subSegment = new ArrayList<>(); + subSegment.clear(); subSegmentSize = 0; + } else { + // Directly add to result + result.add(m); } - } else { - result.add(m); 
} } // Flush remaining sub-segment @@ -720,10 +727,12 @@ private static List sortAndRewriteSection( ManifestFile manifestFile, int sortFieldIndex, DataType sortFieldType, - @Nullable Set deletedIdentifiers, + Set deletedIdentifiers, @Nullable Integer manifestReadParallelism) throws Exception { - + if (section.size() == 1 && deletedIdentifiers.isEmpty()) { + return section; + } long totalStart = System.currentTimeMillis(); long readTime = 0; long sortTime = 0; @@ -882,7 +891,7 @@ private static FullCompactionReadResult readForSortRewrite( ManifestFile manifestFile, Set deletedIdentifiers) { List entries = new ArrayList<>(); - if (deletedIdentifiers == null || deletedIdentifiers.isEmpty()) { + if (deletedIdentifiers.isEmpty()) { entries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); } else { for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index 524caed50dbf..9744df5d7f1c 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -55,7 +55,7 @@ public ManifestPickStrategy(int sizeAmpThreshold, int sizeRatioThreshold) { * @return list of picked runs to compact */ public List pick(List levelRuns) { - if (levelRuns.isEmpty()) { + if (levelRuns.isEmpty() || levelRuns.size() < 5) { return new ArrayList<>(); } From a718aa0c4a25997a03bac88650c84bb4ce097a10 Mon Sep 17 00:00:00 2001 From: umi Date: Sun, 17 May 2026 14:03:28 +0800 Subject: [PATCH 17/48] rmTrigger --- .../paimon/operation/ManifestFileSorter.java | 64 ++++++++----------- 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java 
b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index c4ef6aa802cf..0d85a4dd4ec1 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -196,57 +196,45 @@ static Optional> trySortRewrite( private static ClassifyResult classifyManifests( List input, long suggestedMetaSize, - long manifestFullCompactionSize, ManifestFile manifestFile, RowType partitionType, @Nullable Integer manifestReadParallelism) { Filter mustChange = file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize; - long totalDeltaFileSize = 0; - for (ManifestFileMeta file : input) { - if (mustChange.test(file)) { - totalDeltaFileSize += file.fileSize(); - } - } List defaultCompactionManifests = new ArrayList<>(); List lsmFiles = new LinkedList<>(input); - Set deleteEntries = new HashSet<>(); - - if (totalDeltaFileSize >= manifestFullCompactionSize) { - // Full compact triggered: read delete entries and classify by predicate. 
- deleteEntries = - FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); + Set deleteEntries = + FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); - PartitionPredicate predicate; - if (deleteEntries.isEmpty()) { - predicate = PartitionPredicate.ALWAYS_FALSE; + PartitionPredicate predicate; + if (deleteEntries.isEmpty()) { + predicate = PartitionPredicate.ALWAYS_FALSE; + } else { + if (partitionType.getFieldCount() > 0) { + Set deletePartitions = + ManifestFileMerger.computeDeletePartitions(deleteEntries); + predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions); } else { - if (partitionType.getFieldCount() > 0) { - Set deletePartitions = - ManifestFileMerger.computeDeletePartitions(deleteEntries); - predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions); - } else { - predicate = PartitionPredicate.ALWAYS_TRUE; - } + predicate = PartitionPredicate.ALWAYS_TRUE; } + } - Iterator iterator = lsmFiles.iterator(); - while (iterator.hasNext()) { - ManifestFileMeta file = iterator.next(); - if (mustChange.test(file)) { - iterator.remove(); - defaultCompactionManifests.add(file); - } else if (predicate != null - && predicate.test( - file.numAddedFiles() + file.numDeletedFiles(), - file.partitionStats().minValues(), - file.partitionStats().maxValues(), - file.partitionStats().nullCounts())) { - iterator.remove(); - defaultCompactionManifests.add(file); - } + Iterator iterator = lsmFiles.iterator(); + while (iterator.hasNext()) { + ManifestFileMeta file = iterator.next(); + if (mustChange.test(file)) { + iterator.remove(); + defaultCompactionManifests.add(file); + } else if (predicate != null + && predicate.test( + file.numAddedFiles() + file.numDeletedFiles(), + file.partitionStats().minValues(), + file.partitionStats().maxValues(), + file.partitionStats().nullCounts())) { + iterator.remove(); + defaultCompactionManifests.add(file); } } return new 
ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); From 0604be9e0048955c3a4e96395c1f44e4c0637b07 Mon Sep 17 00:00:00 2001 From: umi Date: Sun, 17 May 2026 23:36:21 +0800 Subject: [PATCH 18/48] jili --- .../paimon/operation/ManifestFileMerger.java | 8 +- .../paimon/operation/ManifestFileSorter.java | 169 ++++++++++++------ 2 files changed, 125 insertions(+), 52 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index b6bfedbfd6bf..46371c698773 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -207,7 +207,13 @@ public static Optional> tryFullCompaction( totalManifestSize, deltaDeleteFileNum, totalDeltaFileSize); - System.out.println("Start Manifest File Full Compaction: totalManifestSize: " + totalManifestSize + ", deltaDeleteFileNum " + deltaDeleteFileNum + ", totalDeltaFileSize " + totalDeltaFileSize); + System.out.println( + "Start Manifest File Full Compaction: totalManifestSize: " + + totalManifestSize + + ", deltaDeleteFileNum " + + deltaDeleteFileNum + + ", totalDeltaFileSize " + + totalDeltaFileSize); // 2.1. 
read all delete entries diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 0d85a4dd4ec1..dfa3b37bb95d 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -30,19 +30,18 @@ import org.apache.paimon.types.DataType; import org.apache.paimon.types.DecimalType; import org.apache.paimon.types.RowType; -import org.apache.paimon.utils.Filter; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.Nullable; - import java.util.ArrayList; import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.PriorityQueue; import java.util.Set; @@ -87,14 +86,40 @@ static Optional> trySortRewrite( classifyManifests( input, suggestedMetaSize, - manifestFullCompactionSize, manifestFile, partitionType, manifestReadParallelism); - List defaultCompactionManifests = classified.defaultCompactionManifests; + Map defaultCompactionMap = + classified.defaultCompactionManifests; + List defaultCompactionManifests = + new ArrayList<>(defaultCompactionMap.keySet()); List lsmFiles = classified.lsmFiles; Set deleteEntries = classified.deleteEntries; + // Print classify reasons + int smallCount = 0; + int deleteRangeCount = 0; + int bothCount = 0; + for (boolean[] reasons : defaultCompactionMap.values()) { + if (reasons[0] && reasons[1]) { + bothCount++; + } else if (reasons[0]) { + smallCount++; + } else if (reasons[1]) { + deleteRangeCount++; + } + } + System.out.println( + "[classifyManifests] defaultCompaction=" + + defaultCompactionMap.size() + + " (small=" + + smallCount + + ", inDeleteRange=" + + deleteRangeCount + + ", both=" + + bothCount + + ")"); + 
// Step 3: Build LSM Tree and assign levels (only for lsmFiles). List levelRuns = lsmFiles.isEmpty() @@ -162,12 +187,12 @@ static Optional> trySortRewrite( List rewritten = rewriteSections( sections, - defaultCompactionSet, + defaultCompactionMap, manifestFile, sortFieldIndex, sortFieldType, deleteEntries, - manifestFullCompactionSize, + suggestedMetaSize, maxRewriteSize, openFileCost, sortNewFiles, @@ -199,11 +224,7 @@ private static ClassifyResult classifyManifests( ManifestFile manifestFile, RowType partitionType, @Nullable Integer manifestReadParallelism) { - Filter mustChange = - file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize; - - - List defaultCompactionManifests = new ArrayList<>(); + Map defaultCompactionManifests = new LinkedHashMap<>(); List lsmFiles = new LinkedList<>(input); Set deleteEntries = FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); @@ -224,17 +245,17 @@ private static ClassifyResult classifyManifests( Iterator iterator = lsmFiles.iterator(); while (iterator.hasNext()) { ManifestFileMeta file = iterator.next(); - if (mustChange.test(file)) { + boolean small = file.fileSize() < suggestedMetaSize; + boolean inDeleteRange = + predicate != null + && predicate.test( + file.numAddedFiles() + file.numDeletedFiles(), + file.partitionStats().minValues(), + file.partitionStats().maxValues(), + file.partitionStats().nullCounts()); + if (small || inDeleteRange) { iterator.remove(); - defaultCompactionManifests.add(file); - } else if (predicate != null - && predicate.test( - file.numAddedFiles() + file.numDeletedFiles(), - file.partitionStats().minValues(), - file.partitionStats().maxValues(), - file.partitionStats().nullCounts())) { - iterator.remove(); - defaultCompactionManifests.add(file); + defaultCompactionManifests.put(file, new boolean[] {small, inDeleteRange}); } } return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); @@ -252,12 +273,12 @@ private static ClassifyResult 
classifyManifests( */ private static List rewriteSections( List
    sections, - Set defaultCompactionSet, + Map defaultCompactionMap, ManifestFile manifestFile, int sortFieldIndex, DataType sortFieldType, Set deleteEntries, - long manifestFullCompactionSize, + long suggestedMetaSize, long maxRewriteSize, long openFileCost, List sortNewFiles, @@ -267,6 +288,7 @@ private static List rewriteSections( long processedSize = 0; boolean reachedLimit = false; + long totalRewriteSubSegmentsMs = 0; for (int i = 0; i < sections.size(); i++) { Section section = sections.get(i); @@ -276,17 +298,19 @@ private static List rewriteSections( result.addAll(section.files); } else { processedSize = processedSize + section.totalSizeWithCost; + long t0 = System.currentTimeMillis(); rewriteSubSegments( section.files, - defaultCompactionSet, + defaultCompactionMap, manifestFile, sortFieldIndex, sortFieldType, deleteEntries, - manifestFullCompactionSize, + suggestedMetaSize, sortNewFiles, result, manifestReadParallelism); + totalRewriteSubSegmentsMs += System.currentTimeMillis() - t0; } continue; } @@ -327,7 +351,7 @@ private static List rewriteSections( remainingFiles.add(file); remainingSize += file.fileSize(); remainingSizeWithCost += fileCost; - if (defaultCompactionSet.contains(file)) { + if (defaultCompactionMap.containsKey(file)) { remainingHasDefault = true; } } @@ -362,21 +386,27 @@ private static List rewriteSections( } reachedLimit = true; } else if (section.hasDefaultCompactMeta) { + long t0 = System.currentTimeMillis(); rewriteSubSegments( section.files, - defaultCompactionSet, + defaultCompactionMap, manifestFile, sortFieldIndex, sortFieldType, deleteEntries, - manifestFullCompactionSize, + suggestedMetaSize, sortNewFiles, result, manifestReadParallelism); + totalRewriteSubSegmentsMs += System.currentTimeMillis() - t0; } else { result.addAll(section.files); } } + System.out.println( + "[rewriteSections] rewriteSubSegments total took " + + totalRewriteSubSegmentsMs + + " ms"); return result; } @@ -386,32 +416,29 @@ private static List 
rewriteSections( */ private static void rewriteSubSegments( List section, - Set defaultCompactionSet, + Map defaultCompactionMap, ManifestFile manifestFile, int sortFieldIndex, DataType sortFieldType, @Nullable Set deleteEntries, - long manifestFullCompactionSize, + long manifestTargetSize, List sortNewFiles, List result, @Nullable Integer manifestReadParallelism) throws Exception { List subSegment = new ArrayList<>(); long subSegmentSize = 0; + long totalSmallCount = 0; + int rewriteCount = 0; for (ManifestFileMeta m : section) { - boolean shouldAccumulate = - defaultCompactionSet.contains(m) - && subSegmentSize + m.fileSize() < manifestFullCompactionSize; - - if (shouldAccumulate) { - // Continue accumulating - subSegment.add(m); - subSegmentSize += m.fileSize(); - } else { - // Need to break the segment - if (!subSegment.isEmpty()) { - // Process accumulated subSegment first - subSegment.add(m); + subSegmentSize += m.fileSize(); + subSegment.add(m); + + if (subSegmentSize >= manifestTargetSize) { + if (subSegment.size() == 1 + && (!defaultCompactionMap.containsKey(m) + || !defaultCompactionMap.get(m)[1])) result.add(m); + else { List merged = sortAndRewriteSection( subSegment, @@ -420,14 +447,28 @@ private static void rewriteSubSegments( sortFieldType, deleteEntries, manifestReadParallelism); + long smallCount = 0; + for (ManifestFileMeta f : merged) { + if (f.fileSize() < manifestTargetSize) { + smallCount++; + } + } + rewriteCount++; + totalSmallCount += smallCount; + System.out.println( + "[rewriteSubSegments] merged " + + subSegment.size() + + " -> " + + merged.size() + + " files, small files(<" + + manifestTargetSize + + "): " + + smallCount); sortNewFiles.addAll(merged); result.addAll(merged); - subSegment.clear(); - subSegmentSize = 0; - } else { - // Directly add to result - result.add(m); } + subSegment.clear(); + subSegmentSize = 0; } } // Flush remaining sub-segment @@ -440,9 +481,33 @@ private static void rewriteSubSegments( sortFieldType, 
deleteEntries, manifestReadParallelism); + long smallCount = 0; + for (ManifestFileMeta f : merged) { + if (f.fileSize() < manifestTargetSize) { + smallCount++; + } + } + rewriteCount++; + totalSmallCount += smallCount; + System.out.println( + "[rewriteSubSegments-flush] merged " + + subSegment.size() + + " -> " + + merged.size() + + " files, small files(<" + + manifestTargetSize + + "): " + + smallCount); sortNewFiles.addAll(merged); result.addAll(merged); } + System.out.println( + "[rewriteSubSegments] sortAndRewriteSection called " + + rewriteCount + + " times, total small files: " + + totalSmallCount + + ", result size: " + + result.size()); } /** @@ -923,12 +988,14 @@ static Section merge(Section a, Section b) { /** Result of classifying manifest files into default-compaction and LSM groups. */ private static class ClassifyResult { - final List defaultCompactionManifests; + /** key: ManifestFileMeta, value: boolean[]{isSmall, isInDeleteRange}. */ + final Map defaultCompactionManifests; + final List lsmFiles; @Nullable final Set deleteEntries; ClassifyResult( - List defaultCompactionManifests, + Map defaultCompactionManifests, List lsmFiles, @Nullable Set deleteEntries) { this.defaultCompactionManifests = defaultCompactionManifests; From dd85e5ab34be2892aa71f5b594d459102db5d90c Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 11:38:13 +0800 Subject: [PATCH 19/48] fix --- .../paimon/operation/ManifestFileSorter.java | 522 ++++++++---------- 1 file changed, 235 insertions(+), 287 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index dfa3b37bb95d..e4d87c4a91b8 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -69,7 +69,6 @@ static Optional> trySortRewrite( throws Exception { // Extract 
configuration from options long suggestedMetaSize = options.manifestTargetSize().getBytes(); - long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes(); Integer manifestReadParallelism = options.scanManifestParallelism(); String sortPartitionField = options.manifestSortPartitionField(); // Step 1: Resolve sort field. @@ -260,7 +259,213 @@ private static ClassifyResult classifyManifests( } return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); } + /** + * Build level-sorted runs from a list of manifest files. Sorts files by min partition value, + * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 + * largest to level 1~4, rest to level 0). + */ + static List buildLevelSortedRuns( + List input, int sortFieldIndex, DataType sortFieldType) { + // Step 1: Sort by min value (if equal, then by max value) + input.sort( + (a, b) -> { + int cmp = + compareField( + a.partitionStats().minValues(), + b.partitionStats().minValues(), + sortFieldIndex, + sortFieldType); + if (cmp != 0) { + return cmp; + } + return compareField( + a.partitionStats().maxValues(), + b.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType); + }); + + // Step 2: Interval graph coloring algorithm - assign files to runs + // Use priority queue to track runs by their max values + PriorityQueue> runs = + new PriorityQueue<>( + (r1, r2) -> { + ManifestFileMeta last1 = r1.get(r1.size() - 1); + ManifestFileMeta last2 = r2.get(r2.size() - 1); + return compareField( + last1.partitionStats().maxValues(), + last2.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType); + }); + + for (ManifestFileMeta file : input) { + boolean addedToExisting = false; + + // Try to find a run where current file's min >= run's max + if (!runs.isEmpty()) { + List earliestRun = runs.peek(); + ManifestFileMeta last = earliestRun.get(earliestRun.size() - 1); + + if (compareField( + file.partitionStats().minValues(), + 
last.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType) + >= 0) { + // Current file can be added to this run + runs.poll(); + earliestRun.add(file); + runs.offer(earliestRun); + addedToExisting = true; + } + } + + if (!addedToExisting) { + // Create a new run + List newRun = new ArrayList<>(); + newRun.add(file); + runs.offer(newRun); + } + } + + // Step 3: Convert to ManifestSortedRun list + List result = new ArrayList<>(); + while (!runs.isEmpty()) { + result.add(ManifestSortedRun.fromSorted(runs.poll())); + } + + // Step 4: Sort by totalSize and assign levels + result.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); + int n = result.size(); + int maxLevel = 4; + for (int i = 0; i < n; i++) { + if (i >= n - maxLevel) { + result.get(i).setLevel(i - (n - maxLevel) + 1); + } else { + result.get(i).setLevel(0); + } + } + System.out.println("run num: " + result.size()); + return result; + } + /** + * Split picked files into sections. Files with overlapping sort-key intervals go into the same + * section. Each section is built with pre-computed totalSize and hasDefaultCompactMeta. + */ + static List
    splitIntoSections( + List pickedFiles, + int sortFieldIndex, + DataType sortFieldType, + Set defaultCompactionSet, + long openFileCost) { + pickedFiles.sort( + (a, b) -> { + int cmp = + compareField( + a.partitionStats().minValues(), + b.partitionStats().minValues(), + sortFieldIndex, + sortFieldType); + if (cmp != 0) { + return cmp; + } + return compareField( + a.partitionStats().maxValues(), + b.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType); + }); + + List
    sections = new ArrayList<>(); + List currentFiles = new ArrayList<>(); + long currentTotalSize = 0; + long currentTotalSizeWithCost = 0; + boolean currentHasDefault = false; + ManifestFileMeta first = pickedFiles.get(0); + currentFiles.add(first); + currentTotalSize += first.fileSize(); + currentTotalSizeWithCost += Math.max(first.fileSize(), openFileCost); + currentHasDefault = defaultCompactionSet.contains(first); + BinaryRow sectionMaxBound = first.partitionStats().maxValues(); + + for (int i = 1; i < pickedFiles.size(); i++) { + ManifestFileMeta file = pickedFiles.get(i); + if (compareField( + file.partitionStats().minValues(), + sectionMaxBound, + sortFieldIndex, + sortFieldType) + >= 0) { + sections.add( + new Section( + currentFiles, + currentTotalSize, + currentTotalSizeWithCost, + currentHasDefault)); + currentFiles = new ArrayList<>(); + currentTotalSize = 0; + currentTotalSizeWithCost = 0; + currentFiles.add(file); + currentTotalSize += file.fileSize(); + currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); + currentHasDefault = defaultCompactionSet.contains(file); + sectionMaxBound = file.partitionStats().maxValues(); + } else { + currentFiles.add(file); + currentTotalSize += file.fileSize(); + currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); + if (!currentHasDefault && defaultCompactionSet.contains(file)) { + currentHasDefault = true; + } + if (compareField( + file.partitionStats().maxValues(), + sectionMaxBound, + sortFieldIndex, + sortFieldType) + > 0) { + sectionMaxBound = file.partitionStats().maxValues(); + } + } + } + sections.add( + new Section( + currentFiles, + currentTotalSize, + currentTotalSizeWithCost, + currentHasDefault)); + return sections; + } + + /** + * Merge small adjacent sections to avoid producing too many small rewrite batches. If either + * the pending section or the current section total size is smaller than half of {@code + * suggestedMetaSize}, they are combined into a single section. 
+ */ + private static List
    mergeSmallAdjacentSections( + List
    sections, long suggestedMetaSize) { + long smallThreshold = suggestedMetaSize / 2; + List
    merged = new ArrayList<>(); + Section pending = null; + + for (Section section : sections) { + if (pending == null) { + pending = section; + } else { + if (pending.totalSize < smallThreshold || section.totalSize < smallThreshold) { + pending = Section.merge(pending, section); + } else { + merged.add(pending); + pending = section; + } + } + } + if (pending != null) { + merged.add(pending); + } + return merged; + } /** * Iterate over sections, decide whether to rewrite each section fully or partially based on the * maxRewriteSize threshold and whether the section contains defaultCompaction files. @@ -297,19 +502,17 @@ private static List rewriteSections( if (!section.hasDefaultCompactMeta || deleteEntries.isEmpty()) { result.addAll(section.files); } else { - processedSize = processedSize + section.totalSizeWithCost; long t0 = System.currentTimeMillis(); - rewriteSubSegments( - section.files, - defaultCompactionMap, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - suggestedMetaSize, - sortNewFiles, - result, - manifestReadParallelism); + List merged = + sortAndRewriteSection( + section.files, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + manifestReadParallelism); + sortNewFiles.addAll(merged); + result.addAll(merged); totalRewriteSubSegmentsMs += System.currentTimeMillis() - t0; } continue; @@ -510,261 +713,6 @@ private static void rewriteSubSegments( + result.size()); } - /** - * Partial rewrite of a section: only rewrite files that fit within the remaining budget. Files - * beyond the budget are kept as-is. 
- */ - private static void partialRewriteSection( - List sectionFiles, - long remaining, - long openFileCost, - ManifestFile manifestFile, - int sortFieldIndex, - DataType sortFieldType, - @Nullable Set deleteEntries, - List sortNewFiles, - List result, - @Nullable Integer manifestReadParallelism) - throws Exception { - List toRewrite = new ArrayList<>(); - int splitIndex = 0; - long partialSize = 0; - for (int i = 0; i < sectionFiles.size(); i++) { - long fileCost = Math.max(sectionFiles.get(i).fileSize(), openFileCost); - if (partialSize + fileCost > remaining) { - break; - } - toRewrite.add(sectionFiles.get(i)); - partialSize += fileCost; - splitIndex = i + 1; - } - if (toRewrite.size() > 1) { - List merged = - sortAndRewriteSection( - toRewrite, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - manifestReadParallelism); - sortNewFiles.addAll(merged); - result.addAll(merged); - } else { - result.addAll(toRewrite); - } - for (int i = splitIndex; i < sectionFiles.size(); i++) { - result.add(sectionFiles.get(i)); - } - } - - /** - * Build level-sorted runs from a list of manifest files. Sorts files by min partition value, - * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 - * largest to level 1~4, rest to level 0). 
- */ - static List buildLevelSortedRuns( - List input, int sortFieldIndex, DataType sortFieldType) { - // Step 1: Sort by min value (if equal, then by max value) - input.sort( - (a, b) -> { - int cmp = - compareField( - a.partitionStats().minValues(), - b.partitionStats().minValues(), - sortFieldIndex, - sortFieldType); - if (cmp != 0) { - return cmp; - } - return compareField( - a.partitionStats().maxValues(), - b.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); - }); - - // Step 2: Interval graph coloring algorithm - assign files to runs - // Use priority queue to track runs by their max values - PriorityQueue> runs = - new PriorityQueue<>( - (r1, r2) -> { - ManifestFileMeta last1 = r1.get(r1.size() - 1); - ManifestFileMeta last2 = r2.get(r2.size() - 1); - return compareField( - last1.partitionStats().maxValues(), - last2.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); - }); - - for (ManifestFileMeta file : input) { - boolean addedToExisting = false; - - // Try to find a run where current file's min >= run's max - if (!runs.isEmpty()) { - List earliestRun = runs.peek(); - ManifestFileMeta last = earliestRun.get(earliestRun.size() - 1); - - if (compareField( - file.partitionStats().minValues(), - last.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType) - >= 0) { - // Current file can be added to this run - runs.poll(); - earliestRun.add(file); - runs.offer(earliestRun); - addedToExisting = true; - } - } - - if (!addedToExisting) { - // Create a new run - List newRun = new ArrayList<>(); - newRun.add(file); - runs.offer(newRun); - } - } - - // Step 3: Convert to ManifestSortedRun list - List result = new ArrayList<>(); - while (!runs.isEmpty()) { - result.add(ManifestSortedRun.fromSorted(runs.poll())); - } - - // Step 4: Sort by totalSize and assign levels - result.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); - int n = result.size(); - int maxLevel = 4; - for (int i = 0; i < n; i++) { - if (i >= n - 
maxLevel) { - result.get(i).setLevel(i - (n - maxLevel) + 1); - } else { - result.get(i).setLevel(0); - } - } - System.out.println("run num: " + result.size()); - return result; - } - - /** - * Split picked files into sections. Files with overlapping sort-key intervals go into the same - * section. Each section is built with pre-computed totalSize and hasDefaultCompactMeta. - */ - static List
    splitIntoSections( - List pickedFiles, - int sortFieldIndex, - DataType sortFieldType, - Set defaultCompactionSet, - long openFileCost) { - pickedFiles.sort( - (a, b) -> { - int cmp = - compareField( - a.partitionStats().minValues(), - b.partitionStats().minValues(), - sortFieldIndex, - sortFieldType); - if (cmp != 0) { - return cmp; - } - return compareField( - a.partitionStats().maxValues(), - b.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); - }); - - List
    sections = new ArrayList<>(); - List currentFiles = new ArrayList<>(); - long currentTotalSize = 0; - long currentTotalSizeWithCost = 0; - boolean currentHasDefault = false; - ManifestFileMeta first = pickedFiles.get(0); - currentFiles.add(first); - currentTotalSize += first.fileSize(); - currentTotalSizeWithCost += Math.max(first.fileSize(), openFileCost); - currentHasDefault = defaultCompactionSet.contains(first); - BinaryRow sectionMaxBound = first.partitionStats().maxValues(); - - for (int i = 1; i < pickedFiles.size(); i++) { - ManifestFileMeta file = pickedFiles.get(i); - if (compareField( - file.partitionStats().minValues(), - sectionMaxBound, - sortFieldIndex, - sortFieldType) - >= 0) { - sections.add( - new Section( - currentFiles, - currentTotalSize, - currentTotalSizeWithCost, - currentHasDefault)); - currentFiles = new ArrayList<>(); - currentTotalSize = 0; - currentTotalSizeWithCost = 0; - currentFiles.add(file); - currentTotalSize += file.fileSize(); - currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); - currentHasDefault = defaultCompactionSet.contains(file); - sectionMaxBound = file.partitionStats().maxValues(); - } else { - currentFiles.add(file); - currentTotalSize += file.fileSize(); - currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); - if (!currentHasDefault && defaultCompactionSet.contains(file)) { - currentHasDefault = true; - } - if (compareField( - file.partitionStats().maxValues(), - sectionMaxBound, - sortFieldIndex, - sortFieldType) - > 0) { - sectionMaxBound = file.partitionStats().maxValues(); - } - } - } - sections.add( - new Section( - currentFiles, - currentTotalSize, - currentTotalSizeWithCost, - currentHasDefault)); - return sections; - } - - /** - * Merge small adjacent sections to avoid producing too many small rewrite batches. If either - * the pending section or the current section total size is smaller than half of {@code - * suggestedMetaSize}, they are combined into a single section. 
- */ - private static List
    mergeSmallAdjacentSections( - List
    sections, long suggestedMetaSize) { - long smallThreshold = suggestedMetaSize / 2; - List
    merged = new ArrayList<>(); - Section pending = null; - - for (Section section : sections) { - if (pending == null) { - pending = section; - } else { - if (pending.totalSize < smallThreshold || section.totalSize < smallThreshold) { - pending = Section.merge(pending, section); - } else { - merged.add(pending); - pending = section; - } - } - } - if (pending != null) { - merged.add(pending); - } - return merged; - } - /** * Read all entries from a section's manifest files, sort them in memory by the specified * partition field, filter out DELETE entries and cancelled ADD entries, then write surviving @@ -849,6 +797,25 @@ private static List sortAndRewriteSection( return result; } + /** + * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}. + * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field + * value AND the same data file are emitted contiguously. + */ + static int compareSortKey( + ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) { + int c = compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType); + if (c != 0) { + return c; + } + // ADD before DELETE, so that mergeEntries can correctly cancel pairs + int kindCmp = a.kind().compareTo(b.kind()); + if (kindCmp != 0) { + return kindCmp; + } + return a.file().fileName().compareTo(b.file().fileName()); + } + /** * Compares the value at field {@code k} of two {@link BinaryRow}s according to {@code type}. */ @@ -893,25 +860,6 @@ static int compareField(BinaryRow a, BinaryRow b, int k, DataType type) { } } - /** - * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}. - * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field - * value AND the same data file are emitted contiguously. 
- */ - static int compareSortKey( - ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) { - int c = compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType); - if (c != 0) { - return c; - } - // ADD before DELETE, so that mergeEntries can correctly cancel pairs - int kindCmp = a.kind().compareTo(b.kind()); - if (kindCmp != 0) { - return kindCmp; - } - return a.file().fileName().compareTo(b.file().fileName()); - } - /** * Resolve the partition field to sort manifests by. * From 3ca0c5b9fa57ef0c574d9675d2347a20c1f77f70 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 15:39:43 +0800 Subject: [PATCH 20/48] rmPrint --- .../java/org/apache/paimon/CoreOptions.java | 2 +- .../paimon/operation/ManifestFileSorter.java | 137 ++---------------- 2 files changed, 11 insertions(+), 128 deletions(-) diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index 3ce03942070a..75f3f818bfd7 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -498,7 +498,7 @@ public InlineElement getDescription() { public static final ConfigOption MANIFEST_SORT_OPEN_FILE_COST = key("manifest-sort.open-file-cost") .memoryType() - .defaultValue(MemorySize.ofKibiBytes(40)) + .defaultValue(MemorySize.ofMebiBytes(4)) .withDescription( "Open file cost of a manifest file during sort rewrite. 
" + "It is added to each manifest file's size when computing " diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index e4d87c4a91b8..b462796477de 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -90,34 +90,9 @@ static Optional> trySortRewrite( manifestReadParallelism); Map defaultCompactionMap = classified.defaultCompactionManifests; - List defaultCompactionManifests = - new ArrayList<>(defaultCompactionMap.keySet()); List lsmFiles = classified.lsmFiles; Set deleteEntries = classified.deleteEntries; - // Print classify reasons - int smallCount = 0; - int deleteRangeCount = 0; - int bothCount = 0; - for (boolean[] reasons : defaultCompactionMap.values()) { - if (reasons[0] && reasons[1]) { - bothCount++; - } else if (reasons[0]) { - smallCount++; - } else if (reasons[1]) { - deleteRangeCount++; - } - } - System.out.println( - "[classifyManifests] defaultCompaction=" - + defaultCompactionMap.size() - + " (small=" - + smallCount - + ", inDeleteRange=" - + deleteRangeCount - + ", both=" - + bothCount - + ")"); // Step 3: Build LSM Tree and assign levels (only for lsmFiles). 
List levelRuns = @@ -132,7 +107,7 @@ static Optional> trySortRewrite( new ManifestPickStrategy(sizeAmpThreshold, sizeRatioThreshold); List pickedRuns = pickStrategy.pick(levelRuns); - if (pickedRuns.isEmpty() && defaultCompactionManifests.isEmpty()) { + if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { LOG.debug( "Manifest sort rewrite skipped: no runs picked and no defaultCompaction files."); return Optional.of(input); @@ -144,7 +119,7 @@ static Optional> trySortRewrite( input.size(), levelRuns.size(), pickedRuns.size(), - defaultCompactionManifests.size()); + defaultCompactionMap.size()); Set pickedSet = new HashSet<>(pickedRuns); List reusedFiles = new ArrayList<>(); @@ -160,9 +135,8 @@ static Optional> trySortRewrite( for (ManifestSortedRun run : pickedRuns) { pickedFiles.addAll(run.files()); } - pickedFiles.addAll(defaultCompactionManifests); + pickedFiles.addAll(defaultCompactionMap.keySet()); - Set defaultCompactionSet = new HashSet<>(defaultCompactionManifests); long maxRewriteSize = options.manifestSortMaxRewriteSize(); long openFileCost = options.manifestSortOpenFileCost(); @@ -171,14 +145,9 @@ static Optional> trySortRewrite( pickedFiles, sortFieldIndex, sortFieldType, - defaultCompactionSet, + defaultCompactionMap, openFileCost); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); - System.out.println( - "After splitIntoSections: sections=" - + sections.size() - + ", pickedFiles=" - + pickedFiles.size()); LOG.info("After mergeSmallAdjacentSections: sections={}.", sections.size()); List sortNewFiles = new ArrayList<>(); @@ -358,7 +327,7 @@ static List
    splitIntoSections( List pickedFiles, int sortFieldIndex, DataType sortFieldType, - Set defaultCompactionSet, + Map defaultCompactionMap, long openFileCost) { pickedFiles.sort( (a, b) -> { @@ -387,7 +356,7 @@ static List
    splitIntoSections( currentFiles.add(first); currentTotalSize += first.fileSize(); currentTotalSizeWithCost += Math.max(first.fileSize(), openFileCost); - currentHasDefault = defaultCompactionSet.contains(first); + currentHasDefault = defaultCompactionMap.containsKey(first); BinaryRow sectionMaxBound = first.partitionStats().maxValues(); for (int i = 1; i < pickedFiles.size(); i++) { @@ -410,13 +379,13 @@ static List
    splitIntoSections( currentFiles.add(file); currentTotalSize += file.fileSize(); currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); - currentHasDefault = defaultCompactionSet.contains(file); + currentHasDefault = defaultCompactionMap.containsKey(file); sectionMaxBound = file.partitionStats().maxValues(); } else { currentFiles.add(file); currentTotalSize += file.fileSize(); currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); - if (!currentHasDefault && defaultCompactionSet.contains(file)) { + if (!currentHasDefault && defaultCompactionMap.containsKey(file)) { currentHasDefault = true; } if (compareField( @@ -445,7 +414,6 @@ static List
    splitIntoSections( */ private static List
    mergeSmallAdjacentSections( List
    sections, long suggestedMetaSize) { - long smallThreshold = suggestedMetaSize / 2; List
    merged = new ArrayList<>(); Section pending = null; @@ -453,7 +421,7 @@ private static List
    mergeSmallAdjacentSections( if (pending == null) { pending = section; } else { - if (pending.totalSize < smallThreshold || section.totalSize < smallThreshold) { + if (pending.totalSize < suggestedMetaSize || section.totalSize < suggestedMetaSize) { pending = Section.merge(pending, section); } else { merged.add(pending); @@ -491,9 +459,7 @@ private static List rewriteSections( throws Exception { List result = new ArrayList<>(); long processedSize = 0; - boolean reachedLimit = false; - long totalRewriteSubSegmentsMs = 0; for (int i = 0; i < sections.size(); i++) { Section section = sections.get(i); @@ -502,7 +468,6 @@ private static List rewriteSections( if (!section.hasDefaultCompactMeta || deleteEntries.isEmpty()) { result.addAll(section.files); } else { - long t0 = System.currentTimeMillis(); List merged = sortAndRewriteSection( section.files, @@ -513,7 +478,6 @@ private static List rewriteSections( manifestReadParallelism); sortNewFiles.addAll(merged); result.addAll(merged); - totalRewriteSubSegmentsMs += System.currentTimeMillis() - t0; } continue; } @@ -589,7 +553,6 @@ private static List rewriteSections( } reachedLimit = true; } else if (section.hasDefaultCompactMeta) { - long t0 = System.currentTimeMillis(); rewriteSubSegments( section.files, defaultCompactionMap, @@ -601,21 +564,15 @@ private static List rewriteSections( sortNewFiles, result, manifestReadParallelism); - totalRewriteSubSegmentsMs += System.currentTimeMillis() - t0; } else { result.addAll(section.files); } } - System.out.println( - "[rewriteSections] rewriteSubSegments total took " - + totalRewriteSubSegmentsMs - + " ms"); return result; } /** - * Rewrite sub-segments within a section that exceeds the rewrite threshold. Only sub-segments - * containing defaultCompaction files are rewritten; other files are kept as-is. + * Rewrite sub-segments within a section that exceeds the rewrite threshold. 
*/ private static void rewriteSubSegments( List section, @@ -631,8 +588,6 @@ private static void rewriteSubSegments( throws Exception { List subSegment = new ArrayList<>(); long subSegmentSize = 0; - long totalSmallCount = 0; - int rewriteCount = 0; for (ManifestFileMeta m : section) { subSegmentSize += m.fileSize(); subSegment.add(m); @@ -650,23 +605,6 @@ private static void rewriteSubSegments( sortFieldType, deleteEntries, manifestReadParallelism); - long smallCount = 0; - for (ManifestFileMeta f : merged) { - if (f.fileSize() < manifestTargetSize) { - smallCount++; - } - } - rewriteCount++; - totalSmallCount += smallCount; - System.out.println( - "[rewriteSubSegments] merged " - + subSegment.size() - + " -> " - + merged.size() - + " files, small files(<" - + manifestTargetSize - + "): " - + smallCount); sortNewFiles.addAll(merged); result.addAll(merged); } @@ -684,33 +622,9 @@ private static void rewriteSubSegments( sortFieldType, deleteEntries, manifestReadParallelism); - long smallCount = 0; - for (ManifestFileMeta f : merged) { - if (f.fileSize() < manifestTargetSize) { - smallCount++; - } - } - rewriteCount++; - totalSmallCount += smallCount; - System.out.println( - "[rewriteSubSegments-flush] merged " - + subSegment.size() - + " -> " - + merged.size() - + " files, small files(<" - + manifestTargetSize - + "): " - + smallCount); sortNewFiles.addAll(merged); result.addAll(merged); } - System.out.println( - "[rewriteSubSegments] sortAndRewriteSection called " - + rewriteCount - + " times, total small files: " - + totalSmallCount - + ", result size: " - + result.size()); } /** @@ -731,16 +645,7 @@ private static List sortAndRewriteSection( Set deletedIdentifiers, @Nullable Integer manifestReadParallelism) throws Exception { - if (section.size() == 1 && deletedIdentifiers.isEmpty()) { - return section; - } - long totalStart = System.currentTimeMillis(); - long readTime = 0; - long sortTime = 0; - long writeTime = 0; - // Parallel read: each meta is read 
independently - long readStart = System.currentTimeMillis(); Function> reader = meta -> singletonList(readForSortRewrite(meta, manifestFile, deletedIdentifiers)); @@ -749,15 +654,11 @@ private static List sortAndRewriteSection( sequentialBatchedExecute(reader, section, manifestReadParallelism)) { entriesToRewrite.addAll(readResult.entries); } - readTime = System.currentTimeMillis() - readStart; List result = new ArrayList<>(); if (!entriesToRewrite.isEmpty()) { - long sortStart = System.currentTimeMillis(); entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); - sortTime = System.currentTimeMillis() - sortStart; - long writeStart = System.currentTimeMillis(); RollingFileWriter writer = manifestFile.createRollingWriter(); Exception exception = null; @@ -775,25 +676,7 @@ private static List sortAndRewriteSection( writer.close(); } result.addAll(writer.result()); - writeTime = System.currentTimeMillis() - writeStart; } - - long totalTime = System.currentTimeMillis() - totalStart; - if (totalTime > 0) { - System.out.println( - String.format( - "[sortAndRewriteSection] Total: %d ms, Read: %d ms (%.1f%%), Sort: %d ms (%.1f%%), Write: %d ms (%.1f%%), Entries: %d, Files: %d", - totalTime, - readTime, - 100.0 * readTime / totalTime, - sortTime, - 100.0 * sortTime / totalTime, - writeTime, - 100.0 * writeTime / totalTime, - entriesToRewrite.size(), - result.size())); - } - return result; } From f272a281da1877b490b543e464672642fc37c1f7 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 16:58:26 +0800 Subject: [PATCH 21/48] simplied --- .../paimon/operation/ManifestFileSorter.java | 144 +++++++++--------- 1 file changed, 68 insertions(+), 76 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index b462796477de..b18f7298e0f1 100644 --- 
a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -93,7 +93,6 @@ static Optional> trySortRewrite( List lsmFiles = classified.lsmFiles; Set deleteEntries = classified.deleteEntries; - // Step 3: Build LSM Tree and assign levels (only for lsmFiles). List levelRuns = lsmFiles.isEmpty() @@ -315,7 +314,6 @@ static List buildLevelSortedRuns( result.get(i).setLevel(0); } } - System.out.println("run num: " + result.size()); return result; } @@ -421,7 +419,8 @@ private static List
    mergeSmallAdjacentSections( if (pending == null) { pending = section; } else { - if (pending.totalSize < suggestedMetaSize || section.totalSize < suggestedMetaSize) { + if (pending.totalSize < suggestedMetaSize + || section.totalSize < suggestedMetaSize) { pending = Section.merge(pending, section); } else { merged.add(pending); @@ -465,20 +464,16 @@ private static List rewriteSections( Section section = sections.get(i); // Single-file section without defaultCompaction: already sorted, skip rewrite. if (section.files.size() == 1) { - if (!section.hasDefaultCompactMeta || deleteEntries.isEmpty()) { - result.addAll(section.files); - } else { - List merged = - sortAndRewriteSection( - section.files, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - manifestReadParallelism); - sortNewFiles.addAll(merged); - result.addAll(merged); - } + sortAndRewriteSection( + section.files, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + defaultCompactionMap, + result, + sortNewFiles, + manifestReadParallelism); continue; } long sectionSize = section.totalSizeWithCost; @@ -486,16 +481,16 @@ private static List rewriteSections( if (!exceedsThreshold) { processedSize += sectionSize; - List merged = - sortAndRewriteSection( - section.files, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - manifestReadParallelism); - sortNewFiles.addAll(merged); - result.addAll(merged); + sortAndRewriteSection( + section.files, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + defaultCompactionMap, + result, + sortNewFiles, + manifestReadParallelism); } else if (!reachedLimit) { // First time exceeding threshold without defaultCompaction: // partial rewrite within remaining budget. 
@@ -524,21 +519,16 @@ private static List rewriteSections( } } - if (toRewrite.size() > 1) { - List merged = - sortAndRewriteSection( - toRewrite, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - manifestReadParallelism); - sortNewFiles.addAll(merged); - result.addAll(merged); - } else if (toRewrite.size() == 1) { - sortNewFiles.addAll(toRewrite); - result.addAll(toRewrite); - } + sortAndRewriteSection( + toRewrite, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + defaultCompactionMap, + result, + sortNewFiles, + manifestReadParallelism); // Create new section for remaining files and append to sections list if (!remainingFiles.isEmpty()) { @@ -561,8 +551,8 @@ private static List rewriteSections( sortFieldType, deleteEntries, suggestedMetaSize, - sortNewFiles, result, + sortNewFiles, manifestReadParallelism); } else { result.addAll(section.files); @@ -571,9 +561,7 @@ private static List rewriteSections( return result; } - /** - * Rewrite sub-segments within a section that exceeds the rewrite threshold. - */ + /** Rewrite sub-segments within a section that exceeds the rewrite threshold. 
*/ private static void rewriteSubSegments( List section, Map defaultCompactionMap, @@ -582,8 +570,8 @@ private static void rewriteSubSegments( DataType sortFieldType, @Nullable Set deleteEntries, long manifestTargetSize, - List sortNewFiles, List result, + List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { List subSegment = new ArrayList<>(); @@ -593,37 +581,32 @@ private static void rewriteSubSegments( subSegment.add(m); if (subSegmentSize >= manifestTargetSize) { - if (subSegment.size() == 1 - && (!defaultCompactionMap.containsKey(m) - || !defaultCompactionMap.get(m)[1])) result.add(m); - else { - List merged = - sortAndRewriteSection( - subSegment, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - manifestReadParallelism); - sortNewFiles.addAll(merged); - result.addAll(merged); - } + sortAndRewriteSection( + subSegment, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + defaultCompactionMap, + result, + sortNewFiles, + manifestReadParallelism); subSegment.clear(); subSegmentSize = 0; } } // Flush remaining sub-segment if (!subSegment.isEmpty()) { - List merged = - sortAndRewriteSection( - subSegment, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - manifestReadParallelism); - sortNewFiles.addAll(merged); - result.addAll(merged); + sortAndRewriteSection( + subSegment, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + defaultCompactionMap, + result, + sortNewFiles, + manifestReadParallelism); } } @@ -637,14 +620,23 @@ private static void rewriteSubSegments( *

    Reading is parallelized via {@code sequentialBatchedExecute} following the same pattern as * {@link ManifestFileMerger#tryFullCompaction}. */ - private static List sortAndRewriteSection( + private static void sortAndRewriteSection( List section, ManifestFile manifestFile, int sortFieldIndex, DataType sortFieldType, Set deletedIdentifiers, + Map defaultCompactionMap, + List result, + List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { + if (section.size() == 1 + && (!defaultCompactionMap.containsKey(section.get(0)) + || !defaultCompactionMap.get(section.get(0))[1])) { + result.add(section.get(0)); + return; + } // Parallel read: each meta is read independently Function> reader = meta -> singletonList(readForSortRewrite(meta, manifestFile, deletedIdentifiers)); @@ -655,7 +647,6 @@ private static List sortAndRewriteSection( entriesToRewrite.addAll(readResult.entries); } - List result = new ArrayList<>(); if (!entriesToRewrite.isEmpty()) { entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); @@ -675,9 +666,10 @@ private static List sortAndRewriteSection( } writer.close(); } - result.addAll(writer.result()); + List sorted = writer.result(); + result.addAll(sorted); + sortNewFiles.addAll(sorted); } - return result; } /** From 16ad162780a1aaa10a56b7f178299972350cdfa3 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 17:36:35 +0800 Subject: [PATCH 22/48] fix --- .../org/apache/paimon/operation/ManifestFileMerger.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 46371c698773..e197868849c1 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -207,13 +207,6 @@ public static Optional> tryFullCompaction( 
totalManifestSize, deltaDeleteFileNum, totalDeltaFileSize); - System.out.println( - "Start Manifest File Full Compaction: totalManifestSize: " - + totalManifestSize - + ", deltaDeleteFileNum " - + deltaDeleteFileNum - + ", totalDeltaFileSize " - + totalDeltaFileSize); // 2.1. read all delete entries From c04c3784eedf12cc25ef7e89f9e60b8acfe7091c Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 19:05:19 +0800 Subject: [PATCH 23/48] rmOpenFileCost --- .../generated/core_configuration.html | 7 +-- .../java/org/apache/paimon/CoreOptions.java | 14 ----- .../paimon/operation/ManifestFileSorter.java | 58 ++++--------------- 3 files changed, 12 insertions(+), 67 deletions(-) diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html index daae088f61c5..933d00644bce 100644 --- a/docs/layouts/shortcodes/generated/core_configuration.html +++ b/docs/layouts/shortcodes/generated/core_configuration.html @@ -915,12 +915,7 @@ MemorySize Maximum total size of manifest files to rewrite in a single sort rewrite pass. Sections exceeding this limit are skipped. Set to a larger value to allow more aggressive sort rewriting. - -

    manifest-sort.open-file-cost
    - 4 mb - MemorySize - Open file cost of a manifest file during sort rewrite. It is added to each manifest file's size when computing section size, to avoid rewriting too many small manifest files in a single section. - +
    manifest-sort.partition-field
    (none) diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index 75f3f818bfd7..33a3e8afc3af 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -495,16 +495,6 @@ public InlineElement getDescription() { + " skipped. Set to a larger value to allow more aggressive" + " sort rewriting."); - public static final ConfigOption MANIFEST_SORT_OPEN_FILE_COST = - key("manifest-sort.open-file-cost") - .memoryType() - .defaultValue(MemorySize.ofMebiBytes(4)) - .withDescription( - "Open file cost of a manifest file during sort rewrite. " - + "It is added to each manifest file's size when computing " - + "section size, to avoid rewriting too many small manifest " - + "files in a single section."); - public static final ConfigOption UPSERT_KEY = key("upsert-key") .stringType() @@ -2613,10 +2603,6 @@ public long manifestSortMaxRewriteSize() { return options.get(MANIFEST_SORT_MAX_REWRITE_SIZE).getBytes(); } - public long manifestSortOpenFileCost() { - return options.get(MANIFEST_SORT_OPEN_FILE_COST).getBytes(); - } - public String partitionDefaultName() { return options.get(PARTITION_DEFAULT_NAME); } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index b18f7298e0f1..445921dea943 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -30,10 +30,12 @@ import org.apache.paimon.types.DataType; import org.apache.paimon.types.DecimalType; import org.apache.paimon.types.RowType; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.Nullable; + import java.util.ArrayList; import java.util.Comparator; import java.util.HashSet; @@ -137,15 +139,9 @@ 
static Optional> trySortRewrite( pickedFiles.addAll(defaultCompactionMap.keySet()); long maxRewriteSize = options.manifestSortMaxRewriteSize(); - long openFileCost = options.manifestSortOpenFileCost(); List
    sections = - splitIntoSections( - pickedFiles, - sortFieldIndex, - sortFieldType, - defaultCompactionMap, - openFileCost); + splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType, defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); LOG.info("After mergeSmallAdjacentSections: sections={}.", sections.size()); @@ -161,7 +157,6 @@ static Optional> trySortRewrite( deleteEntries, suggestedMetaSize, maxRewriteSize, - openFileCost, sortNewFiles, manifestReadParallelism); result.addAll(rewritten); @@ -325,8 +320,7 @@ static List
    splitIntoSections( List pickedFiles, int sortFieldIndex, DataType sortFieldType, - Map defaultCompactionMap, - long openFileCost) { + Map defaultCompactionMap) { pickedFiles.sort( (a, b) -> { int cmp = @@ -348,12 +342,10 @@ static List
    splitIntoSections( List
    sections = new ArrayList<>(); List currentFiles = new ArrayList<>(); long currentTotalSize = 0; - long currentTotalSizeWithCost = 0; boolean currentHasDefault = false; ManifestFileMeta first = pickedFiles.get(0); currentFiles.add(first); currentTotalSize += first.fileSize(); - currentTotalSizeWithCost += Math.max(first.fileSize(), openFileCost); currentHasDefault = defaultCompactionMap.containsKey(first); BinaryRow sectionMaxBound = first.partitionStats().maxValues(); @@ -365,24 +357,16 @@ static List
    splitIntoSections( sortFieldIndex, sortFieldType) >= 0) { - sections.add( - new Section( - currentFiles, - currentTotalSize, - currentTotalSizeWithCost, - currentHasDefault)); + sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); currentFiles = new ArrayList<>(); currentTotalSize = 0; - currentTotalSizeWithCost = 0; currentFiles.add(file); currentTotalSize += file.fileSize(); - currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); currentHasDefault = defaultCompactionMap.containsKey(file); sectionMaxBound = file.partitionStats().maxValues(); } else { currentFiles.add(file); currentTotalSize += file.fileSize(); - currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); if (!currentHasDefault && defaultCompactionMap.containsKey(file)) { currentHasDefault = true; } @@ -396,12 +380,7 @@ static List
    splitIntoSections( } } } - sections.add( - new Section( - currentFiles, - currentTotalSize, - currentTotalSizeWithCost, - currentHasDefault)); + sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); return sections; } @@ -452,7 +431,6 @@ private static List rewriteSections( Set deleteEntries, long suggestedMetaSize, long maxRewriteSize, - long openFileCost, List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { @@ -476,7 +454,7 @@ private static List rewriteSections( manifestReadParallelism); continue; } - long sectionSize = section.totalSizeWithCost; + long sectionSize = section.totalSize; boolean exceedsThreshold = processedSize + sectionSize > maxRewriteSize; if (!exceedsThreshold) { @@ -501,18 +479,15 @@ private static List rewriteSections( List remainingFiles = new ArrayList<>(); long rewriteSize = 0; long remainingSize = 0; - long remainingSizeWithCost = 0; boolean remainingHasDefault = false; for (ManifestFileMeta file : section.files) { - long fileCost = Math.max(file.fileSize(), openFileCost); - if (rewriteSize + fileCost <= remaining) { + if (rewriteSize + file.fileSize() <= remaining) { toRewrite.add(file); - rewriteSize += fileCost; + rewriteSize += file.fileSize(); } else { remainingFiles.add(file); remainingSize += file.fileSize(); - remainingSizeWithCost += fileCost; if (defaultCompactionMap.containsKey(file)) { remainingHasDefault = true; } @@ -533,11 +508,7 @@ private static List rewriteSections( // Create new section for remaining files and append to sections list if (!remainingFiles.isEmpty()) { Section remainingSection = - new Section( - remainingFiles, - remainingSize, - remainingSizeWithCost, - remainingHasDefault); + new Section(remainingFiles, remainingSize, remainingHasDefault); // Append remaining section to the end of sections list sections.add(remainingSection); } @@ -783,17 +754,11 @@ private static FullCompactionReadResult readForSortRewrite( static class Section { final List files; 
final long totalSize; - final long totalSizeWithCost; final boolean hasDefaultCompactMeta; - Section( - List files, - long totalSize, - long totalSizeWithCost, - boolean hasDefaultCompactMeta) { + Section(List files, long totalSize, boolean hasDefaultCompactMeta) { this.files = files; this.totalSize = totalSize; - this.totalSizeWithCost = totalSizeWithCost; this.hasDefaultCompactMeta = hasDefaultCompactMeta; } @@ -804,7 +769,6 @@ static Section merge(Section a, Section b) { return new Section( merged, a.totalSize + b.totalSize, - a.totalSizeWithCost + b.totalSizeWithCost, a.hasDefaultCompactMeta || b.hasDefaultCompactMeta); } } From 4fd0d05651c4c97fb4c8889dbd0d14ba9dfadebc Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 20:52:46 +0800 Subject: [PATCH 24/48] spotless --- .../generated/core_configuration.html | 2 +- .../java/org/apache/paimon/CoreOptions.java | 4 +- paimon-core/pom.xml | 15 ---- .../paimon/operation/FileStoreCommitImpl.java | 2 +- .../paimon/operation/ManifestFileMerger.java | 5 +- .../paimon/operation/ManifestFileSorter.java | 80 ++++++++----------- .../operation/ManifestPickStrategy.java | 5 +- 7 files changed, 43 insertions(+), 70 deletions(-) diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html index 933d00644bce..d0d9a0f26cd6 100644 --- a/docs/layouts/shortcodes/generated/core_configuration.html +++ b/docs/layouts/shortcodes/generated/core_configuration.html @@ -907,7 +907,7 @@
    manifest-sort.enabled
    false Boolean - Whether to invoke manifest sort rewrite right after manifest merge during commit. + Whether to invoke manifest sort rewrite during commit.
    manifest-sort.max-rewrite-size
    diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index 33a3e8afc3af..e1acba90a8bc 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -473,9 +473,7 @@ public InlineElement getDescription() { key("manifest-sort.enabled") .booleanType() .defaultValue(false) - .withDescription( - "Whether to invoke manifest sort rewrite right after manifest merge" - + " during commit."); + .withDescription("Whether to invoke manifest sort rewrite during commit."); public static final ConfigOption MANIFEST_SORT_PARTITION_FIELD = key("manifest-sort.partition-field") diff --git a/paimon-core/pom.xml b/paimon-core/pom.xml index e570324ee9e3..9506bdf03959 100644 --- a/paimon-core/pom.xml +++ b/paimon-core/pom.xml @@ -36,21 +36,6 @@ under the License. - - com.aliyun.jindodata - jindo-core-macos-11_0-aarch64 - 6.9.1 - - - - - - - - org.apache.paimon - paimon-ali-jindo - 1.4-ali-SNAPSHOT - org.apache.paimon paimon-common diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java index df24f019834b..0e537d733736 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java @@ -1189,7 +1189,7 @@ private boolean compactManifestOnce() { mergeBeforeManifests, manifestFile, partitionType, - CoreOptions.fromMap(compactOptions.toMap())); + new CoreOptions(compactOptions)); if (new HashSet<>(mergeBeforeManifests).equals(new HashSet<>(mergeAfterManifests))) { // no need to commit this snapshot, because no compact were happened diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 
e197868849c1..36de3d2ecdef 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -85,7 +85,7 @@ public static List merge( } // Otherwise try full compaction first, then minor compaction if needed - Optional> merged = + Optional> fullCompacted = tryFullCompaction( input, newFilesForAbort, @@ -94,8 +94,7 @@ public static List merge( manifestFullCompactionSize, partitionType, manifestReadParallelism); - - return merged.orElseGet( + return fullCompacted.orElseGet( () -> tryMinorCompaction( input, diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 445921dea943..fcc06a0591f8 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -30,6 +30,7 @@ import org.apache.paimon.types.DataType; import org.apache.paimon.types.DecimalType; import org.apache.paimon.types.RowType; +import org.apache.paimon.utils.Filter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -102,16 +103,15 @@ static Optional> trySortRewrite( : buildLevelSortedRuns(lsmFiles, sortFieldIndex, sortFieldType); // Step 4: Pick runs to compact. 
- int sizeAmpThreshold = options.maxSizeAmplificationPercent(); - int sizeRatioThreshold = options.sortedRunSizeRatio(); ManifestPickStrategy pickStrategy = - new ManifestPickStrategy(sizeAmpThreshold, sizeRatioThreshold); + new ManifestPickStrategy( + options.maxSizeAmplificationPercent(), options.sortedRunSizeRatio()); List pickedRuns = pickStrategy.pick(levelRuns); if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { LOG.debug( "Manifest sort rewrite skipped: no runs picked and no defaultCompaction files."); - return Optional.of(input); + return Optional.empty(); } LOG.info( @@ -138,34 +138,27 @@ static Optional> trySortRewrite( } pickedFiles.addAll(defaultCompactionMap.keySet()); - long maxRewriteSize = options.manifestSortMaxRewriteSize(); - List
    sections = splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType, defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); - LOG.info("After mergeSmallAdjacentSections: sections={}.", sections.size()); - - List sortNewFiles = new ArrayList<>(); - List rewritten = - rewriteSections( - sections, - defaultCompactionMap, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - suggestedMetaSize, - maxRewriteSize, - sortNewFiles, - manifestReadParallelism); - result.addAll(rewritten); + rewriteSections( + sections, + defaultCompactionMap, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + suggestedMetaSize, + options.manifestSortMaxRewriteSize(), + result, + newFilesForAbort, + manifestReadParallelism); - newFilesForAbort.addAll(sortNewFiles); LOG.info( "Manifest sort rewrite completed: sections={}, newFiles={}, resultFiles={}.", sections.size(), - sortNewFiles.size(), + newFilesForAbort.size(), result.size()); return Optional.of(result); } @@ -419,10 +412,8 @@ private static List
    mergeSmallAdjacentSections( *

    Within threshold: read all metas, sort and rewrite the entire section. Exceeds threshold * but contains defaultCompaction files: only rewrite sub-segments around those files. Exceeds * threshold with no defaultCompaction files: skip (keep as-is). - * - * @return the list of result manifest files (both rewritten and kept-as-is) */ - private static List rewriteSections( + private static void rewriteSections( List

    sections, Map defaultCompactionMap, ManifestFile manifestFile, @@ -431,16 +422,15 @@ private static List rewriteSections( Set deleteEntries, long suggestedMetaSize, long maxRewriteSize, + List result, List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { - List result = new ArrayList<>(); long processedSize = 0; boolean reachedLimit = false; for (int i = 0; i < sections.size(); i++) { Section section = sections.get(i); - // Single-file section without defaultCompaction: already sorted, skip rewrite. if (section.files.size() == 1) { sortAndRewriteSection( section.files, @@ -454,11 +444,9 @@ private static List rewriteSections( manifestReadParallelism); continue; } - long sectionSize = section.totalSize; - boolean exceedsThreshold = processedSize + sectionSize > maxRewriteSize; - if (!exceedsThreshold) { - processedSize += sectionSize; + if (processedSize + section.totalSize <= maxRewriteSize) { + processedSize += section.totalSize; sortAndRewriteSection( section.files, manifestFile, @@ -472,18 +460,18 @@ private static List rewriteSections( } else if (!reachedLimit) { // First time exceeding threshold without defaultCompaction: // partial rewrite within remaining budget. 
- long remaining = maxRewriteSize - processedSize; - processedSize += sectionSize; + long rewriteTotalSize = maxRewriteSize - processedSize; + processedSize += section.totalSize; // Split section into two parts: files within budget and remaining files - List toRewrite = new ArrayList<>(); + List rewriteFiles = new ArrayList<>(); List remainingFiles = new ArrayList<>(); long rewriteSize = 0; long remainingSize = 0; boolean remainingHasDefault = false; for (ManifestFileMeta file : section.files) { - if (rewriteSize + file.fileSize() <= remaining) { - toRewrite.add(file); + if (rewriteSize + file.fileSize() <= rewriteTotalSize) { + rewriteFiles.add(file); rewriteSize += file.fileSize(); } else { remainingFiles.add(file); @@ -495,7 +483,7 @@ private static List rewriteSections( } sortAndRewriteSection( - toRewrite, + rewriteFiles, manifestFile, sortFieldIndex, sortFieldType, @@ -529,7 +517,6 @@ private static List rewriteSections( result.addAll(section.files); } } - return result; } /** Rewrite sub-segments within a section that exceeds the rewrite threshold. 
*/ @@ -625,9 +612,7 @@ private static void sortAndRewriteSection( manifestFile.createRollingWriter(); Exception exception = null; try { - for (ManifestEntry entry : entriesToRewrite) { - writer.write(entry); - } + writer.write(entriesToRewrite); } catch (Exception e) { exception = e; } finally { @@ -654,7 +639,7 @@ static int compareSortKey( if (c != 0) { return c; } - // ADD before DELETE, so that mergeEntries can correctly cancel pairs + // ADD before DELETE int kindCmp = a.kind().compareTo(b.kind()); if (kindCmp != 0) { return kindCmp; @@ -741,7 +726,12 @@ private static FullCompactionReadResult readForSortRewrite( if (deletedIdentifiers.isEmpty()) { entries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); } else { - for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { + for (ManifestEntry entry : + manifestFile.read( + meta.fileName(), + meta.fileSize(), + FileEntry.addFilter(), + Filter.alwaysTrue())) { if (!deletedIdentifiers.contains(entry.identifier())) { entries.add(entry); } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index 9744df5d7f1c..2cc9faf2ec26 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -40,6 +40,7 @@ public class ManifestPickStrategy { private final int sizeAmpThreshold; private final int sizeRatioThreshold; + private static final int MAX_LEVEL = 4; public ManifestPickStrategy(int sizeAmpThreshold, int sizeRatioThreshold) { Preconditions.checkArgument(sizeAmpThreshold > 0, "sizeAmpThreshold must be positive"); @@ -55,7 +56,7 @@ public ManifestPickStrategy(int sizeAmpThreshold, int sizeRatioThreshold) { * @return list of picked runs to compact */ public List pick(List levelRuns) { - if (levelRuns.isEmpty() || levelRuns.size() < 5) { + if 
(levelRuns.isEmpty() || levelRuns.size() <= MAX_LEVEL) { return new ArrayList<>(); } @@ -131,7 +132,7 @@ private List pickForSizeRatioAndForce(List pickedSize += run.totalSize(); } else { long nextRunSize = run.totalSize(); - if (pickedSize * (100.0 + sizeRatioThreshold) / 100.0 >= nextRunSize) { + if (pickedSize * (100 + sizeRatioThreshold) >= nextRunSize * 100L) { picked.add(run); pickedSize += nextRunSize; } From d2aca057ffbeedcf219b0524836f15d613df27c3 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 22:19:52 +0800 Subject: [PATCH 25/48] fix --- .../java/org/apache/paimon/operation/ManifestFileSorter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index fcc06a0591f8..4c3ef1af7a81 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -83,7 +83,7 @@ static Optional> trySortRewrite( int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); DataType sortFieldType = partitionType.getTypeAt(sortFieldIndex); - // Step 2: Classify manifests into defaultCompaction and LSM groups. + // Step 2: Classify manifests into defaultCompaction and LSM. 
ClassifyResult classified = classifyManifests( input, From 115e2e6ef9d97ba6c265e4d28d6ca30669859f67 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 23:14:58 +0800 Subject: [PATCH 26/48] fmt --- .../java/org/apache/paimon/operation/ManifestFileSorter.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 4c3ef1af7a81..64f93218342c 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -215,6 +215,7 @@ private static ClassifyResult classifyManifests( } return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); } + /** * Build level-sorted runs from a list of manifest files. Sorts files by min partition value, * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 @@ -405,6 +406,7 @@ private static List
    mergeSmallAdjacentSections( } return merged; } + /** * Iterate over sections, decide whether to rewrite each section fully or partially based on the * maxRewriteSize threshold and whether the section contains defaultCompaction files. From 531678666b009c5c6e68540f9b017357e2c660a7 Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 10:05:32 +0800 Subject: [PATCH 27/48] doc --- .../shortcodes/generated/core_configuration.html | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html index d0d9a0f26cd6..a796f7cb5fd8 100644 --- a/docs/layouts/shortcodes/generated/core_configuration.html +++ b/docs/layouts/shortcodes/generated/core_configuration.html @@ -909,19 +909,18 @@ Boolean Whether to invoke manifest sort rewrite during commit. - -
    manifest-sort.max-rewrite-size
    - 256 mb - MemorySize - Maximum total size of manifest files to rewrite in a single sort rewrite pass. Sections exceeding this limit are skipped. Set to a larger value to allow more aggressive sort rewriting. - -
    manifest-sort.partition-field
    (none) String Partition field name to sort manifest entries by. Validated by schema validation; If not configured, defaults to the first partition field. + +
    manifest-sort.max-rewrite-size
    + 256 mb + MemorySize + Maximum total size of manifest files to rewrite in a single sort rewrite pass. Sections exceeding this limit are skipped. Set to a larger value to allow more aggressive sort rewriting. +
    manifest.target-file-size
    8 mb From ac1ba13a1470c832655dec442d95476dabce9eec Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 12:57:19 +0800 Subject: [PATCH 28/48] comparator --- .../paimon/operation/ManifestFileSorter.java | 186 +++++------------- .../paimon/manifest/ManifestFileMetaTest.java | 128 ++---------- 2 files changed, 64 insertions(+), 250 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 64f93218342c..7220aad93acd 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -19,6 +19,8 @@ package org.apache.paimon.operation; import org.apache.paimon.CoreOptions; +import org.apache.paimon.codegen.CodeGenUtils; +import org.apache.paimon.codegen.RecordComparator; import org.apache.paimon.data.BinaryRow; import org.apache.paimon.io.RollingFileWriter; import org.apache.paimon.manifest.FileEntry; @@ -27,8 +29,6 @@ import org.apache.paimon.manifest.ManifestFileMeta; import org.apache.paimon.operation.ManifestFileMerger.FullCompactionReadResult; import org.apache.paimon.partition.PartitionPredicate; -import org.apache.paimon.types.DataType; -import org.apache.paimon.types.DecimalType; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.Filter; @@ -81,7 +81,9 @@ static Optional> trySortRewrite( "Cannot resolve sort field for manifest sort rewrite."); } int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); - DataType sortFieldType = partitionType.getTypeAt(sortFieldIndex); + RecordComparator fieldComparator = + CodeGenUtils.newRecordComparator( + partitionType.getFieldTypes(), new int[] {sortFieldIndex}); // Step 2: Classify manifests into defaultCompaction and LSM. ClassifyResult classified = @@ -100,7 +102,7 @@ static Optional> trySortRewrite( List levelRuns = lsmFiles.isEmpty() ? 
new ArrayList<>() - : buildLevelSortedRuns(lsmFiles, sortFieldIndex, sortFieldType); + : buildLevelSortedRuns(lsmFiles, fieldComparator); // Step 4: Pick runs to compact. ManifestPickStrategy pickStrategy = @@ -139,15 +141,14 @@ static Optional> trySortRewrite( pickedFiles.addAll(defaultCompactionMap.keySet()); List
    sections = - splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType, defaultCompactionMap); + splitIntoSections(pickedFiles, fieldComparator, defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); rewriteSections( sections, defaultCompactionMap, manifestFile, - sortFieldIndex, - sortFieldType, + fieldComparator, deleteEntries, suggestedMetaSize, options.manifestSortMaxRewriteSize(), @@ -222,24 +223,18 @@ private static ClassifyResult classifyManifests( * largest to level 1~4, rest to level 0). */ static List buildLevelSortedRuns( - List input, int sortFieldIndex, DataType sortFieldType) { + List input, RecordComparator fieldComparator) { // Step 1: Sort by min value (if equal, then by max value) input.sort( (a, b) -> { int cmp = - compareField( - a.partitionStats().minValues(), - b.partitionStats().minValues(), - sortFieldIndex, - sortFieldType); + fieldComparator.compare( + a.partitionStats().minValues(), b.partitionStats().minValues()); if (cmp != 0) { return cmp; } - return compareField( - a.partitionStats().maxValues(), - b.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); + return fieldComparator.compare( + a.partitionStats().maxValues(), b.partitionStats().maxValues()); }); // Step 2: Interval graph coloring algorithm - assign files to runs @@ -249,37 +244,28 @@ static List buildLevelSortedRuns( (r1, r2) -> { ManifestFileMeta last1 = r1.get(r1.size() - 1); ManifestFileMeta last2 = r2.get(r2.size() - 1); - return compareField( + return fieldComparator.compare( last1.partitionStats().maxValues(), - last2.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); + last2.partitionStats().maxValues()); }); for (ManifestFileMeta file : input) { - boolean addedToExisting = false; - - // Try to find a run where current file's min >= run's max - if (!runs.isEmpty()) { - List earliestRun = runs.peek(); - ManifestFileMeta last = earliestRun.get(earliestRun.size() - 1); - - if (compareField( - 
file.partitionStats().minValues(), - last.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType) - >= 0) { - // Current file can be added to this run - runs.poll(); - earliestRun.add(file); - runs.offer(earliestRun); - addedToExisting = true; - } - } - - if (!addedToExisting) { - // Create a new run + List earliestRun = runs.poll(); + if (earliestRun == null) { + // No existing runs, create a new one + List newRun = new ArrayList<>(); + newRun.add(file); + runs.offer(newRun); + } else if (fieldComparator.compare( + file.partitionStats().minValues(), + earliestRun.get(earliestRun.size() - 1).partitionStats().maxValues()) + >= 0) { + // Current file's min >= run's max, append to this run + earliestRun.add(file); + runs.offer(earliestRun); + } else { + // Overlap detected, put the run back and create a new one + runs.offer(earliestRun); List newRun = new ArrayList<>(); newRun.add(file); runs.offer(newRun); @@ -312,25 +298,18 @@ static List buildLevelSortedRuns( */ static List
    splitIntoSections( List pickedFiles, - int sortFieldIndex, - DataType sortFieldType, + RecordComparator fieldComparator, Map defaultCompactionMap) { pickedFiles.sort( (a, b) -> { int cmp = - compareField( - a.partitionStats().minValues(), - b.partitionStats().minValues(), - sortFieldIndex, - sortFieldType); + fieldComparator.compare( + a.partitionStats().minValues(), b.partitionStats().minValues()); if (cmp != 0) { return cmp; } - return compareField( - a.partitionStats().maxValues(), - b.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); + return fieldComparator.compare( + a.partitionStats().maxValues(), b.partitionStats().maxValues()); }); List
    sections = new ArrayList<>(); @@ -345,12 +324,7 @@ static List
    splitIntoSections( for (int i = 1; i < pickedFiles.size(); i++) { ManifestFileMeta file = pickedFiles.get(i); - if (compareField( - file.partitionStats().minValues(), - sectionMaxBound, - sortFieldIndex, - sortFieldType) - >= 0) { + if (fieldComparator.compare(file.partitionStats().minValues(), sectionMaxBound) >= 0) { sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); currentFiles = new ArrayList<>(); currentTotalSize = 0; @@ -364,11 +338,7 @@ static List
    splitIntoSections( if (!currentHasDefault && defaultCompactionMap.containsKey(file)) { currentHasDefault = true; } - if (compareField( - file.partitionStats().maxValues(), - sectionMaxBound, - sortFieldIndex, - sortFieldType) + if (fieldComparator.compare(file.partitionStats().maxValues(), sectionMaxBound) > 0) { sectionMaxBound = file.partitionStats().maxValues(); } @@ -419,8 +389,7 @@ private static void rewriteSections( List
    sections, Map defaultCompactionMap, ManifestFile manifestFile, - int sortFieldIndex, - DataType sortFieldType, + RecordComparator fieldComparator, Set deleteEntries, long suggestedMetaSize, long maxRewriteSize, @@ -437,8 +406,7 @@ private static void rewriteSections( sortAndRewriteSection( section.files, manifestFile, - sortFieldIndex, - sortFieldType, + fieldComparator, deleteEntries, defaultCompactionMap, result, @@ -452,8 +420,7 @@ private static void rewriteSections( sortAndRewriteSection( section.files, manifestFile, - sortFieldIndex, - sortFieldType, + fieldComparator, deleteEntries, defaultCompactionMap, result, @@ -487,8 +454,7 @@ private static void rewriteSections( sortAndRewriteSection( rewriteFiles, manifestFile, - sortFieldIndex, - sortFieldType, + fieldComparator, deleteEntries, defaultCompactionMap, result, @@ -508,8 +474,7 @@ private static void rewriteSections( section.files, defaultCompactionMap, manifestFile, - sortFieldIndex, - sortFieldType, + fieldComparator, deleteEntries, suggestedMetaSize, result, @@ -526,8 +491,7 @@ private static void rewriteSubSegments( List section, Map defaultCompactionMap, ManifestFile manifestFile, - int sortFieldIndex, - DataType sortFieldType, + RecordComparator fieldComparator, @Nullable Set deleteEntries, long manifestTargetSize, List result, @@ -544,8 +508,7 @@ private static void rewriteSubSegments( sortAndRewriteSection( subSegment, manifestFile, - sortFieldIndex, - sortFieldType, + fieldComparator, deleteEntries, defaultCompactionMap, result, @@ -560,8 +523,7 @@ private static void rewriteSubSegments( sortAndRewriteSection( subSegment, manifestFile, - sortFieldIndex, - sortFieldType, + fieldComparator, deleteEntries, defaultCompactionMap, result, @@ -583,8 +545,7 @@ private static void rewriteSubSegments( private static void sortAndRewriteSection( List section, ManifestFile manifestFile, - int sortFieldIndex, - DataType sortFieldType, + RecordComparator fieldComparator, Set deletedIdentifiers, Map 
defaultCompactionMap, List result, @@ -608,7 +569,7 @@ private static void sortAndRewriteSection( } if (!entriesToRewrite.isEmpty()) { - entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); + entriesToRewrite.sort((a, b) -> compareSortKey(a, b, fieldComparator)); RollingFileWriter writer = manifestFile.createRollingWriter(); @@ -631,13 +592,12 @@ private static void sortAndRewriteSection( } /** - * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}. + * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, kind, fileName)}. * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field * value AND the same data file are emitted contiguously. */ - static int compareSortKey( - ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) { - int c = compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType); + static int compareSortKey(ManifestEntry a, ManifestEntry b, RecordComparator fieldComparator) { + int c = fieldComparator.compare(a.partition(), b.partition()); if (c != 0) { return c; } @@ -649,50 +609,6 @@ static int compareSortKey( return a.file().fileName().compareTo(b.file().fileName()); } - /** - * Compares the value at field {@code k} of two {@link BinaryRow}s according to {@code type}. 
- */ - static int compareField(BinaryRow a, BinaryRow b, int k, DataType type) { - switch (type.getTypeRoot()) { - case INTEGER: - case DATE: - return Integer.compare(a.getInt(k), b.getInt(k)); - case BIGINT: - return Long.compare(a.getLong(k), b.getLong(k)); - case SMALLINT: - return Short.compare(a.getShort(k), b.getShort(k)); - case TINYINT: - return Byte.compare(a.getByte(k), b.getByte(k)); - case FLOAT: - return Float.compare(a.getFloat(k), b.getFloat(k)); - case DOUBLE: - return Double.compare(a.getDouble(k), b.getDouble(k)); - case BOOLEAN: - return Boolean.compare(a.getBoolean(k), b.getBoolean(k)); - case VARCHAR: - case CHAR: - return a.getString(k).compareTo(b.getString(k)); - case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - return a.getTimestamp(k, type.defaultSize()) - .compareTo(b.getTimestamp(k, type.defaultSize())); - case DECIMAL: - DecimalType dt = (DecimalType) type; - return a.getDecimal(k, dt.getPrecision(), dt.getScale()) - .compareTo(b.getDecimal(k, dt.getPrecision(), dt.getScale())); - default: - String errorMsg = - String.format( - "Unsupported partition field type '%s' for manifest sort rewrite. " - + "Supported types: TINYINT, SMALLINT, INTEGER, BIGINT, " - + "FLOAT, DOUBLE, BOOLEAN, CHAR, VARCHAR, DATE, TIMESTAMP, " - + "DECIMAL.", - type.getTypeRoot()); - LOG.error(errorMsg); - throw new UnsupportedOperationException(errorMsg); - } - } - /** * Resolve the partition field to sort manifests by. 
* diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index d1c15d412fad..e95f4cf21685 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -27,12 +27,10 @@ import org.apache.paimon.operation.ManifestFileMerger; import org.apache.paimon.options.Options; import org.apache.paimon.partition.PartitionPredicate; +import org.apache.paimon.shade.guava30.com.google.common.collect.Lists; import org.apache.paimon.types.IntType; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.FailingFileIO; - -import org.apache.paimon.shade.guava30.com.google.common.collect.Lists; - import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.RepeatedTest; @@ -882,12 +880,12 @@ private void beforeFirstRead() throws IOException { *

    Input manifests (deliberately unordered and overlapping): * *

    -     *   manifest-A: partitions [5, 9]  (entries in partition 5,6,7,8,9)
    -     *   manifest-B: partitions [0, 4]  (entries in partition 0,1,2,3,4)
    +     *   manifest-A: partitions [5, 13]  (entries in partition 5,6,...,13)
    +     *   manifest-B: partitions [0, 9]  (entries in partition 0,1,...,9)
          *   manifest-C: partitions [3, 7]  (entries in partition 3,4,5,6,7) -- overlaps A and B
          *   manifest-D: partitions [8, 12] (entries in partition 8,9,10,11,12) -- overlaps A
    -     *   manifest-E: partitions [1, 3]  (entries in partition 1,2,3) -- overlaps B and C
    -     *   manifest-F: partitions [10, 14](entries in partition 10,11,12,13,14) -- overlaps D
    +     *   manifest-E: partitions [1, 6]  (entries in partition 1,2,...,6) -- overlaps B and C
    +     *   manifest-F: partitions [4, 14] (entries in partition 4,5,...,14) -- overlaps D
          * 
    * *

    After sort rewrite, all surviving ADD entries should be sorted by partition field. @@ -896,16 +894,16 @@ private void beforeFirstRead() throws IOException { public void testManifestSortWithOverlappingPartitions() { List input = new ArrayList<>(); - // manifest-A: partitions [5, 9] + // manifest-A: partitions [5, 13] List entriesA = new ArrayList<>(); - for (int p = 5; p <= 9; p++) { + for (int p = 5; p <= 13; p++) { entriesA.add(makeEntry(true, String.format("A-p%d", p), p)); } input.add(makeManifest(entriesA.toArray(new ManifestEntry[0]))); - // manifest-B: partitions [0, 4] + // manifest-B: partitions [0, 9] List entriesB = new ArrayList<>(); - for (int p = 0; p <= 4; p++) { + for (int p = 0; p <= 9; p++) { entriesB.add(makeEntry(true, String.format("B-p%d", p), p)); } input.add(makeManifest(entriesB.toArray(new ManifestEntry[0]))); @@ -924,23 +922,22 @@ public void testManifestSortWithOverlappingPartitions() { } input.add(makeManifest(entriesD.toArray(new ManifestEntry[0]))); - // manifest-E: partitions [1, 3] -- overlaps with B and C + // manifest-E: partitions [1, 6] -- overlaps with B and C List entriesE = new ArrayList<>(); - for (int p = 1; p <= 3; p++) { + for (int p = 1; p <= 6; p++) { entriesE.add(makeEntry(true, String.format("E-p%d", p), p)); } input.add(makeManifest(entriesE.toArray(new ManifestEntry[0]))); - // manifest-F: partitions [10, 14] -- overlaps with D + // manifest-F: partitions [4, 14] -- overlaps with D List entriesF = new ArrayList<>(); - for (int p = 10; p <= 14; p++) { + for (int p = 4; p <= 14; p++) { entriesF.add(makeEntry(true, String.format("F-p%d", p), p)); } input.add(makeManifest(entriesF.toArray(new ManifestEntry[0]))); Options testOptions = new Options(); testOptions.set("manifest-sort.enabled", "true"); - List merged = ManifestFileMerger.merge( input, @@ -963,100 +960,6 @@ public void testManifestSortWithOverlappingPartitions() { } } - // Verify manifest files themselves are ordered by minValues - for (int i = 1; i < 
merged.size(); i++) { - int prevMin = merged.get(i - 1).partitionStats().minValues().getInt(0); - int currMin = merged.get(i).partitionStats().minValues().getInt(0); - assertThat(currMin).isGreaterThanOrEqualTo(prevMin); - } - } - - /** - * Test manifest sort with heavily overlapping manifests that form multiple sorted runs. This - * exercises buildLevelSortedRuns and the LSM level assignment logic. - * - *

    Creates manifests whose partition ranges overlap in various ways: - * - *

    -     *   run1 (non-overlapping): [0,2], [3,5], [6,8]
    -     *   run2 (overlapping with run1): [1,4], [5,7]
    -     *   run3 (overlapping with both): [0,9]
    -     * 
    - */ - @Test - public void testManifestSortWithMultipleOverlappingRuns() { - List input = new ArrayList<>(); - - // Run1: non-overlapping within itself [0,2], [3,5], [6,8] - input.add( - makeManifest( - makeEntry(true, "r1a-p0", 0), - makeEntry(true, "r1a-p1", 1), - makeEntry(true, "r1a-p2", 2))); - input.add( - makeManifest( - makeEntry(true, "r1b-p3", 3), - makeEntry(true, "r1b-p4", 4), - makeEntry(true, "r1b-p5", 5))); - input.add( - makeManifest( - makeEntry(true, "r1c-p6", 6), - makeEntry(true, "r1c-p7", 7), - makeEntry(true, "r1c-p8", 8))); - - // Run2: overlaps with run1 [1,4], [5,7] - input.add( - makeManifest( - makeEntry(true, "r2a-p1", 1), - makeEntry(true, "r2a-p2", 2), - makeEntry(true, "r2a-p3", 3), - makeEntry(true, "r2a-p4", 4))); - input.add( - makeManifest( - makeEntry(true, "r2b-p5", 5), - makeEntry(true, "r2b-p6", 6), - makeEntry(true, "r2b-p7", 7))); - - // Run3: a large manifest overlapping everything [0,9] - List run3Entries = new ArrayList<>(); - for (int p = 0; p <= 9; p++) { - run3Entries.add(makeEntry(true, String.format("r3-p%d", p), p)); - } - input.add(makeManifest(run3Entries.toArray(new ManifestEntry[0]))); - - Options testOptions = new Options(); - testOptions.set("manifest-sort.enabled", "true"); - - List merged = - ManifestFileMerger.merge( - input, - manifestFile, - getPartitionType(), - CoreOptions.fromMap(testOptions.toMap())); - - // Verify no data loss - assertEquivalentEntries(input, merged); - - // Verify entries within each output manifest are sorted by partition - for (ManifestFileMeta meta : merged) { - List entries = manifestFile.read(meta.fileName(), meta.fileSize()); - for (int i = 1; i < entries.size(); i++) { - int prevPartition = entries.get(i - 1).partition().getInt(0); - int currPartition = entries.get(i).partition().getInt(0); - assertThat(currPartition) - .as( - "Entries within manifest should be sorted, but found %d after %d", - currPartition, prevPartition) - .isGreaterThanOrEqualTo(prevPartition); - } - 
} - - // Verify output manifests are ordered by minValues - for (int i = 1; i < merged.size(); i++) { - int prevMin = merged.get(i - 1).partitionStats().minValues().getInt(0); - int currMin = merged.get(i).partitionStats().minValues().getInt(0); - assertThat(currMin).isGreaterThanOrEqualTo(prevMin); - } } /** @@ -1118,11 +1021,6 @@ public void testManifestSortEliminatesDeleteEntries() { input.add(makeManifest(makeEntry(false, "C-p7", 7), makeEntry(true, "new-p7", 7))); Options testOptions = new Options(); - // Set target file size very large so all input manifests are considered "small" - // (fileSize < suggestedMetaSize), which makes them all satisfy mustChange condition - testOptions.set("manifest.target-file-size", "16MB"); - // Set full-compaction threshold very small to ensure it triggers - testOptions.set("manifest.full-compaction-threshold-size", "1B"); testOptions.set("manifest-sort.enabled", "true"); List merged = From d95fa0866ed660d6d818fb85cb3acdb87d172ed9 Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 16:23:48 +0800 Subject: [PATCH 29/48] deleteTrigger --- .../paimon/operation/ManifestFileSorter.java | 81 +++++++++++++------ .../paimon/manifest/ManifestFileMetaTest.java | 5 +- 2 files changed, 60 insertions(+), 26 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 7220aad93acd..56d6075c77e6 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -38,6 +38,7 @@ import javax.annotation.Nullable; import java.util.ArrayList; +import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; @@ -74,6 +75,8 @@ static Optional> trySortRewrite( long suggestedMetaSize = options.manifestTargetSize().getBytes(); Integer manifestReadParallelism = 
options.scanManifestParallelism(); String sortPartitionField = options.manifestSortPartitionField(); + long manifestFullCompactionThresholdSize = + options.manifestFullCompactionThresholdSize().getBytes(); // Step 1: Resolve sort field. String sortField = resolveSortField(sortPartitionField, partitionType); if (sortField == null) { @@ -86,15 +89,17 @@ static Optional> trySortRewrite( partitionType.getFieldTypes(), new int[] {sortFieldIndex}); // Step 2: Classify manifests into defaultCompaction and LSM. + List result = new ArrayList<>(); ClassifyResult classified = classifyManifests( input, + result, suggestedMetaSize, manifestFile, partitionType, + manifestFullCompactionThresholdSize, manifestReadParallelism); - Map defaultCompactionMap = - classified.defaultCompactionManifests; + Map defaultCompactionMap = classified.defaultCompactionManifests; List lsmFiles = classified.lsmFiles; Set deleteEntries = classified.deleteEntries; @@ -131,7 +136,7 @@ static Optional> trySortRewrite( reusedFiles.addAll(run.files()); } } - List result = new ArrayList<>(reusedFiles); + result.addAll(reusedFiles); // Step 5: Split picked files into sections, sort and rewrite each. 
List pickedFiles = new ArrayList<>(); @@ -151,6 +156,7 @@ static Optional> trySortRewrite( fieldComparator, deleteEntries, suggestedMetaSize, + options.manifestMergeMinCount(), options.manifestSortMaxRewriteSize(), result, newFilesForAbort, @@ -176,11 +182,23 @@ static Optional> trySortRewrite( */ private static ClassifyResult classifyManifests( List input, + List result, long suggestedMetaSize, ManifestFile manifestFile, RowType partitionType, + long sizeTrigger, @Nullable Integer manifestReadParallelism) { - Map defaultCompactionManifests = new LinkedHashMap<>(); + Filter mustChange = + file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize; + long totalDeltaFileSize = 0; + for (ManifestFileMeta file : input) { + if (mustChange.test(file)) { + totalDeltaFileSize += file.fileSize(); + } + } + boolean removeAllDelete = totalDeltaFileSize >= sizeTrigger; + + Map defaultCompactionManifests = new LinkedHashMap<>(); List lsmFiles = new LinkedList<>(input); Set deleteEntries = FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); @@ -209,11 +227,19 @@ private static ClassifyResult classifyManifests( file.partitionStats().minValues(), file.partitionStats().maxValues(), file.partitionStats().nullCounts()); - if (small || inDeleteRange) { - iterator.remove(); - defaultCompactionManifests.put(file, new boolean[] {small, inDeleteRange}); + if (removeAllDelete) { + if (small || inDeleteRange) { + iterator.remove(); + defaultCompactionManifests.put(file, inDeleteRange); + } + } else { + if (inDeleteRange) { + iterator.remove(); + result.add(file); + } } } + deleteEntries = removeAllDelete ? deleteEntries : Collections.emptySet(); return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); } @@ -299,7 +325,7 @@ static List buildLevelSortedRuns( static List
    splitIntoSections( List pickedFiles, RecordComparator fieldComparator, - Map defaultCompactionMap) { + Map defaultCompactionMap) { pickedFiles.sort( (a, b) -> { int cmp = @@ -387,11 +413,12 @@ private static List
    mergeSmallAdjacentSections( */ private static void rewriteSections( List
    sections, - Map defaultCompactionMap, + Map defaultCompactionMap, ManifestFile manifestFile, RecordComparator fieldComparator, Set deleteEntries, long suggestedMetaSize, + int suggestedMinMetaCount, long maxRewriteSize, List result, List sortNewFiles, @@ -477,6 +504,7 @@ private static void rewriteSections( fieldComparator, deleteEntries, suggestedMetaSize, + suggestedMinMetaCount, result, sortNewFiles, manifestReadParallelism); @@ -489,11 +517,12 @@ private static void rewriteSections( /** Rewrite sub-segments within a section that exceeds the rewrite threshold. */ private static void rewriteSubSegments( List section, - Map defaultCompactionMap, + Map defaultCompactionMap, ManifestFile manifestFile, RecordComparator fieldComparator, @Nullable Set deleteEntries, long manifestTargetSize, + int suggestedMinMetaCount, List result, List sortNewFiles, @Nullable Integer manifestReadParallelism) @@ -518,17 +547,21 @@ private static void rewriteSubSegments( subSegmentSize = 0; } } - // Flush remaining sub-segment + // Flush remaining sub-segment only if there are enough files to justify rewrite if (!subSegment.isEmpty()) { - sortAndRewriteSection( - subSegment, - manifestFile, - fieldComparator, - deleteEntries, - defaultCompactionMap, - result, - sortNewFiles, - manifestReadParallelism); + if (!deleteEntries.isEmpty() || subSegment.size() >= suggestedMinMetaCount) { + sortAndRewriteSection( + subSegment, + manifestFile, + fieldComparator, + deleteEntries, + defaultCompactionMap, + result, + sortNewFiles, + manifestReadParallelism); + } else { + result.addAll(subSegment); + } } } @@ -547,14 +580,14 @@ private static void sortAndRewriteSection( ManifestFile manifestFile, RecordComparator fieldComparator, Set deletedIdentifiers, - Map defaultCompactionMap, + Map defaultCompactionMap, List result, List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { if (section.size() == 1 && (!defaultCompactionMap.containsKey(section.get(0)) - || 
!defaultCompactionMap.get(section.get(0))[1])) { + || !defaultCompactionMap.get(section.get(0)))) { result.add(section.get(0)); return; } @@ -684,13 +717,13 @@ static Section merge(Section a, Section b) { /** Result of classifying manifest files into default-compaction and LSM groups. */ private static class ClassifyResult { /** key: ManifestFileMeta, value: boolean[]{isSmall, isInDeleteRange}. */ - final Map defaultCompactionManifests; + final Map defaultCompactionManifests; final List lsmFiles; @Nullable final Set deleteEntries; ClassifyResult( - Map defaultCompactionManifests, + Map defaultCompactionManifests, List lsmFiles, @Nullable Set deleteEntries) { this.defaultCompactionManifests = defaultCompactionManifests; diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index e95f4cf21685..e3e4a29b1df0 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -27,10 +27,12 @@ import org.apache.paimon.operation.ManifestFileMerger; import org.apache.paimon.options.Options; import org.apache.paimon.partition.PartitionPredicate; -import org.apache.paimon.shade.guava30.com.google.common.collect.Lists; import org.apache.paimon.types.IntType; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.FailingFileIO; + +import org.apache.paimon.shade.guava30.com.google.common.collect.Lists; + import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.RepeatedTest; @@ -959,7 +961,6 @@ public void testManifestSortWithOverlappingPartitions() { .isGreaterThanOrEqualTo(prevPartition); } } - } /** From 586da76d4ea35e74ef5d61af170adbf85dda548b Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 16:46:15 +0800 Subject: [PATCH 30/48] addSmall --- 
.../org/apache/paimon/operation/ManifestFileSorter.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 56d6075c77e6..ee13e80b3609 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -236,6 +236,9 @@ private static ClassifyResult classifyManifests( if (inDeleteRange) { iterator.remove(); result.add(file); + } else if (small) { + iterator.remove(); + defaultCompactionManifests.put(file, false); } } } @@ -520,7 +523,7 @@ private static void rewriteSubSegments( Map defaultCompactionMap, ManifestFile manifestFile, RecordComparator fieldComparator, - @Nullable Set deleteEntries, + Set deleteEntries, long manifestTargetSize, int suggestedMinMetaCount, List result, @@ -579,7 +582,7 @@ private static void sortAndRewriteSection( List section, ManifestFile manifestFile, RecordComparator fieldComparator, - Set deletedIdentifiers, + Set deleteEntries, Map defaultCompactionMap, List result, List sortNewFiles, @@ -593,7 +596,7 @@ private static void sortAndRewriteSection( } // Parallel read: each meta is read independently Function> reader = - meta -> singletonList(readForSortRewrite(meta, manifestFile, deletedIdentifiers)); + meta -> singletonList(readForSortRewrite(meta, manifestFile, deleteEntries)); List entriesToRewrite = new ArrayList<>(); for (FullCompactionReadResult readResult : From fd262ee182f6465ed9bdcc21451464a2dd016116 Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 16:58:50 +0800 Subject: [PATCH 31/48] test --- .../java/org/apache/paimon/manifest/ManifestFileMetaTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java 
b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index e3e4a29b1df0..a13132223e4f 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -1023,6 +1023,7 @@ public void testManifestSortEliminatesDeleteEntries() { Options testOptions = new Options(); testOptions.set("manifest-sort.enabled", "true"); + testOptions.set("manifest.full-compaction-threshold-size", "10B"); List merged = ManifestFileMerger.merge( From 30907ab1dd0cbddf4990aa92d276c023bb66dfdc Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 17:09:53 +0800 Subject: [PATCH 32/48] doc --- docs/layouts/shortcodes/generated/core_configuration.html | 2 +- paimon-api/src/main/java/org/apache/paimon/CoreOptions.java | 2 +- .../java/org/apache/paimon/operation/ManifestFileSorter.java | 5 ++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html index a796f7cb5fd8..e6fcb5f93587 100644 --- a/docs/layouts/shortcodes/generated/core_configuration.html +++ b/docs/layouts/shortcodes/generated/core_configuration.html @@ -913,7 +913,7 @@
    manifest-sort.partition-field
    (none) String - Partition field name to sort manifest entries by. Validated by schema validation; If not configured, defaults to the first partition field. + Partition field name to sort manifest entries by. Validated by schema validation, If not configured, defaults to the first partition field.
    manifest-sort.max-rewrite-size
    diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index e1acba90a8bc..b293ed95a6c2 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -481,7 +481,7 @@ public InlineElement getDescription() { .noDefaultValue() .withDescription( "Partition field name to sort manifest entries by. Validated by" - + " schema validation; If not configured, defaults to the first partition field."); + + " schema validation, If not configured, defaults to the first partition field."); public static final ConfigOption MANIFEST_SORT_MAX_REWRITE_SIZE = key("manifest-sort.max-rewrite-size") diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index ee13e80b3609..44440bb8b0c2 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -61,8 +61,7 @@ public class ManifestFileSorter { /** * Try to sort-rewrite the merged manifest list by a configured partition field. If the sort - * field cannot be resolved or the delta file size is below the full compaction threshold, the - * input is returned as-is. + * field cannot be resolved, the input is returned as-is. */ static Optional> trySortRewrite( List input, @@ -379,7 +378,7 @@ static List
    splitIntoSections( /** * Merge small adjacent sections to avoid producing too many small rewrite batches. If either - * the pending section or the current section total size is smaller than half of {@code + * the pending section or the current section total size is smaller than {@code * suggestedMetaSize}, they are combined into a single section. */ private static List
    mergeSmallAdjacentSections( From b1fc5eb4f7959e104c971096dc1e150dc4628465 Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 18:07:27 +0800 Subject: [PATCH 33/48] comment --- .../paimon/operation/ManifestFileSorter.java | 63 ++++++++++--------- .../operation/ManifestPickStrategy.java | 3 +- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 44440bb8b0c2..f527c7b330bc 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -172,12 +172,11 @@ static Optional> trySortRewrite( /** * Classify manifest files into default-compaction group and LSM group. * - *

    When full compaction is triggered (totalDeltaFileSize >= threshold), files that must - * change or overlap with delete partitions go into defaultCompactionManifests; the rest stay as - * lsmFiles. + *

    Full compaction: small files and files overlapping delete partitions go into + * defaultCompactionManifests; the rest stay as lsmFiles. * - *

    When full compaction is NOT triggered, adjacent small manifests whose cumulative size - * reaches suggestedMetaSize are grouped into defaultCompactionManifests (minor-style pick). + *

    Non-full compaction: delete-overlapping files go to result, small files go to + * defaultCompactionManifests for minor-style merge. */ private static ClassifyResult classifyManifests( List input, @@ -187,6 +186,7 @@ private static ClassifyResult classifyManifests( RowType partitionType, long sizeTrigger, @Nullable Integer manifestReadParallelism) { + // Calculate total size of files that need compaction to determine full-compaction trigger Filter mustChange = file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize; long totalDeltaFileSize = 0; @@ -197,11 +197,13 @@ private static ClassifyResult classifyManifests( } boolean removeAllDelete = totalDeltaFileSize >= sizeTrigger; + // Initialize classification containers and read delete entries Map defaultCompactionManifests = new LinkedHashMap<>(); List lsmFiles = new LinkedList<>(input); Set deleteEntries = FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); + // Build partition predicate from delete entries for overlap detection PartitionPredicate predicate; if (deleteEntries.isEmpty()) { predicate = PartitionPredicate.ALWAYS_FALSE; @@ -215,6 +217,7 @@ private static ClassifyResult classifyManifests( } } + // Classify each file based on size and delete-partition overlap Iterator iterator = lsmFiles.iterator(); while (iterator.hasNext()) { ManifestFileMeta file = iterator.next(); @@ -227,11 +230,13 @@ private static ClassifyResult classifyManifests( file.partitionStats().maxValues(), file.partitionStats().nullCounts()); if (removeAllDelete) { + // Full compaction: collect small or delete-overlapping files if (small || inDeleteRange) { iterator.remove(); defaultCompactionManifests.put(file, inDeleteRange); } } else { + // Non-full: separate delete-overlapping into result, small into compaction group if (inDeleteRange) { iterator.remove(); result.add(file); @@ -309,7 +314,7 @@ static List buildLevelSortedRuns( // Step 4: Sort by totalSize and assign levels 
result.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); int n = result.size(); - int maxLevel = 4; + int maxLevel = ManifestPickStrategy.MAX_LEVEL; for (int i = 0; i < n; i++) { if (i >= n - maxLevel) { result.get(i).setLevel(i - (n - maxLevel) + 1); @@ -406,12 +411,16 @@ private static List

    mergeSmallAdjacentSections( } /** - * Iterate over sections, decide whether to rewrite each section fully or partially based on the - * maxRewriteSize threshold and whether the section contains defaultCompaction files. + * Rewrite sections with a budget-controlled strategy. * - *

    Within threshold: read all metas, sort and rewrite the entire section. Exceeds threshold - * but contains defaultCompaction files: only rewrite sub-segments around those files. Exceeds - * threshold with no defaultCompaction files: skip (keep as-is). + *

      + *
    • 1. Single-file section: pass through (rewrite only if it has delete entries). + *
    • 2. Within budget: sort and rewrite the entire section. + *
    • 3. First time exceeding budget: partial rewrite within remaining budget, remaining files + * form a new section appended for later processing. + *
    • 4. After budget exhausted with defaultCompaction files: rewrite sub-segments only. + *
    • 5. After budget exhausted without defaultCompaction files: keep as-is. + *
    */ private static void rewriteSections( List
    sections, @@ -456,11 +465,9 @@ private static void rewriteSections( sortNewFiles, manifestReadParallelism); } else if (!reachedLimit) { - // First time exceeding threshold without defaultCompaction: - // partial rewrite within remaining budget. + // Partial rewrite: split section at the budget boundary. long rewriteTotalSize = maxRewriteSize - processedSize; processedSize += section.totalSize; - // Split section into two parts: files within budget and remaining files List rewriteFiles = new ArrayList<>(); List remainingFiles = new ArrayList<>(); long rewriteSize = 0; @@ -490,11 +497,10 @@ private static void rewriteSections( sortNewFiles, manifestReadParallelism); - // Create new section for remaining files and append to sections list + // Append remaining files as a new section for later processing. if (!remainingFiles.isEmpty()) { Section remainingSection = new Section(remainingFiles, remainingSize, remainingHasDefault); - // Append remaining section to the end of sections list sections.add(remainingSection); } reachedLimit = true; @@ -516,7 +522,11 @@ private static void rewriteSections( } } - /** Rewrite sub-segments within a section that exceeds the rewrite threshold. */ + /** + * Batch-rewrite files in a section by splitting them into sub-segments of {@code + * manifestTargetSize}. Tail sub-segment is only rewritten if it has delete entries or meets + * {@code suggestedMinMetaCount}. + */ private static void rewriteSubSegments( List section, Map defaultCompactionMap, @@ -549,7 +559,7 @@ private static void rewriteSubSegments( subSegmentSize = 0; } } - // Flush remaining sub-segment only if there are enough files to justify rewrite + // Flush tail only if delete entries exist or file count >= minCount. 
if (!subSegment.isEmpty()) { if (!deleteEntries.isEmpty() || subSegment.size() >= suggestedMinMetaCount) { sortAndRewriteSection( @@ -568,14 +578,8 @@ private static void rewriteSubSegments( } /** - * Read all entries from a section's manifest files, sort them in memory by the specified - * partition field, filter out DELETE entries and cancelled ADD entries, then write surviving - * entries to new manifest files via the rolling writer. - * - *

    All files participate in sorting, enabling full sort across the entire section. - * - *

    Reading is parallelized via {@code sequentialBatchedExecute} following the same pattern as - * {@link ManifestFileMerger#tryFullCompaction}. + * Read entries from a section's manifest files, sort by partition field, and write to new + * manifests. Single non-delete-range files are passed through without rewrite. */ private static void sortAndRewriteSection( List section, @@ -587,13 +591,13 @@ private static void sortAndRewriteSection( List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { + // Skip rewrite for single file not in delete-range. if (section.size() == 1 - && (!defaultCompactionMap.containsKey(section.get(0)) - || !defaultCompactionMap.get(section.get(0)))) { + && !defaultCompactionMap.getOrDefault(section.get(0), false)) { result.add(section.get(0)); return; } - // Parallel read: each meta is read independently + // Read all entries in parallel. Function> reader = meta -> singletonList(readForSortRewrite(meta, manifestFile, deleteEntries)); @@ -604,6 +608,7 @@ private static void sortAndRewriteSection( } if (!entriesToRewrite.isEmpty()) { + // Sort and write to new manifest files. 
entriesToRewrite.sort((a, b) -> compareSortKey(a, b, fieldComparator)); RollingFileWriter writer = diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index 2cc9faf2ec26..3a8693d4dcf0 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -38,9 +38,10 @@ */ public class ManifestPickStrategy { + public static final int MAX_LEVEL = 4; + private final int sizeAmpThreshold; private final int sizeRatioThreshold; - private static final int MAX_LEVEL = 4; public ManifestPickStrategy(int sizeAmpThreshold, int sizeRatioThreshold) { Preconditions.checkArgument(sizeAmpThreshold > 0, "sizeAmpThreshold must be positive"); From 91e22a1b20e2ef702bf1006502e1ed965ba57fbb Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 19:45:26 +0800 Subject: [PATCH 34/48] modifyTests --- .../paimon/operation/ManifestFileSorter.java | 7 +- .../paimon/manifest/ManifestFileMetaTest.java | 251 +++++++++++++++++- .../paimon/schema/SchemaValidationTest.java | 50 ++-- 3 files changed, 258 insertions(+), 50 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index f527c7b330bc..7f1a7a455fa7 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -416,8 +416,8 @@ private static List

    mergeSmallAdjacentSections( *
      *
    • 1. Single-file section: pass through (rewrite only if it has delete entries). *
    • 2. Within budget: sort and rewrite the entire section. - *
    • 3. First time exceeding budget: partial rewrite within remaining budget, remaining files - * form a new section appended for later processing. + *
    • 3. First time exceeding budget: partial rewrite within remaining budget, remaining + * files form a new section appended for later processing. *
    • 4. After budget exhausted with defaultCompaction files: rewrite sub-segments only. *
    • 5. After budget exhausted without defaultCompaction files: keep as-is. *
    @@ -592,8 +592,7 @@ private static void sortAndRewriteSection( @Nullable Integer manifestReadParallelism) throws Exception { // Skip rewrite for single file not in delete-range. - if (section.size() == 1 - && !defaultCompactionMap.getOrDefault(section.get(0), false)) { + if (section.size() == 1 && !defaultCompactionMap.getOrDefault(section.get(0), false)) { result.add(section.get(0)); return; } diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index a13132223e4f..d41cc1413b54 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -20,16 +20,24 @@ import org.apache.paimon.CoreOptions; import org.apache.paimon.data.BinaryRow; +import org.apache.paimon.data.BinaryRowWriter; +import org.apache.paimon.data.Timestamp; +import org.apache.paimon.fs.FileIO; +import org.apache.paimon.fs.FileIOFinder; import org.apache.paimon.fs.Path; import org.apache.paimon.fs.SeekableInputStream; import org.apache.paimon.fs.SeekableInputStreamWrapper; import org.apache.paimon.fs.local.LocalFileIO; +import org.apache.paimon.io.DataFileMeta; import org.apache.paimon.operation.ManifestFileMerger; import org.apache.paimon.options.Options; import org.apache.paimon.partition.PartitionPredicate; +import org.apache.paimon.schema.SchemaManager; +import org.apache.paimon.stats.StatsTestUtils; import org.apache.paimon.types.IntType; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.FailingFileIO; +import org.apache.paimon.utils.FileStorePathFactory; import org.apache.paimon.shade.guava30.com.google.common.collect.Lists; @@ -44,6 +52,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ 
-878,19 +887,7 @@ private void beforeFirstRead() throws IOException { /** * Test manifest sort with overlapping partition ranges. Each manifest contains entries spanning * multiple partitions, creating overlapping intervals that require sort rewrite to resolve. - * - *

    Input manifests (deliberately unordered and overlapping): - * - *

    -     *   manifest-A: partitions [5, 13]  (entries in partition 5,6,7,8,9)
    -     *   manifest-B: partitions [0, 9]  (entries in partition 0,1,2,3,4)
    -     *   manifest-C: partitions [3, 7]  (entries in partition 3,4,5,6,7) -- overlaps A and B
    -     *   manifest-D: partitions [8, 12] (entries in partition 8,9,10,11,12) -- overlaps A
    -     *   manifest-E: partitions [1, 6]  (entries in partition 1,2,3) -- overlaps B and C
    -     *   manifest-F: partitions [4, 14](entries in partition 10,11,12,13,14) -- overlaps D
    -     * 
    - * - *

    After sort rewrite, all surviving ADD entries should be sorted by partition field. + * After sort rewrite, all surviving ADD entries should be sorted by partition field. */ @Test public void testManifestSortWithOverlappingPartitions() { @@ -1066,4 +1063,232 @@ public void testManifestSortEliminatesDeleteEntries() { } } } + /** + * Test manifest sort with a multi-field partition type. + * + *

    Setup: partition=(region INT, dt INT, hour INT), sort by dt (field index=1). 9 manifest + * files form 6 overlapping sorted runs by dt range: + * + *

    +     *   Run1: 3 files, dt=[0,15],[3,5],[6,8]
    +     *   Run2: 2 files, dt=[1,8],[5,7]
    +     *   Run3: 1 file,  dt=[0,9]
    +     *   Run4: 1 file,  dt=[5,14]
    +     *   Run5: 1 file,  dt=[8,15]
    +     *   Run6: 1 file,  dt=[4,12]
    +     * 
    + * + *

    Verifies: 1) no data loss after sort-rewrite, 2) entries within each output manifest are + * sorted by dt. + */ + @Test + public void testManifestSortWithMultiplePartitions() { + // Use a 3-field partition type: (region INT, dt INT, hour INT) + RowType multiPartitionType = RowType.of(new IntType(), new IntType(), new IntType()); + + // Create a dedicated ManifestFile for the 3-field partition type + Path path = new Path(tempDir.toString()); + FileIO fileIO = FileIOFinder.find(path); + ManifestFile multiPartManifestFile = + new ManifestFile.Factory( + fileIO, + new SchemaManager(fileIO, path), + multiPartitionType, + avro, + "zstd", + new FileStorePathFactory( + path, + multiPartitionType, + "default", + CoreOptions.FILE_FORMAT.defaultValue(), + CoreOptions.DATA_FILE_PREFIX.defaultValue(), + CoreOptions.CHANGELOG_FILE_PREFIX.defaultValue(), + CoreOptions.PARTITION_GENERATE_LEGACY_NAME.defaultValue(), + CoreOptions.FILE_SUFFIX_INCLUDE_COMPRESSION.defaultValue(), + CoreOptions.FILE_COMPRESSION.defaultValue(), + null, + null, + CoreOptions.ExternalPathStrategy.NONE, + null, + false, + null), + Long.MAX_VALUE, + null) + .create(); + + List input = new ArrayList<>(); + + // Run1 + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r1a-p0", 10, 0, 1), + makeMultiPartEntry(true, "r1a-p1", 20, 1, 2), + makeMultiPartEntry(true, "r1a-p2", 30, 15, 3))) + .get(0)); + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r1b-p3", 10, 3, 4), + makeMultiPartEntry(true, "r1b-p4", 20, 4, 5), + makeMultiPartEntry(true, "r1b-p5", 30, 5, 6))) + .get(0)); + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r1c-p6", 10, 6, 7), + makeMultiPartEntry(true, "r1c-p7", 20, 7, 8), + makeMultiPartEntry(true, "r1c-p8", 30, 8, 9))) + .get(0)); + + // Run2 + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r2a-p1", 5, 1, 10), + 
makeMultiPartEntry(true, "r2a-p2", 15, 2, 11), + makeMultiPartEntry(true, "r2a-p3", 25, 3, 12), + makeMultiPartEntry(true, "r2a-p4", 35, 8, 13))) + .get(0)); + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r2b-p5", 5, 5, 14), + makeMultiPartEntry(true, "r2b-p6", 15, 6, 15), + makeMultiPartEntry(true, "r2b-p7", 25, 7, 16))) + .get(0)); + + // Run3 + List run3Entries = new ArrayList<>(); + for (int p = 0; p <= 9; p++) { + run3Entries.add(makeMultiPartEntry(true, String.format("r3-p%d", p), 99, p, p + 20)); + } + input.add(multiPartManifestFile.write(run3Entries).get(0)); + + // Run4 + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r4a-p10", 10, 5, 30), + makeMultiPartEntry(true, "r4a-p11", 20, 11, 31), + makeMultiPartEntry(true, "r4a-p12", 30, 12, 32), + makeMultiPartEntry(true, "r4a-p13", 40, 13, 33), + makeMultiPartEntry(true, "r4a-p14", 50, 14, 34))) + .get(0)); + + // Run5 + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r5a-p11", 11, 8, 40), + makeMultiPartEntry(true, "r5a-p12", 21, 12, 41), + makeMultiPartEntry(true, "r5a-p13", 31, 13, 42), + makeMultiPartEntry(true, "r5a-p14", 41, 14, 43), + makeMultiPartEntry(true, "r5a-p15", 51, 15, 44))) + .get(0)); + + // Run6 + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r6a-p7", 7, 4, 50), + makeMultiPartEntry(true, "r6a-p8", 17, 8, 51), + makeMultiPartEntry(true, "r6a-p9", 27, 9, 52), + makeMultiPartEntry(true, "r6a-p10", 37, 10, 53), + makeMultiPartEntry(true, "r6a-p11", 47, 11, 54), + makeMultiPartEntry(true, "r6a-p12", 57, 12, 55))) + .get(0)); + + Options testOptions = new Options(); + testOptions.set("manifest-sort.enabled", "true"); + // Sort by the second partition field "f1" (dt) + testOptions.set("manifest-sort.partition-field", "f1"); + List merged = + ManifestFileMerger.merge( + input, + multiPartManifestFile, + multiPartitionType, + 
CoreOptions.fromMap(testOptions.toMap())); + + // Verify no data loss + List inputEntries = + input.stream() + .flatMap( + f -> + multiPartManifestFile.read(f.fileName(), f.fileSize()) + .stream()) + .collect(Collectors.toList()); + List entryBeforeMerge = + FileEntry.mergeEntries(inputEntries).stream() + .filter(entry -> entry.kind() == FileKind.ADD) + .map(entry -> entry.kind() + "-" + entry.file().fileName()) + .collect(Collectors.toList()); + List entryAfterMerge = new ArrayList<>(); + for (ManifestFileMeta meta : merged) { + for (ManifestEntry entry : + multiPartManifestFile.read(meta.fileName(), meta.fileSize())) { + entryAfterMerge.add(entry.kind() + "-" + entry.file().fileName()); + } + } + assertThat(entryBeforeMerge).hasSameElementsAs(entryAfterMerge); + + // Verify entries within each output manifest are sorted by the second field (dt) + for (ManifestFileMeta meta : merged) { + List entries = + multiPartManifestFile.read(meta.fileName(), meta.fileSize()); + for (int i = 1; i < entries.size(); i++) { + int prevDt = entries.get(i - 1).partition().getInt(1); + int currDt = entries.get(i).partition().getInt(1); + assertThat(currDt) + .as("Entries within manifest should be sorted by partition") + .isGreaterThanOrEqualTo(prevDt); + } + } + } + + /** Create a ManifestEntry with a 3-field partition row (region, dt, hour). */ + private ManifestEntry makeMultiPartEntry( + boolean isAdd, String fileName, int region, int dt, int hour) { + BinaryRow binaryRow = new BinaryRow(3); + BinaryRowWriter writer = new BinaryRowWriter(binaryRow); + writer.writeInt(0, region); + writer.writeInt(1, dt); + writer.writeInt(2, hour); + writer.complete(); + + return ManifestEntry.create( + isAdd ? 
FileKind.ADD : FileKind.DELETE, + binaryRow, + 0, + 0, + DataFileMeta.create( + fileName, + 0, + 0, + binaryRow, + binaryRow, + StatsTestUtils.newEmptySimpleStats(), + StatsTestUtils.newEmptySimpleStats(), + 0, + 0, + 0, + 0, + Collections.emptyList(), + Timestamp.fromEpochMillis(200000), + 0L, + null, + FileSource.APPEND, + null, + null, + null, + null)); + } } diff --git a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java index 0aa62207f099..71d200ac6206 100644 --- a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java @@ -487,16 +487,16 @@ public void testFileFormatPerLevelAcceptsCompatibleSchema() { } @Test - void testManifestSortEnableOnNonPartitionTable() { - Map options = new HashMap<>(); - options.put(CoreOptions.MANIFEST_SORT_ENABLED.key(), "true"); - options.put(BUCKET.key(), String.valueOf(-1)); - + void testManifestSortValidation() { List fields = Arrays.asList( new DataField(0, "f0", DataTypes.INT()), new DataField(1, "f1", DataTypes.INT())); + // Test 1: manifest-sort.enabled on non-partition table should fail + Map options1 = new HashMap<>(); + options1.put(CoreOptions.MANIFEST_SORT_ENABLED.key(), "true"); + options1.put(BUCKET.key(), String.valueOf(-1)); assertThatThrownBy( () -> validateTableSchema( @@ -506,23 +506,15 @@ void testManifestSortEnableOnNonPartitionTable() { 10, emptyList(), emptyList(), - options, + options1, ""))) .hasMessageContaining( "Cannot enable 'manifest-sort.enabled' for non-partition table."); - } - - @Test - void testManifestSortPartitionFieldNotInPartitionKeys() { - Map options = new HashMap<>(); - options.put(CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), "f1"); - options.put(BUCKET.key(), String.valueOf(-1)); - - List fields = - Arrays.asList( - new DataField(0, "f0", DataTypes.INT()), - new DataField(1, "f1", 
DataTypes.INT())); + // Test 2: manifest-sort-partition-field not in partition keys should fail + Map options2 = new HashMap<>(); + options2.put(CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), "f1"); + options2.put(BUCKET.key(), String.valueOf(-1)); assertThatThrownBy( () -> validateTableSchema( @@ -532,23 +524,15 @@ void testManifestSortPartitionFieldNotInPartitionKeys() { 10, singletonList("f0"), emptyList(), - options, + options2, ""))) .hasMessageContaining("is not a partition field"); - } - - @Test - void testManifestSortValidConfig() { - Map options = new HashMap<>(); - options.put(CoreOptions.MANIFEST_SORT_ENABLED.key(), "true"); - options.put(CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), "f0"); - options.put(BUCKET.key(), String.valueOf(-1)); - - List fields = - Arrays.asList( - new DataField(0, "f0", DataTypes.INT()), - new DataField(1, "f1", DataTypes.INT())); + // Test 3: valid manifest-sort config should pass + Map options3 = new HashMap<>(); + options3.put(CoreOptions.MANIFEST_SORT_ENABLED.key(), "true"); + options3.put(CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), "f0"); + options3.put(BUCKET.key(), String.valueOf(-1)); assertThatNoException() .isThrownBy( () -> @@ -559,7 +543,7 @@ void testManifestSortValidConfig() { 10, singletonList("f0"), emptyList(), - options, + options3, ""))); } } From 63760a7fd24a0e53048f4425179d9ffd9cdb66ea Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 20:59:26 +0800 Subject: [PATCH 35/48] fmt --- .../java/org/apache/paimon/manifest/ManifestFileMetaTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index d41cc1413b54..462ab337ee73 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -1063,6 +1063,7 @@ public void 
testManifestSortEliminatesDeleteEntries() { } } } + /** * Test manifest sort with a multi-field partition type. * From 6a9240243c96dc040358360c4cea86d4f2d20656 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 15:34:29 +0800 Subject: [PATCH 36/48] index --- .../paimon/operation/ManifestFileSorter.java | 169 ++++++++++++------ 1 file changed, 116 insertions(+), 53 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 7f1a7a455fa7..563e10110c9d 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -24,6 +24,7 @@ import org.apache.paimon.data.BinaryRow; import org.apache.paimon.io.RollingFileWriter; import org.apache.paimon.manifest.FileEntry; +import org.apache.paimon.manifest.FileKind; import org.apache.paimon.manifest.ManifestEntry; import org.apache.paimon.manifest.ManifestFile; import org.apache.paimon.manifest.ManifestFileMeta; @@ -31,6 +32,7 @@ import org.apache.paimon.partition.PartitionPredicate; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.Filter; +import org.apache.paimon.utils.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,6 +42,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; @@ -87,12 +90,22 @@ static Optional> trySortRewrite( CodeGenUtils.newRecordComparator( partitionType.getFieldTypes(), new int[] {sortFieldIndex}); + // Build fileName -> index mapping from input + Map fileNameToIndex = new HashMap<>(); + for (int i = 0; i < input.size(); i++) { + fileNameToIndex.put(input.get(i).fileName(), i); + } + + // Build result as 2D list with same size as input + List> result = new 
ArrayList<>(input.size()); + for (int i = 0; i < input.size(); i++) { + result.add(new ArrayList<>()); + } + // Step 2: Classify manifests into defaultCompaction and LSM. - List result = new ArrayList<>(); ClassifyResult classified = classifyManifests( input, - result, suggestedMetaSize, manifestFile, partitionType, @@ -135,7 +148,14 @@ static Optional> trySortRewrite( reusedFiles.addAll(run.files()); } } - result.addAll(reusedFiles); + + // Place reusedFiles at their original index positions + for (ManifestFileMeta file : reusedFiles) { + Integer idx = fileNameToIndex.get(file.fileName()); + if (idx != null) { + result.get(idx).add(file); + } + } // Step 5: Split picked files into sections, sort and rewrite each. List pickedFiles = new ArrayList<>(); @@ -144,10 +164,21 @@ static Optional> trySortRewrite( } pickedFiles.addAll(defaultCompactionMap.keySet()); + // Compute minIdx and maxIdx from pickedFiles + int minIdx = Integer.MAX_VALUE; + int maxIdx = Integer.MIN_VALUE; + for (ManifestFileMeta meta : pickedFiles) { + Integer idx = fileNameToIndex.get(meta.fileName()); + if (idx != null) { + minIdx = Math.min(minIdx, idx); + maxIdx = Math.max(maxIdx, idx); + } + } + Pair indexRange = Pair.of(minIdx, maxIdx); + List

    sections = splitIntoSections(pickedFiles, fieldComparator, defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); - rewriteSections( sections, defaultCompactionMap, @@ -158,15 +189,22 @@ static Optional> trySortRewrite( options.manifestMergeMinCount(), options.manifestSortMaxRewriteSize(), result, + indexRange, newFilesForAbort, manifestReadParallelism); + // Flatten 2D result into a single list + List flatResult = new ArrayList<>(); + for (List subList : result) { + flatResult.addAll(subList); + } + LOG.info( "Manifest sort rewrite completed: sections={}, newFiles={}, resultFiles={}.", sections.size(), newFilesForAbort.size(), - result.size()); - return Optional.of(result); + flatResult.size()); + return Optional.of(flatResult); } /** @@ -180,7 +218,6 @@ static Optional> trySortRewrite( */ private static ClassifyResult classifyManifests( List input, - List result, long suggestedMetaSize, ManifestFile manifestFile, RowType partitionType, @@ -195,25 +232,26 @@ private static ClassifyResult classifyManifests( totalDeltaFileSize += file.fileSize(); } } - boolean removeAllDelete = totalDeltaFileSize >= sizeTrigger; - // Initialize classification containers and read delete entries Map defaultCompactionManifests = new LinkedHashMap<>(); List lsmFiles = new LinkedList<>(input); - Set deleteEntries = - FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); - - // Build partition predicate from delete entries for overlap detection - PartitionPredicate predicate; - if (deleteEntries.isEmpty()) { - predicate = PartitionPredicate.ALWAYS_FALSE; - } else { - if (partitionType.getFieldCount() > 0) { - Set deletePartitions = - ManifestFileMerger.computeDeletePartitions(deleteEntries); - predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions); + Set deleteEntries = Collections.emptySet(); + PartitionPredicate predicate = null; + if (totalDeltaFileSize >= sizeTrigger) { + deleteEntries = + 
FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); + + // Build partition predicate from delete entries for overlap detection + if (deleteEntries.isEmpty()) { + predicate = PartitionPredicate.ALWAYS_FALSE; } else { - predicate = PartitionPredicate.ALWAYS_TRUE; + if (partitionType.getFieldCount() > 0) { + Set deletePartitions = + ManifestFileMerger.computeDeletePartitions(deleteEntries); + predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions); + } else { + predicate = PartitionPredicate.ALWAYS_TRUE; + } } } @@ -229,24 +267,11 @@ private static ClassifyResult classifyManifests( file.partitionStats().minValues(), file.partitionStats().maxValues(), file.partitionStats().nullCounts()); - if (removeAllDelete) { - // Full compaction: collect small or delete-overlapping files - if (small || inDeleteRange) { - iterator.remove(); - defaultCompactionManifests.put(file, inDeleteRange); - } - } else { - // Non-full: separate delete-overlapping into result, small into compaction group - if (inDeleteRange) { - iterator.remove(); - result.add(file); - } else if (small) { - iterator.remove(); - defaultCompactionManifests.put(file, false); - } + if (small || inDeleteRange) { + iterator.remove(); + defaultCompactionManifests.put(file, inDeleteRange); } } - deleteEntries = removeAllDelete ? 
deleteEntries : Collections.emptySet(); return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); } @@ -431,7 +456,8 @@ private static void rewriteSections( long suggestedMetaSize, int suggestedMinMetaCount, long maxRewriteSize, - List result, + List> result, + Pair indexRange, List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { @@ -448,6 +474,7 @@ private static void rewriteSections( deleteEntries, defaultCompactionMap, result, + indexRange, sortNewFiles, manifestReadParallelism); continue; @@ -462,6 +489,7 @@ private static void rewriteSections( deleteEntries, defaultCompactionMap, result, + indexRange, sortNewFiles, manifestReadParallelism); } else if (!reachedLimit) { @@ -494,6 +522,7 @@ private static void rewriteSections( deleteEntries, defaultCompactionMap, result, + indexRange, sortNewFiles, manifestReadParallelism); @@ -514,10 +543,11 @@ private static void rewriteSections( suggestedMetaSize, suggestedMinMetaCount, result, + indexRange, sortNewFiles, manifestReadParallelism); } else { - result.addAll(section.files); + result.get(indexRange.getLeft()).addAll(section.files); } } } @@ -535,7 +565,8 @@ private static void rewriteSubSegments( Set deleteEntries, long manifestTargetSize, int suggestedMinMetaCount, - List result, + List> result, + Pair indexRange, List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { @@ -553,6 +584,7 @@ private static void rewriteSubSegments( deleteEntries, defaultCompactionMap, result, + indexRange, sortNewFiles, manifestReadParallelism); subSegment.clear(); @@ -569,17 +601,19 @@ private static void rewriteSubSegments( deleteEntries, defaultCompactionMap, result, + indexRange, sortNewFiles, manifestReadParallelism); } else { - result.addAll(subSegment); + result.get(indexRange.getLeft()).addAll(subSegment); } } } /** - * Read entries from a section's manifest files, sort by partition field, and write to new - * manifests. 
Single non-delete-range files are passed through without rewrite. + * Read entries from a section's manifest files, split into ADD and DELETE entries, sort each + * group separately, write to new manifests, and place ADD meta at result[minIdx] and DELETE + * meta at result[maxIdx]. */ private static void sortAndRewriteSection( List section, @@ -587,34 +621,63 @@ private static void sortAndRewriteSection( RecordComparator fieldComparator, Set deleteEntries, Map defaultCompactionMap, - List result, + List> result, + Pair indexRange, List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { // Skip rewrite for single file not in delete-range. if (section.size() == 1 && !defaultCompactionMap.getOrDefault(section.get(0), false)) { - result.add(section.get(0)); + result.get(indexRange.getLeft()).add(section.get(0)); return; } // Read all entries in parallel. Function> reader = meta -> singletonList(readForSortRewrite(meta, manifestFile, deleteEntries)); - List entriesToRewrite = new ArrayList<>(); + List addEntriesToRewrite = new ArrayList<>(); + List deleteEntriesToRewrite = new ArrayList<>(); for (FullCompactionReadResult readResult : sequentialBatchedExecute(reader, section, manifestReadParallelism)) { - entriesToRewrite.addAll(readResult.entries); + for (ManifestEntry entry : readResult.entries) { + if (entry.kind() == FileKind.ADD) { + addEntriesToRewrite.add(entry); + } else { + deleteEntriesToRewrite.add(entry); + } + } } - if (!entriesToRewrite.isEmpty()) { - // Sort and write to new manifest files. 
- entriesToRewrite.sort((a, b) -> compareSortKey(a, b, fieldComparator)); + // Sort and write ADD entries + if (!addEntriesToRewrite.isEmpty()) { + addEntriesToRewrite.sort((a, b) -> compareSortKey(a, b, fieldComparator)); + RollingFileWriter writer = + manifestFile.createRollingWriter(); + Exception exception = null; + try { + writer.write(addEntriesToRewrite); + } catch (Exception e) { + exception = e; + } finally { + if (exception != null) { + writer.abort(); + throw exception; + } + writer.close(); + } + List sorted = writer.result(); + result.get(indexRange.getLeft()).addAll(sorted); + sortNewFiles.addAll(sorted); + } + // Sort and write DELETE entries + if (!deleteEntriesToRewrite.isEmpty()) { + deleteEntriesToRewrite.sort((a, b) -> compareSortKey(a, b, fieldComparator)); RollingFileWriter writer = manifestFile.createRollingWriter(); Exception exception = null; try { - writer.write(entriesToRewrite); + writer.write(deleteEntriesToRewrite); } catch (Exception e) { exception = e; } finally { @@ -625,7 +688,7 @@ private static void sortAndRewriteSection( writer.close(); } List sorted = writer.result(); - result.addAll(sorted); + result.get(indexRange.getRight()).addAll(sorted); sortNewFiles.addAll(sorted); } } From e885973aaeb8b931e59d094325e1fc331b4a1ff3 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 17:54:18 +0800 Subject: [PATCH 37/48] split --- .../paimon/operation/ManifestFileMerger.java | 46 +- .../paimon/operation/ManifestFileSorter.java | 547 ++++++++++++------ 2 files changed, 398 insertions(+), 195 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 36de3d2ecdef..fad84521f5fb 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -78,31 +78,29 @@ public static List merge( // If 
manifest-sort.enabled is enabled and there are partition fields, use // trySortRewrite if (options.manifestSortEnabled() && partitionType.getFieldCount() > 0) { - Optional> sorted = - ManifestFileSorter.trySortRewrite( - input, newFilesForAbort, manifestFile, partitionType, options); - return sorted.orElse(input); + return ManifestFileSorter.trySortRewrite( + input, newFilesForAbort, manifestFile, partitionType, options); + } else { + // Otherwise try full compaction first, then minor compaction if needed + Optional> fullCompacted = + tryFullCompaction( + input, + newFilesForAbort, + manifestFile, + suggestedMetaSize, + manifestFullCompactionSize, + partitionType, + manifestReadParallelism); + return fullCompacted.orElseGet( + () -> + tryMinorCompaction( + input, + newFilesForAbort, + manifestFile, + suggestedMetaSize, + suggestedMinMetaCount, + manifestReadParallelism)); } - - // Otherwise try full compaction first, then minor compaction if needed - Optional> fullCompacted = - tryFullCompaction( - input, - newFilesForAbort, - manifestFile, - suggestedMetaSize, - manifestFullCompactionSize, - partitionType, - manifestReadParallelism); - return fullCompacted.orElseGet( - () -> - tryMinorCompaction( - input, - newFilesForAbort, - manifestFile, - suggestedMetaSize, - suggestedMinMetaCount, - manifestReadParallelism)); } catch (Throwable e) { // exception occurs, clean up and rethrow for (ManifestFileMeta manifest : newFilesForAbort) { diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 563e10110c9d..b351fc960316 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -65,106 +65,186 @@ public class ManifestFileSorter { /** * Try to sort-rewrite the merged manifest list by a configured partition field. 
If the sort * field cannot be resolved, the input is returned as-is. + * + *

    Dispatches to {@link #tryFullCompact} when totalDeltaFileSize >= sizeTrigger, or {@link + * #tryMinorCompact} otherwise. */ - static Optional> trySortRewrite( + static List trySortRewrite( + List input, + List newFilesForAbort, + ManifestFile manifestFile, + RowType partitionType, + CoreOptions options) + throws Exception { + // Try full compaction first, then minor compaction if full compact is skipped + Optional> fullCompacted = + tryFullCompact(input, newFilesForAbort, manifestFile, partitionType, options); + if (fullCompacted.isPresent()) { + return fullCompacted.get(); + } + return tryMinorCompact(input, newFilesForAbort, manifestFile, partitionType, options); + } + + /** + * Full compaction path: totalDeltaFileSize >= sizeTrigger. + * + *

    Does not build index mapping. sortAndRewriteSection writes all entries (ADD+DELETE merged) + * together without separating them. + */ + private static Optional> tryFullCompact( List input, List newFilesForAbort, ManifestFile manifestFile, RowType partitionType, CoreOptions options) throws Exception { - // Extract configuration from options long suggestedMetaSize = options.manifestTargetSize().getBytes(); - Integer manifestReadParallelism = options.scanManifestParallelism(); - String sortPartitionField = options.manifestSortPartitionField(); - long manifestFullCompactionThresholdSize = - options.manifestFullCompactionThresholdSize().getBytes(); - // Step 1: Resolve sort field. - String sortField = resolveSortField(sortPartitionField, partitionType); - if (sortField == null) { - throw new IllegalArgumentException( - "Cannot resolve sort field for manifest sort rewrite."); + + // Step 1: Check if full compaction threshold is met + long totalDeltaFileSize = 0; + for (ManifestFileMeta file : input) { + if (file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize) { + totalDeltaFileSize += file.fileSize(); + } } - int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); - RecordComparator fieldComparator = - CodeGenUtils.newRecordComparator( - partitionType.getFieldTypes(), new int[] {sortFieldIndex}); + if (totalDeltaFileSize < options.manifestFullCompactionThresholdSize().getBytes()) { + return Optional.empty(); + } + + // Step 2: Prepare compaction context + CompactionContext ctx = + prepareCompaction(input, manifestFile, partitionType, options, true); + Map defaultCompactionMap = ctx.defaultCompactionManifests; + List levelRuns = ctx.levelRuns; + List pickedRuns = ctx.pickedRuns; + RecordComparator fieldComparator = ctx.fieldComparator; + Set deleteEntries = ctx.deleteEntries; + + if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { + LOG.debug( + "Manifest sort full compact skipped: no runs picked and no defaultCompaction 
files."); + return Optional.empty(); + } + + LOG.info( + "Manifest sort full compact: input={} files, lsm={} runs, picked={} runs, " + + "defaultCompaction={} files.", + input.size(), + levelRuns.size(), + pickedRuns.size(), + defaultCompactionMap.size()); + + // Step 3: Collect reused files (not picked) and picked files + Set pickedSet = new HashSet<>(pickedRuns); + List result = new ArrayList<>(); + for (ManifestSortedRun run : levelRuns) { + if (!pickedSet.contains(run)) { + result.addAll(run.files()); + } + } + List pickedFiles = new ArrayList<>(); + for (ManifestSortedRun run : pickedRuns) { + pickedFiles.addAll(run.files()); + } + pickedFiles.addAll(defaultCompactionMap.keySet()); + + // Step 4: Split into sections and merge small adjacent sections + List

    sections = + splitIntoSections(pickedFiles, fieldComparator, defaultCompactionMap); + sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); + + // Step 5: Rewrite sections + FullCompactOutput output = new FullCompactOutput(result); + rewriteSections( + sections, + defaultCompactionMap, + manifestFile, + fieldComparator, + deleteEntries, + suggestedMetaSize, + options.manifestMergeMinCount(), + options.manifestSortMaxRewriteSize(), + output, + newFilesForAbort, + options.scanManifestParallelism(), + true); + + LOG.info( + "Manifest sort full compact completed: sections={}, newFiles={}, resultFiles={}.", + sections.size(), + newFilesForAbort.size(), + result.size()); + return Optional.of(result); + } + + /** + * Minor compaction path: totalDeltaFileSize < sizeTrigger. + * + *

    Builds index mapping to preserve original positions. sortAndRewriteSection separates ADD + * and DELETE entries, placing ADD at result[minIdx] and DELETE at result[maxIdx]. + */ + private static List tryMinorCompact( + List input, + List newFilesForAbort, + ManifestFile manifestFile, + RowType partitionType, + CoreOptions options) + throws Exception { + long suggestedMetaSize = options.manifestTargetSize().getBytes(); + + // Step 1: Prepare compaction context + CompactionContext ctx = + prepareCompaction(input, manifestFile, partitionType, options, false); + Map defaultCompactionMap = ctx.defaultCompactionManifests; + List levelRuns = ctx.levelRuns; + List pickedRuns = ctx.pickedRuns; + RecordComparator fieldComparator = ctx.fieldComparator; + Set deleteEntries = ctx.deleteEntries; - // Build fileName -> index mapping from input + // Step 2: Build fileName -> index mapping and initialize 2D result Map fileNameToIndex = new HashMap<>(); for (int i = 0; i < input.size(); i++) { fileNameToIndex.put(input.get(i).fileName(), i); } - - // Build result as 2D list with same size as input List> result = new ArrayList<>(input.size()); for (int i = 0; i < input.size(); i++) { result.add(new ArrayList<>()); } - // Step 2: Classify manifests into defaultCompaction and LSM. - ClassifyResult classified = - classifyManifests( - input, - suggestedMetaSize, - manifestFile, - partitionType, - manifestFullCompactionThresholdSize, - manifestReadParallelism); - Map defaultCompactionMap = classified.defaultCompactionManifests; - List lsmFiles = classified.lsmFiles; - Set deleteEntries = classified.deleteEntries; - - // Step 3: Build LSM Tree and assign levels (only for lsmFiles). - List levelRuns = - lsmFiles.isEmpty() - ? new ArrayList<>() - : buildLevelSortedRuns(lsmFiles, fieldComparator); - - // Step 4: Pick runs to compact. 
- ManifestPickStrategy pickStrategy = - new ManifestPickStrategy( - options.maxSizeAmplificationPercent(), options.sortedRunSizeRatio()); - List pickedRuns = pickStrategy.pick(levelRuns); - if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { LOG.debug( - "Manifest sort rewrite skipped: no runs picked and no defaultCompaction files."); - return Optional.empty(); + "Manifest sort minor compact skipped: no runs picked and no defaultCompaction files."); + return input; } LOG.info( - "Manifest sort rewrite: input={} files, lsm={} runs, picked={} runs, " + "Manifest sort minor compact: input={} files, lsm={} runs, picked={} runs, " + "defaultCompaction={} files.", input.size(), levelRuns.size(), pickedRuns.size(), defaultCompactionMap.size()); + // Step 3: Collect reused files at their original index positions Set pickedSet = new HashSet<>(pickedRuns); - List reusedFiles = new ArrayList<>(); for (ManifestSortedRun run : levelRuns) { if (!pickedSet.contains(run)) { - reusedFiles.addAll(run.files()); - } - } - - // Place reusedFiles at their original index positions - for (ManifestFileMeta file : reusedFiles) { - Integer idx = fileNameToIndex.get(file.fileName()); - if (idx != null) { - result.get(idx).add(file); + for (ManifestFileMeta file : run.files()) { + Integer idx = fileNameToIndex.get(file.fileName()); + if (idx != null) { + result.get(idx).add(file); + } + } } } - // Step 5: Split picked files into sections, sort and rewrite each. 
+ // Step 4: Collect picked files and compute index range List pickedFiles = new ArrayList<>(); for (ManifestSortedRun run : pickedRuns) { pickedFiles.addAll(run.files()); } pickedFiles.addAll(defaultCompactionMap.keySet()); - // Compute minIdx and maxIdx from pickedFiles int minIdx = Integer.MAX_VALUE; int maxIdx = Integer.MIN_VALUE; for (ManifestFileMeta meta : pickedFiles) { @@ -176,9 +256,13 @@ static Optional> trySortRewrite( } Pair indexRange = Pair.of(minIdx, maxIdx); + // Step 5: Split into sections and merge small adjacent sections List

    sections = splitIntoSections(pickedFiles, fieldComparator, defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); + + // Step 6: Rewrite sections + MinorCompactOutput output = new MinorCompactOutput(result, indexRange); rewriteSections( sections, defaultCompactionMap, @@ -188,23 +272,76 @@ static Optional> trySortRewrite( suggestedMetaSize, options.manifestMergeMinCount(), options.manifestSortMaxRewriteSize(), - result, - indexRange, + output, newFilesForAbort, - manifestReadParallelism); + options.scanManifestParallelism(), + false); - // Flatten 2D result into a single list + // Step 7: Flatten 2D result into a single list List flatResult = new ArrayList<>(); for (List subList : result) { flatResult.addAll(subList); } LOG.info( - "Manifest sort rewrite completed: sections={}, newFiles={}, resultFiles={}.", + "Manifest sort minor compact completed: sections={}, newFiles={}, resultFiles={}.", sections.size(), newFilesForAbort.size(), flatResult.size()); - return Optional.of(flatResult); + return flatResult; + } + + /** + * Prepare compaction context by extracting common logic from tryFullCompact and + * tryMinorCompact. 
+ */ + private static CompactionContext prepareCompaction( + List input, + ManifestFile manifestFile, + RowType partitionType, + CoreOptions options, + boolean fullCompaction) { + long suggestedMetaSize = options.manifestTargetSize().getBytes(); + Integer manifestReadParallelism = options.scanManifestParallelism(); + String sortPartitionField = options.manifestSortPartitionField(); + + String sortField = resolveSortField(sortPartitionField, partitionType); + if (sortField == null) { + throw new IllegalArgumentException( + "Cannot resolve sort field for manifest sort rewrite."); + } + int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); + RecordComparator fieldComparator = + CodeGenUtils.newRecordComparator( + partitionType.getFieldTypes(), new int[] {sortFieldIndex}); + + ClassifyResult classified = + classifyManifests( + input, + suggestedMetaSize, + manifestFile, + partitionType, + fullCompaction, + manifestReadParallelism); + + List lsmFiles = classified.lsmFiles; + List levelRuns = + lsmFiles.isEmpty() + ? 
new ArrayList<>() + : buildLevelSortedRuns(lsmFiles, fieldComparator); + + ManifestPickStrategy pickStrategy = + new ManifestPickStrategy( + options.maxSizeAmplificationPercent(), options.sortedRunSizeRatio()); + List pickedRuns = pickStrategy.pick(levelRuns); + + return new CompactionContext( + fieldComparator, + classified.defaultCompactionManifests, + classified.lsmFiles, + classified.deleteEntries, + levelRuns, + pickedRuns); } /** @@ -221,23 +358,14 @@ private static ClassifyResult classifyManifests( long suggestedMetaSize, ManifestFile manifestFile, RowType partitionType, - long sizeTrigger, + boolean fullCompaction, @Nullable Integer manifestReadParallelism) { - // Calculate total size of files that need compaction to determine full-compaction trigger - Filter mustChange = - file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize; - long totalDeltaFileSize = 0; - for (ManifestFileMeta file : input) { - if (mustChange.test(file)) { - totalDeltaFileSize += file.fileSize(); - } - } // Initialize classification containers and read delete entries Map defaultCompactionManifests = new LinkedHashMap<>(); List lsmFiles = new LinkedList<>(input); Set deleteEntries = Collections.emptySet(); PartitionPredicate predicate = null; - if (totalDeltaFileSize >= sizeTrigger) { + if (fullCompaction) { deleteEntries = FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); @@ -435,18 +563,7 @@ private static List
    mergeSmallAdjacentSections( return merged; } - /** - * Rewrite sections with a budget-controlled strategy. - * - *
      - *
    • 1. Single-file section: pass through (rewrite only if it has delete entries). - *
    • 2. Within budget: sort and rewrite the entire section. - *
    • 3. First time exceeding budget: partial rewrite within remaining budget, remaining - * files form a new section appended for later processing. - *
    • 4. After budget exhausted with defaultCompaction files: rewrite sub-segments only. - *
    • 5. After budget exhausted without defaultCompaction files: keep as-is. - *
    - */ + /** Unified method to rewrite sections with budget control. */ private static void rewriteSections( List
    sections, Map defaultCompactionMap, @@ -456,10 +573,10 @@ private static void rewriteSections( long suggestedMetaSize, int suggestedMinMetaCount, long maxRewriteSize, - List> result, - Pair indexRange, + RewriteOutput output, List sortNewFiles, - @Nullable Integer manifestReadParallelism) + @Nullable Integer manifestReadParallelism, + boolean fullCompaction) throws Exception { long processedSize = 0; boolean reachedLimit = false; @@ -473,10 +590,10 @@ private static void rewriteSections( fieldComparator, deleteEntries, defaultCompactionMap, - result, - indexRange, + output, sortNewFiles, - manifestReadParallelism); + manifestReadParallelism, + fullCompaction); continue; } @@ -488,12 +605,11 @@ private static void rewriteSections( fieldComparator, deleteEntries, defaultCompactionMap, - result, - indexRange, + output, sortNewFiles, - manifestReadParallelism); + manifestReadParallelism, + fullCompaction); } else if (!reachedLimit) { - // Partial rewrite: split section at the budget boundary. long rewriteTotalSize = maxRewriteSize - processedSize; processedSize += section.totalSize; List rewriteFiles = new ArrayList<>(); @@ -521,12 +637,11 @@ private static void rewriteSections( fieldComparator, deleteEntries, defaultCompactionMap, - result, - indexRange, + output, sortNewFiles, - manifestReadParallelism); + manifestReadParallelism, + fullCompaction); - // Append remaining files as a new section for later processing. 
if (!remainingFiles.isEmpty()) { Section remainingSection = new Section(remainingFiles, remainingSize, remainingHasDefault); @@ -542,21 +657,17 @@ private static void rewriteSections( deleteEntries, suggestedMetaSize, suggestedMinMetaCount, - result, - indexRange, + output, sortNewFiles, - manifestReadParallelism); + manifestReadParallelism, + fullCompaction); } else { - result.get(indexRange.getLeft()).addAll(section.files); + output.addAllUnchanged(section.files); } } } - /** - * Batch-rewrite files in a section by splitting them into sub-segments of {@code - * manifestTargetSize}. Tail sub-segment is only rewritten if it has delete entries or meets - * {@code suggestedMinMetaCount}. - */ + /** Unified method to rewrite sub-segments with budget control. */ private static void rewriteSubSegments( List section, Map defaultCompactionMap, @@ -565,10 +676,10 @@ private static void rewriteSubSegments( Set deleteEntries, long manifestTargetSize, int suggestedMinMetaCount, - List> result, - Pair indexRange, + RewriteOutput output, List sortNewFiles, - @Nullable Integer manifestReadParallelism) + @Nullable Integer manifestReadParallelism, + boolean fullCompaction) throws Exception { List subSegment = new ArrayList<>(); long subSegmentSize = 0; @@ -583,10 +694,10 @@ private static void rewriteSubSegments( fieldComparator, deleteEntries, defaultCompactionMap, - result, - indexRange, + output, sortNewFiles, - manifestReadParallelism); + manifestReadParallelism, + fullCompaction); subSegment.clear(); subSegmentSize = 0; } @@ -600,20 +711,20 @@ private static void rewriteSubSegments( fieldComparator, deleteEntries, defaultCompactionMap, - result, - indexRange, + output, sortNewFiles, - manifestReadParallelism); + manifestReadParallelism, + fullCompaction); } else { - result.get(indexRange.getLeft()).addAll(subSegment); + output.addAllUnchanged(subSegment); } } } /** - * Read entries from a section's manifest files, split into ADD and DELETE entries, sort each - * group 
separately, write to new manifests, and place ADD meta at result[minIdx] and DELETE - * meta at result[maxIdx]. + * Unified method to sort and rewrite a section. + * + * @param fullCompaction if true, merge ADD+DELETE entries together; if false, separate them */ private static void sortAndRewriteSection( List section, @@ -621,78 +732,79 @@ private static void sortAndRewriteSection( RecordComparator fieldComparator, Set deleteEntries, Map defaultCompactionMap, - List> result, - Pair indexRange, + RewriteOutput output, List sortNewFiles, - @Nullable Integer manifestReadParallelism) + @Nullable Integer manifestReadParallelism, + boolean fullCompaction) throws Exception { // Skip rewrite for single file not in delete-range. if (section.size() == 1 && !defaultCompactionMap.getOrDefault(section.get(0), false)) { - result.get(indexRange.getLeft()).add(section.get(0)); + output.addUnchanged(section.get(0)); return; } - // Read all entries in parallel. + + // Read all entries in parallel (common for both paths). 
Function> reader = meta -> singletonList(readForSortRewrite(meta, manifestFile, deleteEntries)); - List addEntriesToRewrite = new ArrayList<>(); + List addEntries = new ArrayList<>(); List deleteEntriesToRewrite = new ArrayList<>(); for (FullCompactionReadResult readResult : sequentialBatchedExecute(reader, section, manifestReadParallelism)) { - for (ManifestEntry entry : readResult.entries) { - if (entry.kind() == FileKind.ADD) { - addEntriesToRewrite.add(entry); - } else { - deleteEntriesToRewrite.add(entry); + if (fullCompaction) { + addEntries.addAll(readResult.entries); + } else { + for (ManifestEntry entry : readResult.entries) { + if (entry.kind() == FileKind.ADD) { + addEntries.add(entry); + } else { + deleteEntriesToRewrite.add(entry); + } } } } - // Sort and write ADD entries - if (!addEntriesToRewrite.isEmpty()) { - addEntriesToRewrite.sort((a, b) -> compareSortKey(a, b, fieldComparator)); - RollingFileWriter writer = - manifestFile.createRollingWriter(); - Exception exception = null; - try { - writer.write(addEntriesToRewrite); - } catch (Exception e) { - exception = e; - } finally { - if (exception != null) { - writer.abort(); - throw exception; - } - writer.close(); - } - List sorted = writer.result(); - result.get(indexRange.getLeft()).addAll(sorted); + // Write ADD (or all) entries + if (!addEntries.isEmpty()) { + List sorted = + sortAndWriteEntries(addEntries, manifestFile, fieldComparator); + output.addSortedFiles(sorted); sortNewFiles.addAll(sorted); } - // Sort and write DELETE entries + // Write DELETE entries (minor compact only) if (!deleteEntriesToRewrite.isEmpty()) { - deleteEntriesToRewrite.sort((a, b) -> compareSortKey(a, b, fieldComparator)); - RollingFileWriter writer = - manifestFile.createRollingWriter(); - Exception exception = null; - try { - writer.write(deleteEntriesToRewrite); - } catch (Exception e) { - exception = e; - } finally { - if (exception != null) { - writer.abort(); - throw exception; - } - writer.close(); - } - List 
sorted = writer.result(); - result.get(indexRange.getRight()).addAll(sorted); + List sorted = + sortAndWriteEntries(deleteEntriesToRewrite, manifestFile, fieldComparator); + output.addDeleteFiles(sorted); sortNewFiles.addAll(sorted); } } + /** Sort entries and write them to a new manifest file with proper error handling. */ + private static List sortAndWriteEntries( + List entries, + ManifestFile manifestFile, + RecordComparator fieldComparator) + throws Exception { + entries.sort((a, b) -> compareSortKey(a, b, fieldComparator)); + RollingFileWriter writer = + manifestFile.createRollingWriter(); + Exception exception = null; + try { + writer.write(entries); + } catch (Exception e) { + exception = e; + } finally { + if (exception != null) { + writer.abort(); + throw exception; + } + writer.close(); + } + return writer.result(); + } + /** * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, kind, fileName)}. * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field @@ -760,6 +872,99 @@ private static FullCompactionReadResult readForSortRewrite( return new FullCompactionReadResult(meta, true, entries); } + /** Strategy interface for writing compaction results. 
*/ + interface RewriteOutput { + void addUnchanged(ManifestFileMeta file); + + void addAllUnchanged(List files); + + void addSortedFiles(List files); + + void addDeleteFiles(List files); + } + + private static class FullCompactOutput implements RewriteOutput { + private final List result; + + FullCompactOutput(List result) { + this.result = result; + } + + @Override + public void addUnchanged(ManifestFileMeta file) { + result.add(file); + } + + @Override + public void addAllUnchanged(List files) { + result.addAll(files); + } + + @Override + public void addSortedFiles(List files) { + result.addAll(files); + } + + @Override + public void addDeleteFiles(List files) { + result.addAll(files); + } + } + + private static class MinorCompactOutput implements RewriteOutput { + private final List> result; + private final Pair indexRange; + + MinorCompactOutput(List> result, Pair indexRange) { + this.result = result; + this.indexRange = indexRange; + } + + @Override + public void addUnchanged(ManifestFileMeta file) { + result.get(indexRange.getLeft()).add(file); + } + + @Override + public void addAllUnchanged(List files) { + result.get(indexRange.getLeft()).addAll(files); + } + + @Override + public void addSortedFiles(List files) { + result.get(indexRange.getLeft()).addAll(files); + } + + @Override + public void addDeleteFiles(List files) { + result.get(indexRange.getRight()).addAll(files); + } + } + + private static class CompactionContext { + final RecordComparator fieldComparator; + final Map defaultCompactionManifests; + final List lsmFiles; + @Nullable final Set deleteEntries; + final List levelRuns; + final List pickedRuns; + + CompactionContext( + RecordComparator fieldComparator, + Map defaultCompactionManifests, + List lsmFiles, + @Nullable Set deleteEntries, + List levelRuns, + List pickedRuns) { + this.fieldComparator = fieldComparator; + this.defaultCompactionManifests = defaultCompactionManifests; + this.lsmFiles = lsmFiles; + this.deleteEntries = deleteEntries; + 
this.levelRuns = levelRuns; + this.pickedRuns = pickedRuns; + } + } + /** A section of manifest files with pre-computed metadata. */ static class Section { final List files; From 450464647d12b0f316ff0d911647961d76773e6f Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 18:35:18 +0800 Subject: [PATCH 38/48] fix --- .../java/org/apache/paimon/CoreOptions.java | 2 +- .../paimon/operation/ManifestFileSorter.java | 21 +++++++++---------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index b293ed95a6c2..6d957ccd4342 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -481,7 +481,7 @@ public InlineElement getDescription() { .noDefaultValue() .withDescription( "Partition field name to sort manifest entries by. Validated by" - + " schema validation, If not configured, defaults to the first partition field."); + + " schema validation, if not configured, defaults to the first partition field."); public static final ConfigOption MANIFEST_SORT_MAX_REWRITE_SIZE = key("manifest-sort.max-rewrite-size") diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index b351fc960316..7038dee65c5c 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -192,16 +192,7 @@ private static List tryMinorCompact( throws Exception { long suggestedMetaSize = options.manifestTargetSize().getBytes(); - // Step 1: Prepare compaction context - CompactionContext ctx = - prepareCompaction(input, manifestFile, partitionType, options, false); - Map defaultCompactionMap = ctx.defaultCompactionManifests; - List levelRuns = ctx.levelRuns; - 
List pickedRuns = ctx.pickedRuns; - RecordComparator fieldComparator = ctx.fieldComparator; - Set deleteEntries = ctx.deleteEntries; - - // Step 2: Build fileName -> index mapping and initialize 2D result + // Step 1: Build fileName -> index mapping and initialize 2D result Map fileNameToIndex = new HashMap<>(); for (int i = 0; i < input.size(); i++) { fileNameToIndex.put(input.get(i).fileName(), i); @@ -211,6 +202,15 @@ private static List tryMinorCompact( result.add(new ArrayList<>()); } + // Step 2: Prepare compaction context + CompactionContext ctx = + prepareCompaction(input, manifestFile, partitionType, options, false); + Map defaultCompactionMap = ctx.defaultCompactionManifests; + List levelRuns = ctx.levelRuns; + List pickedRuns = ctx.pickedRuns; + RecordComparator fieldComparator = ctx.fieldComparator; + Set deleteEntries = ctx.deleteEntries; + if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { LOG.debug( "Manifest sort minor compact skipped: no runs picked and no defaultCompaction files."); @@ -990,7 +990,6 @@ static Section merge(Section a, Section b) { /** Result of classifying manifest files into default-compaction and LSM groups. */ private static class ClassifyResult { - /** key: ManifestFileMeta, value: boolean[]{isSmall, isInDeleteRange}. 
*/ final Map defaultCompactionManifests; final List lsmFiles; From b76c8972441f5e1ec5db721cf4e50e76101d3351 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 20:40:33 +0800 Subject: [PATCH 39/48] comment --- .../paimon/operation/ManifestFileSorter.java | 11 +++++++++-- .../paimon/schema/SchemaValidation.java | 19 +++++++++---------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 7038dee65c5c..1e724f925a9b 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -225,7 +225,7 @@ private static List tryMinorCompact( pickedRuns.size(), defaultCompactionMap.size()); - // Step 3: Collect reused files at their original index positions + // Step 3: Collect reused files at their original index positions and collect picked files Set pickedSet = new HashSet<>(pickedRuns); for (ManifestSortedRun run : levelRuns) { if (!pickedSet.contains(run)) { @@ -238,13 +238,13 @@ private static List tryMinorCompact( } } - // Step 4: Collect picked files and compute index range List pickedFiles = new ArrayList<>(); for (ManifestSortedRun run : pickedRuns) { pickedFiles.addAll(run.files()); } pickedFiles.addAll(defaultCompactionMap.keySet()); + // Step 4: Compute index range int minIdx = Integer.MAX_VALUE; int maxIdx = Integer.MIN_VALUE; for (ManifestFileMeta meta : pickedFiles) { @@ -447,6 +447,9 @@ static List buildLevelSortedRuns( earliestRun.get(earliestRun.size() - 1).partitionStats().maxValues()) >= 0) { // Current file's min >= run's max, append to this run + // Note: When min == max (boundary equality), files are considered non-overlapping + // and can be placed in the same SortedRun. 
This allows building fewer SortedRuns, + // improving compaction efficiency while maintaining correct sort order. earliestRun.add(file); runs.offer(earliestRun); } else { @@ -510,6 +513,10 @@ static List
    splitIntoSections( for (int i = 1; i < pickedFiles.size(); i++) { ManifestFileMeta file = pickedFiles.get(i); + // Note: Boundary equality (file.min == sectionMaxBound) results in separate sections. + // This avoids merge-sort overhead while maintaining partition filtering capability. + // Files with non-overlapping boundaries (including equal boundaries) can be processed + // independently without significantly impacting partition pruning efficiency. if (fieldComparator.compare(file.partitionStats().minValues(), sectionMaxBound) >= 0) { sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); currentFiles = new ArrayList<>(); diff --git a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java index 7c60b81d0475..48939fff6ceb 100644 --- a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java +++ b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java @@ -1022,16 +1022,15 @@ private static void validateManifestSort(TableSchema schema, CoreOptions options !schema.partitionKeys().isEmpty(), "Cannot enable '%s' for non-partition table.", CoreOptions.MANIFEST_SORT_ENABLED.key()); - } - - String sortPartitionField = options.manifestSortPartitionField(); - if (sortPartitionField != null && !sortPartitionField.isEmpty()) { - checkArgument( - schema.partitionKeys().contains(sortPartitionField), - "'%s' = '%s' is not a partition field. Available partition fields: %s.", - CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), - sortPartitionField, - schema.partitionKeys()); + String sortPartitionField = options.manifestSortPartitionField(); + if (sortPartitionField != null && !sortPartitionField.isEmpty()) { + checkArgument( + schema.partitionKeys().contains(sortPartitionField), + "'%s' = '%s' is not a partition field. 
Available partition fields: %s.", + CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), + sortPartitionField, + schema.partitionKeys()); + } } } } From 0cb6b2cc1f4179bf465c6c97a11194f541a4cb52 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 21:15:34 +0800 Subject: [PATCH 40/48] splitSortAndRewriteSection --- .../paimon/operation/ManifestFileSorter.java | 163 ++++++++++++------ 1 file changed, 106 insertions(+), 57 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 1e724f925a9b..834d5d5ff11e 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -28,7 +28,6 @@ import org.apache.paimon.manifest.ManifestEntry; import org.apache.paimon.manifest.ManifestFile; import org.apache.paimon.manifest.ManifestFileMeta; -import org.apache.paimon.operation.ManifestFileMerger.FullCompactionReadResult; import org.apache.paimon.partition.PartitionPredicate; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.Filter; @@ -729,15 +728,16 @@ private static void rewriteSubSegments( } /** - * Unified method to sort and rewrite a section. + * Sort and rewrite a section. Dispatches to full or minor compact path. * - * @param fullCompaction if true, merge ADD+DELETE entries together; if false, separate them + *

    sortNewFiles is the same reference as newFilesForAbort, ensuring newly written files are + * cleaned up on exception by the caller's catch block. */ private static void sortAndRewriteSection( List section, ManifestFile manifestFile, RecordComparator fieldComparator, - Set deleteEntries, + Set deletedIdentifiers, Map defaultCompactionMap, RewriteOutput output, List sortNewFiles, @@ -750,28 +750,111 @@ private static void sortAndRewriteSection( return; } - // Read all entries in parallel (common for both paths). - Function> reader = - meta -> singletonList(readForSortRewrite(meta, manifestFile, deleteEntries)); + if (fullCompaction) { + sortAndRewriteFull( + section, + manifestFile, + fieldComparator, + deletedIdentifiers, + output, + sortNewFiles, + manifestReadParallelism); + } else { + sortAndRewriteMinor( + section, + manifestFile, + fieldComparator, + output, + sortNewFiles, + manifestReadParallelism); + } + } - List addEntries = new ArrayList<>(); - List deleteEntriesToRewrite = new ArrayList<>(); - for (FullCompactionReadResult readResult : + /** + * Full compaction path: read all surviving entries (ADD merged with DELETE), sort them + * together, and write to output as a single sorted stream. + */ + private static void sortAndRewriteFull( + List section, + ManifestFile manifestFile, + RecordComparator fieldComparator, + Set deletedIdentifiers, + RewriteOutput output, + List sortNewFiles, + @Nullable Integer manifestReadParallelism) + throws Exception { + // Read surviving ADD entries: filter out entries cancelled by deletedIdentifiers. 
+ Function> reader = + meta -> { + List batch = new ArrayList<>(); + for (ManifestEntry entry : + manifestFile.read( + meta.fileName(), + meta.fileSize(), + FileEntry.addFilter(), + Filter.alwaysTrue())) { + if (!deletedIdentifiers.contains(entry.identifier())) { + batch.add(entry); + } + } + return batch; + }; + + List entries = new ArrayList<>(); + for (ManifestEntry entry : sequentialBatchedExecute(reader, section, manifestReadParallelism)) { - if (fullCompaction) { - addEntries.addAll(readResult.entries); - } else { - for (ManifestEntry entry : readResult.entries) { - if (entry.kind() == FileKind.ADD) { - addEntries.add(entry); - } else { - deleteEntriesToRewrite.add(entry); + entries.add(entry); + } + + if (!entries.isEmpty()) { + List sorted = + sortAndWriteEntries(entries, manifestFile, fieldComparator); + output.addSortedFiles(sorted); + sortNewFiles.addAll(sorted); + } + } + + /** + * Minor compaction path: read entries with ADD/DELETE classified in a single pass per file, + * then sort each group independently and write them to output. + * + *

    Each file is read in parallel (via sequentialBatchedExecute). The reader classifies + * entries into ADD and DELETE within each file, returning a Pair. Results are merged in the + * main thread. + */ + private static void sortAndRewriteMinor( + List section, + ManifestFile manifestFile, + RecordComparator fieldComparator, + RewriteOutput output, + List sortNewFiles, + @Nullable Integer manifestReadParallelism) + throws Exception { + // Read and classify ADD/DELETE in one pass per file. + // Returns Pair packed as a singleton list of a wrapper. + Function, List>>> reader = + meta -> { + List addBatch = new ArrayList<>(); + List deleteBatch = new ArrayList<>(); + for (ManifestEntry entry : + manifestFile.read(meta.fileName(), meta.fileSize())) { + if (entry.kind() == FileKind.ADD) { + addBatch.add(entry); + } else { + deleteBatch.add(entry); + } } - } - } + return singletonList(Pair.of(addBatch, deleteBatch)); + }; + + List addEntries = new ArrayList<>(); + List deleteEntries = new ArrayList<>(); + for (Pair, List> pair : + sequentialBatchedExecute(reader, section, manifestReadParallelism)) { + addEntries.addAll(pair.getLeft()); + deleteEntries.addAll(pair.getRight()); } - // Write ADD (or all) entries if (!addEntries.isEmpty()) { List sorted = sortAndWriteEntries(addEntries, manifestFile, fieldComparator); @@ -779,10 +862,9 @@ private static void sortAndRewriteSection( sortNewFiles.addAll(sorted); } - // Write DELETE entries (minor compact only) - if (!deleteEntriesToRewrite.isEmpty()) { + if (!deleteEntries.isEmpty()) { List sorted = - sortAndWriteEntries(deleteEntriesToRewrite, manifestFile, fieldComparator); + sortAndWriteEntries(deleteEntries, manifestFile, fieldComparator); output.addDeleteFiles(sorted); sortNewFiles.addAll(sorted); } @@ -847,38 +929,6 @@ static String resolveSortField(String sortPartitionField, RowType partitionType) return partitionType.getFieldNames().get(0); } - /** - * Read a single manifest file for sort rewrite. - * - *

    When {@code deletedIdentifiers} is non-empty (full compaction path), only surviving ADD - * entries (not cancelled by deletedIdentifiers) are kept, and DELETE entries are dropped - * because the full compaction has already resolved them. - * - *

    When {@code deletedIdentifiers} is empty (non-full-compaction path), all entries (both ADD - * and DELETE) are preserved to avoid losing unresolved DELETE entries. - */ - private static FullCompactionReadResult readForSortRewrite( - ManifestFileMeta meta, - ManifestFile manifestFile, - Set deletedIdentifiers) { - List entries = new ArrayList<>(); - if (deletedIdentifiers.isEmpty()) { - entries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); - } else { - for (ManifestEntry entry : - manifestFile.read( - meta.fileName(), - meta.fileSize(), - FileEntry.addFilter(), - Filter.alwaysTrue())) { - if (!deletedIdentifiers.contains(entry.identifier())) { - entries.add(entry); - } - } - } - return new FullCompactionReadResult(meta, true, entries); - } - /** Strategy interface for writing compaction results. */ interface RewriteOutput { void addUnchanged(ManifestFileMeta file); @@ -998,7 +1048,6 @@ static Section merge(Section a, Section b) { /** Result of classifying manifest files into default-compaction and LSM groups. 
*/ private static class ClassifyResult { final Map defaultCompactionManifests; - final List lsmFiles; @Nullable final Set deleteEntries; From ad21fa44ccafdd981b5496ec806b14d1bb6b6bc9 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 21:18:04 +0800 Subject: [PATCH 41/48] fix --- .../java/org/apache/paimon/operation/ManifestFileSorter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 834d5d5ff11e..c0d9c4302dbf 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -1049,12 +1049,12 @@ static Section merge(Section a, Section b) { private static class ClassifyResult { final Map defaultCompactionManifests; final List lsmFiles; - @Nullable final Set deleteEntries; + final Set deleteEntries; ClassifyResult( Map defaultCompactionManifests, List lsmFiles, - @Nullable Set deleteEntries) { + Set deleteEntries) { this.defaultCompactionManifests = defaultCompactionManifests; this.lsmFiles = lsmFiles; this.deleteEntries = deleteEntries; From 781f678bdb1095ed09ea36d2125b88ffbfdfbb4b Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 21:35:04 +0800 Subject: [PATCH 42/48] fix --- .../paimon/operation/ManifestFileSorter.java | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index c0d9c4302dbf..bea1da2b28a8 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -191,17 +191,7 @@ private static List tryMinorCompact( throws Exception { long 
suggestedMetaSize = options.manifestTargetSize().getBytes(); - // Step 1: Build fileName -> index mapping and initialize 2D result - Map fileNameToIndex = new HashMap<>(); - for (int i = 0; i < input.size(); i++) { - fileNameToIndex.put(input.get(i).fileName(), i); - } - List> result = new ArrayList<>(input.size()); - for (int i = 0; i < input.size(); i++) { - result.add(new ArrayList<>()); - } - - // Step 2: Prepare compaction context + // Step 1: Prepare compaction context (early-return if nothing to compact) CompactionContext ctx = prepareCompaction(input, manifestFile, partitionType, options, false); Map defaultCompactionMap = ctx.defaultCompactionManifests; @@ -224,7 +214,15 @@ private static List tryMinorCompact( pickedRuns.size(), defaultCompactionMap.size()); - // Step 3: Collect reused files at their original index positions and collect picked files + // Step 2: Build fileName -> index mapping and initialize 2D result + Map fileNameToIndex = new HashMap<>(); + List> result = new ArrayList<>(input.size()); + for (int i = 0; i < input.size(); i++) { + fileNameToIndex.put(input.get(i).fileName(), i); + result.add(new ArrayList<>()); + } + + // Step 3: Collect reused files and picked files Set pickedSet = new HashSet<>(pickedRuns); for (ManifestSortedRun run : levelRuns) { if (!pickedSet.contains(run)) { From 38457435be5000fe5650a5d29c2f7f24ce856624 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 21:38:15 +0800 Subject: [PATCH 43/48] fix --- .../test/java/org/apache/paimon/schema/SchemaValidationTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java index 71d200ac6206..15810f210e9f 100644 --- a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java @@ -513,6 +513,7 @@ void 
testManifestSortValidation() { // Test 2: manifest-sort-partition-field not in partition keys should fail Map options2 = new HashMap<>(); + options2.put(CoreOptions.MANIFEST_SORT_ENABLED.key(), "true"); options2.put(CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), "f1"); options2.put(BUCKET.key(), String.valueOf(-1)); assertThatThrownBy( From 18e9578b2170f8641c4f45119e9dcf17617940a7 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 21 May 2026 00:38:33 +0800 Subject: [PATCH 44/48] refactor --- ...un.java => ManifestAdjacentSortedRun.java} | 28 +- .../paimon/operation/ManifestFileMerger.java | 5 +- .../paimon/operation/ManifestFileSorter.java | 468 ++++++------------ .../operation/ManifestPickStrategy.java | 18 +- 4 files changed, 175 insertions(+), 344 deletions(-) rename paimon-core/src/main/java/org/apache/paimon/operation/{ManifestSortedRun.java => ManifestAdjacentSortedRun.java} (67%) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestAdjacentSortedRun.java similarity index 67% rename from paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java rename to paimon-core/src/main/java/org/apache/paimon/operation/ManifestAdjacentSortedRun.java index c270677e1f8d..4e1db69fb6dd 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestAdjacentSortedRun.java @@ -26,18 +26,18 @@ import java.util.stream.Collectors; /** - * A {@code ManifestSortedRun} is a list of {@link ManifestFileMeta}s sorted by a single partition - * field (the configured manifest sort field). The intervals {@code [partitionStats.minValues[k], - * partitionStats.maxValues[k]]} of these manifests do not overlap on field {@code k}, where {@code - * k} is the configured sort field index. 
+ * A {@code ManifestAdjacentSortedRun} is a list of {@link ManifestFileMeta}s sorted by a single + * partition field (the configured manifest sort field). The intervals {@code + * [partitionStats.minValues[k], partitionStats.maxValues[k]]} of these manifests do not overlap on + * field {@code k}, where {@code k} is the configured sort field index. */ -public class ManifestSortedRun { +public class ManifestAdjacentSortedRun { private int level; private final List files; private final long totalSize; - private ManifestSortedRun(List files) { + private ManifestAdjacentSortedRun(List files) { this.level = -1; this.files = Collections.unmodifiableList(files); long size = 0L; @@ -48,12 +48,12 @@ private ManifestSortedRun(List files) { } /** - * Build a {@code ManifestSortedRun} from an already-sorted list. The caller MUST guarantee that - * {@code sortedFiles} is sorted ascending on the configured sort field's min value, and that - * intervals do not overlap on that field. + * Build a {@code ManifestAdjacentSortedRun} from an already-sorted list. The caller MUST + * guarantee that {@code sortedFiles} is sorted ascending on the configured sort field's min + * value, and that intervals do not overlap on that field. 
*/ - public static ManifestSortedRun fromSorted(List sortedFiles) { - return new ManifestSortedRun(sortedFiles); + public static ManifestAdjacentSortedRun fromSorted(List sortedFiles) { + return new ManifestAdjacentSortedRun(sortedFiles); } public List files() { @@ -74,10 +74,10 @@ public void setLevel(int level) { @Override public boolean equals(Object o) { - if (!(o instanceof ManifestSortedRun)) { + if (!(o instanceof ManifestAdjacentSortedRun)) { return false; } - ManifestSortedRun that = (ManifestSortedRun) o; + ManifestAdjacentSortedRun that = (ManifestAdjacentSortedRun) o; return level == that.level && files.equals(that.files); } @@ -88,7 +88,7 @@ public int hashCode() { @Override public String toString() { - return "ManifestSortedRun{level=" + return "ManifestAdjacentSortedRun{level=" + level + ", files=[" + files.stream().map(ManifestFileMeta::fileName).collect(Collectors.joining(", ")) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index fad84521f5fb..b10505570baf 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -78,8 +78,9 @@ public static List merge( // If manifest-sort.enabled is enabled and there are partition fields, use // trySortRewrite if (options.manifestSortEnabled() && partitionType.getFieldCount() > 0) { - return ManifestFileSorter.trySortRewrite( - input, newFilesForAbort, manifestFile, partitionType, options); + ManifestFileSorter sorter = + new ManifestFileSorter(manifestFile, partitionType, options); + return sorter.trySortRewrite(input, newFilesForAbort); } else { // Otherwise try full compaction first, then minor compaction if needed Optional> fullCompacted = diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java 
b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index bea1da2b28a8..d71d9f901502 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -61,6 +61,38 @@ public class ManifestFileSorter { private static final Logger LOG = LoggerFactory.getLogger(ManifestFileSorter.class); + // Immutable fields set at construction time + private final ManifestFile manifestFile; + private final RowType partitionType; + private final String sortPartitionField; + + private final long suggestedMetaSize; + private final int suggestedMinMetaCount; + private final long fullCompactionThreshold; + private final long maxRewriteSize; + private final int maxSizeAmplificationPercent; + private final int sortedRunSizeRatio; + @Nullable private final Integer manifestReadParallelism; + + // Mutable fields set during prepareCompaction + private boolean fullCompaction; + private RecordComparator fieldComparator; + private Set deleteEntries; + private Map defaultCompactionMap; + + ManifestFileSorter(ManifestFile manifestFile, RowType partitionType, CoreOptions options) { + this.manifestFile = manifestFile; + this.partitionType = partitionType; + this.sortPartitionField = options.manifestSortPartitionField(); + this.suggestedMetaSize = options.manifestTargetSize().getBytes(); + this.suggestedMinMetaCount = options.manifestMergeMinCount(); + this.fullCompactionThreshold = options.manifestFullCompactionThresholdSize().getBytes(); + this.maxRewriteSize = options.manifestSortMaxRewriteSize(); + this.maxSizeAmplificationPercent = options.maxSizeAmplificationPercent(); + this.sortedRunSizeRatio = options.sortedRunSizeRatio(); + this.manifestReadParallelism = options.scanManifestParallelism(); + } + /** * Try to sort-rewrite the merged manifest list by a configured partition field. If the sort * field cannot be resolved, the input is returned as-is. 
@@ -68,20 +100,14 @@ public class ManifestFileSorter { *

    Dispatches to {@link #tryFullCompact} when totalDeltaFileSize >= sizeTrigger, or {@link * #tryMinorCompact} otherwise. */ - static List trySortRewrite( - List input, - List newFilesForAbort, - ManifestFile manifestFile, - RowType partitionType, - CoreOptions options) + List trySortRewrite( + List input, List newFilesForAbort) throws Exception { - // Try full compaction first, then minor compaction if full compact is skipped - Optional> fullCompacted = - tryFullCompact(input, newFilesForAbort, manifestFile, partitionType, options); + Optional> fullCompacted = tryFullCompact(input, newFilesForAbort); if (fullCompacted.isPresent()) { return fullCompacted.get(); } - return tryMinorCompact(input, newFilesForAbort, manifestFile, partitionType, options); + return tryMinorCompact(input, newFilesForAbort); } /** @@ -90,15 +116,9 @@ static List trySortRewrite( *

    Does not build index mapping. sortAndRewriteSection writes all entries (ADD+DELETE merged) * together without separating them. */ - private static Optional> tryFullCompact( - List input, - List newFilesForAbort, - ManifestFile manifestFile, - RowType partitionType, - CoreOptions options) + private Optional> tryFullCompact( + List input, List newFilesForAbort) throws Exception { - long suggestedMetaSize = options.manifestTargetSize().getBytes(); - // Step 1: Check if full compaction threshold is met long totalDeltaFileSize = 0; for (ManifestFileMeta file : input) { @@ -106,18 +126,16 @@ private static Optional> tryFullCompact( totalDeltaFileSize += file.fileSize(); } } - if (totalDeltaFileSize < options.manifestFullCompactionThresholdSize().getBytes()) { + if (totalDeltaFileSize < fullCompactionThreshold) { + this.fullCompaction = false; return Optional.empty(); } - + this.fullCompaction = true; // Step 2: Prepare compaction context - CompactionContext ctx = - prepareCompaction(input, manifestFile, partitionType, options, true); - Map defaultCompactionMap = ctx.defaultCompactionManifests; - List levelRuns = ctx.levelRuns; - List pickedRuns = ctx.pickedRuns; - RecordComparator fieldComparator = ctx.fieldComparator; - Set deleteEntries = ctx.deleteEntries; + Pair, List> runsPair = + prepareCompaction(input); + List levelRuns = runsPair.getLeft(); + List pickedRuns = runsPair.getRight(); if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { LOG.debug( @@ -134,15 +152,15 @@ private static Optional> tryFullCompact( defaultCompactionMap.size()); // Step 3: Collect reused files (not picked) and picked files - Set pickedSet = new HashSet<>(pickedRuns); + Set pickedSet = new HashSet<>(pickedRuns); List result = new ArrayList<>(); - for (ManifestSortedRun run : levelRuns) { + for (ManifestAdjacentSortedRun run : levelRuns) { if (!pickedSet.contains(run)) { result.addAll(run.files()); } } List pickedFiles = new ArrayList<>(); - for (ManifestSortedRun run : 
pickedRuns) { + for (ManifestAdjacentSortedRun run : pickedRuns) { pickedFiles.addAll(run.files()); } pickedFiles.addAll(defaultCompactionMap.keySet()); @@ -154,19 +172,7 @@ private static Optional> tryFullCompact( // Step 5: Rewrite sections FullCompactOutput output = new FullCompactOutput(result); - rewriteSections( - sections, - defaultCompactionMap, - manifestFile, - fieldComparator, - deleteEntries, - suggestedMetaSize, - options.manifestMergeMinCount(), - options.manifestSortMaxRewriteSize(), - output, - newFilesForAbort, - options.scanManifestParallelism(), - true); + rewriteSections(sections, output, newFilesForAbort); LOG.info( "Manifest sort full compact completed: sections={}, newFiles={}, resultFiles={}.", @@ -182,23 +188,14 @@ private static Optional> tryFullCompact( *

    Builds index mapping to preserve original positions. sortAndRewriteSection separates ADD * and DELETE entries, placing ADD at result[minIdx] and DELETE at result[maxIdx]. */ - private static List tryMinorCompact( - List input, - List newFilesForAbort, - ManifestFile manifestFile, - RowType partitionType, - CoreOptions options) + private List tryMinorCompact( + List input, List newFilesForAbort) throws Exception { - long suggestedMetaSize = options.manifestTargetSize().getBytes(); - // Step 1: Prepare compaction context (early-return if nothing to compact) - CompactionContext ctx = - prepareCompaction(input, manifestFile, partitionType, options, false); - Map defaultCompactionMap = ctx.defaultCompactionManifests; - List levelRuns = ctx.levelRuns; - List pickedRuns = ctx.pickedRuns; - RecordComparator fieldComparator = ctx.fieldComparator; - Set deleteEntries = ctx.deleteEntries; + Pair, List> runsPair = + prepareCompaction(input); + List levelRuns = runsPair.getLeft(); + List pickedRuns = runsPair.getRight(); if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { LOG.debug( @@ -223,8 +220,8 @@ private static List tryMinorCompact( } // Step 3: Collect reused files and picked files - Set pickedSet = new HashSet<>(pickedRuns); - for (ManifestSortedRun run : levelRuns) { + Set pickedSet = new HashSet<>(pickedRuns); + for (ManifestAdjacentSortedRun run : levelRuns) { if (!pickedSet.contains(run)) { for (ManifestFileMeta file : run.files()) { Integer idx = fileNameToIndex.get(file.fileName()); @@ -236,7 +233,7 @@ private static List tryMinorCompact( } List pickedFiles = new ArrayList<>(); - for (ManifestSortedRun run : pickedRuns) { + for (ManifestAdjacentSortedRun run : pickedRuns) { pickedFiles.addAll(run.files()); } pickedFiles.addAll(defaultCompactionMap.keySet()); @@ -259,20 +256,8 @@ private static List tryMinorCompact( sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); // Step 6: Rewrite sections - MinorCompactOutput output = new 
MinorCompactOutput(result, indexRange); - rewriteSections( - sections, - defaultCompactionMap, - manifestFile, - fieldComparator, - deleteEntries, - suggestedMetaSize, - options.manifestMergeMinCount(), - options.manifestSortMaxRewriteSize(), - output, - newFilesForAbort, - options.scanManifestParallelism(), - false); + MinorCompactOutput output = new MinorCompactOutput(result, indexRange, fileNameToIndex); + rewriteSections(sections, output, newFilesForAbort); // Step 7: Flatten 2D result into a single list List flatResult = new ArrayList<>(); @@ -289,18 +274,14 @@ private static List tryMinorCompact( } /** - * Prepare compaction context by extracting common logic from tryFullCompact and - * tryMinorCompact. + * Prepare compaction context: resolve sort field, classify manifests, build level runs, and + * pick runs for compaction. Sets instance fields: fullCompaction, fieldComparator, + * deleteEntries, defaultCompactionMap. + * + * @return Pair of (levelRuns, pickedRuns) */ - private static CompactionContext prepareCompaction( - List input, - ManifestFile manifestFile, - RowType partitionType, - CoreOptions options, - boolean fullCompaction) { - long suggestedMetaSize = options.manifestTargetSize().getBytes(); - Integer manifestReadParallelism = options.scanManifestParallelism(); - String sortPartitionField = options.manifestSortPartitionField(); + private Pair, List> + prepareCompaction(List input) { String sortField = resolveSortField(sortPartitionField, partitionType); if (sortField == null) { @@ -308,71 +289,52 @@ private static CompactionContext prepareCompaction( "Cannot resolve sort field for manifest sort rewrite."); } int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); - RecordComparator fieldComparator = + this.fieldComparator = CodeGenUtils.newRecordComparator( partitionType.getFieldTypes(), new int[] {sortFieldIndex}); - ClassifyResult classified = - classifyManifests( - input, - suggestedMetaSize, - manifestFile, - partitionType, - 
fullCompaction, - manifestReadParallelism); - - List lsmFiles = classified.lsmFiles; - List levelRuns = + List lsmFiles = classifyManifests(input); + List levelRuns = lsmFiles.isEmpty() ? new ArrayList<>() : buildLevelSortedRuns(lsmFiles, fieldComparator); ManifestPickStrategy pickStrategy = - new ManifestPickStrategy( - options.maxSizeAmplificationPercent(), options.sortedRunSizeRatio()); - List pickedRuns = pickStrategy.pick(levelRuns); - - return new CompactionContext( - fieldComparator, - classified.defaultCompactionManifests, - classified.lsmFiles, - classified.deleteEntries, - levelRuns, - pickedRuns); + new ManifestPickStrategy(maxSizeAmplificationPercent, sortedRunSizeRatio); + List pickedRuns = pickStrategy.pick(levelRuns); + + return Pair.of(levelRuns, pickedRuns); } /** - * Classify manifest files into default-compaction group and LSM group. + * Classify manifest files into default-compaction group and LSM group. Sets instance fields + * {@link #deleteEntries} and {@link #defaultCompactionMap}. * *

    Full compaction: small files and files overlapping delete partitions go into - * defaultCompactionManifests; the rest stay as lsmFiles. + * defaultCompactionMap; the rest are returned as lsmFiles. + * + *

    Non-full compaction: small files go to defaultCompactionMap for minor-style merge; the + * rest are returned as lsmFiles. * - *

    Non-full compaction: delete-overlapping files go to result, small files go to - * defaultCompactionManifests for minor-style merge. + * @return lsmFiles that should participate in LSM-tree compaction */ - private static ClassifyResult classifyManifests( - List input, - long suggestedMetaSize, - ManifestFile manifestFile, - RowType partitionType, - boolean fullCompaction, - @Nullable Integer manifestReadParallelism) { + private List classifyManifests(List input) { // Initialize classification containers and read delete entries - Map defaultCompactionManifests = new LinkedHashMap<>(); + Map classifiedDefaultMap = new LinkedHashMap<>(); List lsmFiles = new LinkedList<>(input); - Set deleteEntries = Collections.emptySet(); + Set classifiedDeleteEntries = Collections.emptySet(); PartitionPredicate predicate = null; if (fullCompaction) { - deleteEntries = + classifiedDeleteEntries = FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); // Build partition predicate from delete entries for overlap detection - if (deleteEntries.isEmpty()) { + if (classifiedDeleteEntries.isEmpty()) { predicate = PartitionPredicate.ALWAYS_FALSE; } else { if (partitionType.getFieldCount() > 0) { Set deletePartitions = - ManifestFileMerger.computeDeletePartitions(deleteEntries); + ManifestFileMerger.computeDeletePartitions(classifiedDeleteEntries); predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions); } else { predicate = PartitionPredicate.ALWAYS_TRUE; @@ -394,10 +356,14 @@ private static ClassifyResult classifyManifests( file.partitionStats().nullCounts()); if (small || inDeleteRange) { iterator.remove(); - defaultCompactionManifests.put(file, inDeleteRange); + classifiedDefaultMap.put(file, inDeleteRange); } } - return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); + + // Set instance fields + this.deleteEntries = classifiedDeleteEntries; + this.defaultCompactionMap = classifiedDefaultMap; + return lsmFiles; } /** @@ 
-405,7 +371,7 @@ private static ClassifyResult classifyManifests( * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 * largest to level 1~4, rest to level 0). */ - static List buildLevelSortedRuns( + static List buildLevelSortedRuns( List input, RecordComparator fieldComparator) { // Step 1: Sort by min value (if equal, then by max value) input.sort( @@ -444,9 +410,10 @@ static List buildLevelSortedRuns( earliestRun.get(earliestRun.size() - 1).partitionStats().maxValues()) >= 0) { // Current file's min >= run's max, append to this run - // Note: When min == max (boundary equality), files are considered non-overlapping - // and can be placed in the same SortedRun. This allows building fewer SortedRuns, - // improving compaction efficiency while maintaining correct sort order. + // Note: When min == max (boundary equality), files are considered + // non-overlapping and can be placed in the same SortedRun. This allows + // building fewer SortedRuns, improving compaction efficiency while + // maintaining correct sort order. earliestRun.add(file); runs.offer(earliestRun); } else { @@ -458,14 +425,14 @@ static List buildLevelSortedRuns( } } - // Step 3: Convert to ManifestSortedRun list - List result = new ArrayList<>(); + // Step 3: Convert to ManifestAdjacentSortedRun list + List result = new ArrayList<>(); while (!runs.isEmpty()) { - result.add(ManifestSortedRun.fromSorted(runs.poll())); + result.add(ManifestAdjacentSortedRun.fromSorted(runs.poll())); } // Step 4: Sort by totalSize and assign levels - result.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); + result.sort(Comparator.comparingLong(ManifestAdjacentSortedRun::totalSize)); int n = result.size(); int maxLevel = ManifestPickStrategy.MAX_LEVEL; for (int i = 0; i < n; i++) { @@ -510,10 +477,11 @@ static List

    splitIntoSections( for (int i = 1; i < pickedFiles.size(); i++) { ManifestFileMeta file = pickedFiles.get(i); - // Note: Boundary equality (file.min == sectionMaxBound) results in separate sections. - // This avoids merge-sort overhead while maintaining partition filtering capability. - // Files with non-overlapping boundaries (including equal boundaries) can be processed - // independently without significantly impacting partition pruning efficiency. + // Note: Boundary equality (file.min == sectionMaxBound) results in separate + // sections. This avoids merge-sort overhead while maintaining partition filtering + // capability. Files with non-overlapping boundaries (including equal boundaries) + // can be processed independently without significantly impacting partition pruning + // efficiency. if (fieldComparator.compare(file.partitionStats().minValues(), sectionMaxBound) >= 0) { sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); currentFiles = new ArrayList<>(); @@ -567,20 +535,9 @@ private static List
    mergeSmallAdjacentSections( return merged; } - /** Unified method to rewrite sections with budget control. */ - private static void rewriteSections( - List
    sections, - Map defaultCompactionMap, - ManifestFile manifestFile, - RecordComparator fieldComparator, - Set deleteEntries, - long suggestedMetaSize, - int suggestedMinMetaCount, - long maxRewriteSize, - RewriteOutput output, - List sortNewFiles, - @Nullable Integer manifestReadParallelism, - boolean fullCompaction) + /** Rewrite sections with budget control. */ + private void rewriteSections( + List
    sections, RewriteOutput output, List sortNewFiles) throws Exception { long processedSize = 0; boolean reachedLimit = false; @@ -588,31 +545,13 @@ private static void rewriteSections( for (int i = 0; i < sections.size(); i++) { Section section = sections.get(i); if (section.files.size() == 1) { - sortAndRewriteSection( - section.files, - manifestFile, - fieldComparator, - deleteEntries, - defaultCompactionMap, - output, - sortNewFiles, - manifestReadParallelism, - fullCompaction); + sortAndRewriteSection(section.files, output, sortNewFiles); continue; } if (processedSize + section.totalSize <= maxRewriteSize) { processedSize += section.totalSize; - sortAndRewriteSection( - section.files, - manifestFile, - fieldComparator, - deleteEntries, - defaultCompactionMap, - output, - sortNewFiles, - manifestReadParallelism, - fullCompaction); + sortAndRewriteSection(section.files, output, sortNewFiles); } else if (!reachedLimit) { long rewriteTotalSize = maxRewriteSize - processedSize; processedSize += section.totalSize; @@ -635,16 +574,7 @@ private static void rewriteSections( } } - sortAndRewriteSection( - rewriteFiles, - manifestFile, - fieldComparator, - deleteEntries, - defaultCompactionMap, - output, - sortNewFiles, - manifestReadParallelism, - fullCompaction); + sortAndRewriteSection(rewriteFiles, output, sortNewFiles); if (!remainingFiles.isEmpty()) { Section remainingSection = @@ -653,37 +583,18 @@ private static void rewriteSections( } reachedLimit = true; } else if (section.hasDefaultCompactMeta) { - rewriteSubSegments( - section.files, - defaultCompactionMap, - manifestFile, - fieldComparator, - deleteEntries, - suggestedMetaSize, - suggestedMinMetaCount, - output, - sortNewFiles, - manifestReadParallelism, - fullCompaction); + rewriteSubSegments(section.files, output, sortNewFiles); } else { output.addAllUnchanged(section.files); } } } - /** Unified method to rewrite sub-segments with budget control. 
*/ - private static void rewriteSubSegments( + /** Rewrite sub-segments within a section that exceeded the budget. */ + private void rewriteSubSegments( List section, - Map defaultCompactionMap, - ManifestFile manifestFile, - RecordComparator fieldComparator, - Set deleteEntries, - long manifestTargetSize, - int suggestedMinMetaCount, RewriteOutput output, - List sortNewFiles, - @Nullable Integer manifestReadParallelism, - boolean fullCompaction) + List sortNewFiles) throws Exception { List subSegment = new ArrayList<>(); long subSegmentSize = 0; @@ -691,17 +602,8 @@ private static void rewriteSubSegments( subSegmentSize += m.fileSize(); subSegment.add(m); - if (subSegmentSize >= manifestTargetSize) { - sortAndRewriteSection( - subSegment, - manifestFile, - fieldComparator, - deleteEntries, - defaultCompactionMap, - output, - sortNewFiles, - manifestReadParallelism, - fullCompaction); + if (subSegmentSize >= suggestedMetaSize) { + sortAndRewriteSection(subSegment, output, sortNewFiles); subSegment.clear(); subSegmentSize = 0; } @@ -709,16 +611,7 @@ private static void rewriteSubSegments( // Flush tail only if delete entries exist or file count >= minCount. if (!subSegment.isEmpty()) { if (!deleteEntries.isEmpty() || subSegment.size() >= suggestedMinMetaCount) { - sortAndRewriteSection( - subSegment, - manifestFile, - fieldComparator, - deleteEntries, - defaultCompactionMap, - output, - sortNewFiles, - manifestReadParallelism, - fullCompaction); + sortAndRewriteSection(subSegment, output, sortNewFiles); } else { output.addAllUnchanged(subSegment); } @@ -731,16 +624,10 @@ private static void rewriteSubSegments( *

    sortNewFiles is the same reference as newFilesForAbort, ensuring newly written files are * cleaned up on exception by the caller's catch block. */ - private static void sortAndRewriteSection( + private void sortAndRewriteSection( List section, - ManifestFile manifestFile, - RecordComparator fieldComparator, - Set deletedIdentifiers, - Map defaultCompactionMap, RewriteOutput output, - List sortNewFiles, - @Nullable Integer manifestReadParallelism, - boolean fullCompaction) + List sortNewFiles) throws Exception { // Skip rewrite for single file not in delete-range. if (section.size() == 1 && !defaultCompactionMap.getOrDefault(section.get(0), false)) { @@ -749,22 +636,9 @@ private static void sortAndRewriteSection( } if (fullCompaction) { - sortAndRewriteFull( - section, - manifestFile, - fieldComparator, - deletedIdentifiers, - output, - sortNewFiles, - manifestReadParallelism); + sortAndRewriteFull(section, output, sortNewFiles); } else { - sortAndRewriteMinor( - section, - manifestFile, - fieldComparator, - output, - sortNewFiles, - manifestReadParallelism); + sortAndRewriteMinor(section, output, sortNewFiles); } } @@ -772,16 +646,12 @@ private static void sortAndRewriteSection( * Full compaction path: read all surviving entries (ADD merged with DELETE), sort them * together, and write to output as a single sorted stream. */ - private static void sortAndRewriteFull( + private void sortAndRewriteFull( List section, - ManifestFile manifestFile, - RecordComparator fieldComparator, - Set deletedIdentifiers, RewriteOutput output, - List sortNewFiles, - @Nullable Integer manifestReadParallelism) + List sortNewFiles) throws Exception { - // Read surviving ADD entries: filter out entries cancelled by deletedIdentifiers. + // Read surviving ADD entries: filter out entries cancelled by deleteEntries. 
Function> reader = meta -> { List batch = new ArrayList<>(); @@ -791,7 +661,7 @@ private static void sortAndRewriteFull( meta.fileSize(), FileEntry.addFilter(), Filter.alwaysTrue())) { - if (!deletedIdentifiers.contains(entry.identifier())) { + if (!deleteEntries.contains(entry.identifier())) { batch.add(entry); } } @@ -805,8 +675,7 @@ private static void sortAndRewriteFull( } if (!entries.isEmpty()) { - List sorted = - sortAndWriteEntries(entries, manifestFile, fieldComparator); + List sorted = sortAndWriteEntries(entries); output.addSortedFiles(sorted); sortNewFiles.addAll(sorted); } @@ -820,16 +689,12 @@ private static void sortAndRewriteFull( * entries into ADD and DELETE within each file, returning a Pair. Results are merged in the * main thread. */ - private static void sortAndRewriteMinor( + private void sortAndRewriteMinor( List section, - ManifestFile manifestFile, - RecordComparator fieldComparator, RewriteOutput output, - List sortNewFiles, - @Nullable Integer manifestReadParallelism) + List sortNewFiles) throws Exception { // Read and classify ADD/DELETE in one pass per file. - // Returns Pair packed as a singleton list of a wrapper. 
Function, List>>> reader = meta -> { List addBatch = new ArrayList<>(); @@ -846,33 +711,28 @@ private static void sortAndRewriteMinor( }; List addEntries = new ArrayList<>(); - List deleteEntries = new ArrayList<>(); + List minorDeleteEntries = new ArrayList<>(); for (Pair, List> pair : sequentialBatchedExecute(reader, section, manifestReadParallelism)) { addEntries.addAll(pair.getLeft()); - deleteEntries.addAll(pair.getRight()); + minorDeleteEntries.addAll(pair.getRight()); } if (!addEntries.isEmpty()) { - List sorted = - sortAndWriteEntries(addEntries, manifestFile, fieldComparator); + List sorted = sortAndWriteEntries(addEntries); output.addSortedFiles(sorted); sortNewFiles.addAll(sorted); } - if (!deleteEntries.isEmpty()) { - List sorted = - sortAndWriteEntries(deleteEntries, manifestFile, fieldComparator); + if (!minorDeleteEntries.isEmpty()) { + List sorted = sortAndWriteEntries(minorDeleteEntries); output.addDeleteFiles(sorted); sortNewFiles.addAll(sorted); } } /** Sort entries and write them to a new manifest file with proper error handling. 
*/ - private static List sortAndWriteEntries( - List entries, - ManifestFile manifestFile, - RecordComparator fieldComparator) + private List sortAndWriteEntries(List entries) throws Exception { entries.sort((a, b) -> compareSortKey(a, b, fieldComparator)); RollingFileWriter writer = @@ -969,20 +829,28 @@ public void addDeleteFiles(List files) { private static class MinorCompactOutput implements RewriteOutput { private final List> result; private final Pair indexRange; + private final Map fileNameToIndex; - MinorCompactOutput(List> result, Pair indexRange) { + MinorCompactOutput( + List> result, + Pair indexRange, + Map fileNameToIndex) { this.result = result; this.indexRange = indexRange; + this.fileNameToIndex = fileNameToIndex; } @Override public void addUnchanged(ManifestFileMeta file) { - result.get(indexRange.getLeft()).add(file); + Integer idx = fileNameToIndex.get(file.fileName()); + result.get(idx).add(file); } @Override public void addAllUnchanged(List files) { - result.get(indexRange.getLeft()).addAll(files); + for (ManifestFileMeta file : files) { + addUnchanged(file); + } } @Override @@ -996,30 +864,6 @@ public void addDeleteFiles(List files) { } } - private static class CompactionContext { - final RecordComparator fieldComparator; - final Map defaultCompactionManifests; - final List lsmFiles; - @Nullable final Set deleteEntries; - final List levelRuns; - final List pickedRuns; - - CompactionContext( - RecordComparator fieldComparator, - Map defaultCompactionManifests, - List lsmFiles, - @Nullable Set deleteEntries, - List levelRuns, - List pickedRuns) { - this.fieldComparator = fieldComparator; - this.defaultCompactionManifests = defaultCompactionManifests; - this.lsmFiles = lsmFiles; - this.deleteEntries = deleteEntries; - this.levelRuns = levelRuns; - this.pickedRuns = pickedRuns; - } - } - /** A section of manifest files with pre-computed metadata. 
*/ static class Section { final List files; @@ -1042,20 +886,4 @@ static Section merge(Section a, Section b) { a.hasDefaultCompactMeta || b.hasDefaultCompactMeta); } } - - /** Result of classifying manifest files into default-compaction and LSM groups. */ - private static class ClassifyResult { - final Map defaultCompactionManifests; - final List lsmFiles; - final Set deleteEntries; - - ClassifyResult( - Map defaultCompactionManifests, - List lsmFiles, - Set deleteEntries) { - this.defaultCompactionManifests = defaultCompactionManifests; - this.lsmFiles = lsmFiles; - this.deleteEntries = deleteEntries; - } - } } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index 3a8693d4dcf0..519c49676ce3 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -56,13 +56,13 @@ public ManifestPickStrategy(int sizeAmpThreshold, int sizeRatioThreshold) { * @param levelRuns runs with assigned levels (level 0~4) * @return list of picked runs to compact */ - public List pick(List levelRuns) { + public List pick(List levelRuns) { if (levelRuns.isEmpty() || levelRuns.size() <= MAX_LEVEL) { return new ArrayList<>(); } // Try SizeAmp first - List sizeAmpResult = pickForSizeAmp(levelRuns); + List sizeAmpResult = pickForSizeAmp(levelRuns); if (sizeAmpResult != null) { return sizeAmpResult; } @@ -78,13 +78,14 @@ public List pick(List levelRuns) { *

    Formula (consistent with {@code UniversalCompaction#pickForSizeAmp}): {@code * lowerLevelTotalSize * 100 > sizeAmpThreshold * highestRunSize} */ - private List pickForSizeAmp(List levelRuns) { + private List pickForSizeAmp( + List levelRuns) { if (levelRuns.isEmpty()) { return null; } // The last run has the highest level (set by buildLevelSortedRuns) - ManifestSortedRun highestRun = levelRuns.get(levelRuns.size() - 1); + ManifestAdjacentSortedRun highestRun = levelRuns.get(levelRuns.size() - 1); int maxLevel = highestRun.level(); if (maxLevel <= 0) { @@ -92,7 +93,7 @@ private List pickForSizeAmp(List levelRuns } long lowerLevelTotalSize = 0; - for (ManifestSortedRun run : levelRuns) { + for (ManifestAdjacentSortedRun run : levelRuns) { if (run.level() < maxLevel) { lowerLevelTotalSize += run.totalSize(); } @@ -117,9 +118,10 @@ private List pickForSizeAmp(List levelRuns *

    Formula (consistent with {@code UniversalCompaction#pickForSizeRatio}): {@code pickedSize * * (100.0 + sizeRatioThreshold) / 100.0 >= nextRunSize} */ - private List pickForSizeRatioAndForce(List levelRuns) { + private List pickForSizeRatioAndForce( + List levelRuns) { // levelRuns is already sorted by level ascending (set by buildLevelSortedRuns) - List picked = new ArrayList<>(); + List picked = new ArrayList<>(); // Always pick the first run to guarantee a non-empty result. picked.add(levelRuns.get(0)); @@ -127,7 +129,7 @@ private List pickForSizeRatioAndForce(List // From the second run onward: forced pick level0/level1, then SizeRatio for the rest. for (int i = 1; i < levelRuns.size(); i++) { - ManifestSortedRun run = levelRuns.get(i); + ManifestAdjacentSortedRun run = levelRuns.get(i); if (run.level() <= 1) { picked.add(run); pickedSize += run.totalSize(); From 01e0af4f64eeb85eb597aaa31bcb6477238afa05 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 21 May 2026 00:43:09 +0800 Subject: [PATCH 45/48] fix --- docs/layouts/shortcodes/generated/core_configuration.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html index e6fcb5f93587..3cb2ceb2db07 100644 --- a/docs/layouts/shortcodes/generated/core_configuration.html +++ b/docs/layouts/shortcodes/generated/core_configuration.html @@ -913,7 +913,7 @@

    manifest-sort.partition-field
    (none) String - Partition field name to sort manifest entries by. Validated by schema validation, If not configured, defaults to the first partition field. + Partition field name to sort manifest entries by. Validated by schema validation, if not configured, defaults to the first partition field.
    manifest-sort.max-rewrite-size
    From 9236ce4173eaf87541c5c8a57874784e5cfa7920 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 21 May 2026 16:10:07 +0800 Subject: [PATCH 46/48] static --- .../paimon/operation/ManifestFileMerger.java | 5 +- .../paimon/operation/ManifestFileSorter.java | 401 +++++++++++++----- 2 files changed, 300 insertions(+), 106 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index b10505570baf..fad84521f5fb 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -78,9 +78,8 @@ public static List merge( // If manifest-sort.enabled is enabled and there are partition fields, use // trySortRewrite if (options.manifestSortEnabled() && partitionType.getFieldCount() > 0) { - ManifestFileSorter sorter = - new ManifestFileSorter(manifestFile, partitionType, options); - return sorter.trySortRewrite(input, newFilesForAbort); + return ManifestFileSorter.trySortRewrite( + input, newFilesForAbort, manifestFile, partitionType, options); } else { // Otherwise try full compaction first, then minor compaction if needed Optional> fullCompacted = diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index d71d9f901502..1ed980856c07 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -61,36 +61,47 @@ public class ManifestFileSorter { private static final Logger LOG = LoggerFactory.getLogger(ManifestFileSorter.class); - // Immutable fields set at construction time - private final ManifestFile manifestFile; - private final RowType partitionType; - private final String sortPartitionField; - - private final 
long suggestedMetaSize; - private final int suggestedMinMetaCount; - private final long fullCompactionThreshold; - private final long maxRewriteSize; - private final int maxSizeAmplificationPercent; - private final int sortedRunSizeRatio; - @Nullable private final Integer manifestReadParallelism; - - // Mutable fields set during prepareCompaction - private boolean fullCompaction; - private RecordComparator fieldComparator; - private Set deleteEntries; - private Map defaultCompactionMap; - - ManifestFileSorter(ManifestFile manifestFile, RowType partitionType, CoreOptions options) { - this.manifestFile = manifestFile; - this.partitionType = partitionType; - this.sortPartitionField = options.manifestSortPartitionField(); - this.suggestedMetaSize = options.manifestTargetSize().getBytes(); - this.suggestedMinMetaCount = options.manifestMergeMinCount(); - this.fullCompactionThreshold = options.manifestFullCompactionThresholdSize().getBytes(); - this.maxRewriteSize = options.manifestSortMaxRewriteSize(); - this.maxSizeAmplificationPercent = options.maxSizeAmplificationPercent(); - this.sortedRunSizeRatio = options.sortedRunSizeRatio(); - this.manifestReadParallelism = options.scanManifestParallelism(); + private ManifestFileSorter() {} + + /** Context object that carries shared state across compaction methods. 
*/ + static class CompactionContext { + final boolean fullCompaction; + final RecordComparator fieldComparator; + final Set deleteEntries; + final Map defaultCompactionMap; + final List levelRuns; + final List pickedRuns; + + CompactionContext( + boolean fullCompaction, + RecordComparator fieldComparator, + Set deleteEntries, + Map defaultCompactionMap, + List levelRuns, + List pickedRuns) { + this.fullCompaction = fullCompaction; + this.fieldComparator = fieldComparator; + this.deleteEntries = deleteEntries; + this.defaultCompactionMap = defaultCompactionMap; + this.levelRuns = levelRuns; + this.pickedRuns = pickedRuns; + } + } + + /** Result of classifying manifest files. */ + private static class ClassifyResult { + final List lsmFiles; + final Set deleteEntries; + final Map defaultCompactionMap; + + ClassifyResult( + List lsmFiles, + Set deleteEntries, + Map defaultCompactionMap) { + this.lsmFiles = lsmFiles; + this.deleteEntries = deleteEntries; + this.defaultCompactionMap = defaultCompactionMap; + } } /** @@ -100,14 +111,51 @@ public class ManifestFileSorter { *

    Dispatches to {@link #tryFullCompact} when totalDeltaFileSize >= sizeTrigger, or {@link * #tryMinorCompact} otherwise. */ - List trySortRewrite( - List input, List newFilesForAbort) + static List trySortRewrite( + List input, + List newFilesForAbort, + ManifestFile manifestFile, + RowType partitionType, + CoreOptions options) throws Exception { - Optional> fullCompacted = tryFullCompact(input, newFilesForAbort); + String sortPartitionField = options.manifestSortPartitionField(); + long suggestedMetaSize = options.manifestTargetSize().getBytes(); + int suggestedMinMetaCount = options.manifestMergeMinCount(); + long fullCompactionThreshold = options.manifestFullCompactionThresholdSize().getBytes(); + long maxRewriteSize = options.manifestSortMaxRewriteSize(); + int maxSizeAmplificationPercent = options.maxSizeAmplificationPercent(); + int sortedRunSizeRatio = options.sortedRunSizeRatio(); + Integer manifestReadParallelism = options.scanManifestParallelism(); + + Optional> fullCompacted = + tryFullCompact( + input, + newFilesForAbort, + manifestFile, + partitionType, + sortPartitionField, + suggestedMetaSize, + suggestedMinMetaCount, + fullCompactionThreshold, + maxRewriteSize, + maxSizeAmplificationPercent, + sortedRunSizeRatio, + manifestReadParallelism); if (fullCompacted.isPresent()) { return fullCompacted.get(); } - return tryMinorCompact(input, newFilesForAbort); + return tryMinorCompact( + input, + newFilesForAbort, + manifestFile, + partitionType, + sortPartitionField, + suggestedMetaSize, + suggestedMinMetaCount, + maxRewriteSize, + maxSizeAmplificationPercent, + sortedRunSizeRatio, + manifestReadParallelism); } /** @@ -116,8 +164,19 @@ List trySortRewrite( *

    Does not build index mapping. sortAndRewriteSection writes all entries (ADD+DELETE merged) * together without separating them. */ - private Optional> tryFullCompact( - List input, List newFilesForAbort) + private static Optional> tryFullCompact( + List input, + List newFilesForAbort, + ManifestFile manifestFile, + RowType partitionType, + String sortPartitionField, + long suggestedMetaSize, + int suggestedMinMetaCount, + long fullCompactionThreshold, + long maxRewriteSize, + int maxSizeAmplificationPercent, + int sortedRunSizeRatio, + @Nullable Integer manifestReadParallelism) throws Exception { // Step 1: Check if full compaction threshold is met long totalDeltaFileSize = 0; @@ -127,17 +186,24 @@ private Optional> tryFullCompact( } } if (totalDeltaFileSize < fullCompactionThreshold) { - this.fullCompaction = false; return Optional.empty(); } - this.fullCompaction = true; // Step 2: Prepare compaction context - Pair, List> runsPair = - prepareCompaction(input); - List levelRuns = runsPair.getLeft(); - List pickedRuns = runsPair.getRight(); - - if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { + CompactionContext ctx = + prepareCompaction( + input, + true, + manifestFile, + partitionType, + sortPartitionField, + suggestedMetaSize, + maxSizeAmplificationPercent, + sortedRunSizeRatio, + manifestReadParallelism); + List levelRuns = ctx.levelRuns; + List pickedRuns = ctx.pickedRuns; + + if (pickedRuns.isEmpty() && ctx.defaultCompactionMap.isEmpty()) { LOG.debug( "Manifest sort full compact skipped: no runs picked and no defaultCompaction files."); return Optional.empty(); @@ -149,7 +215,7 @@ private Optional> tryFullCompact( input.size(), levelRuns.size(), pickedRuns.size(), - defaultCompactionMap.size()); + ctx.defaultCompactionMap.size()); // Step 3: Collect reused files (not picked) and picked files Set pickedSet = new HashSet<>(pickedRuns); @@ -163,16 +229,25 @@ private Optional> tryFullCompact( for (ManifestAdjacentSortedRun run : pickedRuns) { 
pickedFiles.addAll(run.files()); } - pickedFiles.addAll(defaultCompactionMap.keySet()); + pickedFiles.addAll(ctx.defaultCompactionMap.keySet()); // Step 4: Split into sections and merge small adjacent sections List

    sections = - splitIntoSections(pickedFiles, fieldComparator, defaultCompactionMap); + splitIntoSections(pickedFiles, ctx.fieldComparator, ctx.defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); // Step 5: Rewrite sections FullCompactOutput output = new FullCompactOutput(result); - rewriteSections(sections, output, newFilesForAbort); + rewriteSections( + sections, + output, + newFilesForAbort, + ctx, + manifestFile, + suggestedMetaSize, + suggestedMinMetaCount, + maxRewriteSize, + manifestReadParallelism); LOG.info( "Manifest sort full compact completed: sections={}, newFiles={}, resultFiles={}.", @@ -188,16 +263,35 @@ private Optional> tryFullCompact( *

    Builds index mapping to preserve original positions. sortAndRewriteSection separates ADD * and DELETE entries, placing ADD at result[minIdx] and DELETE at result[maxIdx]. */ - private List tryMinorCompact( - List input, List newFilesForAbort) + private static List tryMinorCompact( + List input, + List newFilesForAbort, + ManifestFile manifestFile, + RowType partitionType, + String sortPartitionField, + long suggestedMetaSize, + int suggestedMinMetaCount, + long maxRewriteSize, + int maxSizeAmplificationPercent, + int sortedRunSizeRatio, + @Nullable Integer manifestReadParallelism) throws Exception { // Step 1: Prepare compaction context (early-return if nothing to compact) - Pair, List> runsPair = - prepareCompaction(input); - List levelRuns = runsPair.getLeft(); - List pickedRuns = runsPair.getRight(); - - if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { + CompactionContext ctx = + prepareCompaction( + input, + false, + manifestFile, + partitionType, + sortPartitionField, + suggestedMetaSize, + maxSizeAmplificationPercent, + sortedRunSizeRatio, + manifestReadParallelism); + List levelRuns = ctx.levelRuns; + List pickedRuns = ctx.pickedRuns; + + if (pickedRuns.isEmpty() && ctx.defaultCompactionMap.isEmpty()) { LOG.debug( "Manifest sort minor compact skipped: no runs picked and no defaultCompaction files."); return input; @@ -209,7 +303,7 @@ private List tryMinorCompact( input.size(), levelRuns.size(), pickedRuns.size(), - defaultCompactionMap.size()); + ctx.defaultCompactionMap.size()); // Step 2: Build fileName -> index mapping and initialize 2D result Map fileNameToIndex = new HashMap<>(); @@ -236,7 +330,7 @@ private List tryMinorCompact( for (ManifestAdjacentSortedRun run : pickedRuns) { pickedFiles.addAll(run.files()); } - pickedFiles.addAll(defaultCompactionMap.keySet()); + pickedFiles.addAll(ctx.defaultCompactionMap.keySet()); // Step 4: Compute index range int minIdx = Integer.MAX_VALUE; @@ -252,12 +346,21 @@ private List tryMinorCompact( // 
Step 5: Split into sections and merge small adjacent sections List

    sections = - splitIntoSections(pickedFiles, fieldComparator, defaultCompactionMap); + splitIntoSections(pickedFiles, ctx.fieldComparator, ctx.defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); // Step 6: Rewrite sections MinorCompactOutput output = new MinorCompactOutput(result, indexRange, fileNameToIndex); - rewriteSections(sections, output, newFilesForAbort); + rewriteSections( + sections, + output, + newFilesForAbort, + ctx, + manifestFile, + suggestedMetaSize, + suggestedMinMetaCount, + maxRewriteSize, + manifestReadParallelism); // Step 7: Flatten 2D result into a single list List flatResult = new ArrayList<>(); @@ -275,13 +378,20 @@ private List tryMinorCompact( /** * Prepare compaction context: resolve sort field, classify manifests, build level runs, and - * pick runs for compaction. Sets instance fields: fullCompaction, fieldComparator, - * deleteEntries, defaultCompactionMap. + * pick runs for compaction. * - * @return Pair of (levelRuns, pickedRuns) + * @return CompactionContext containing all shared state */ - private Pair, List> - prepareCompaction(List input) { + private static CompactionContext prepareCompaction( + List input, + boolean fullCompaction, + ManifestFile manifestFile, + RowType partitionType, + String sortPartitionField, + long suggestedMetaSize, + int maxSizeAmplificationPercent, + int sortedRunSizeRatio, + @Nullable Integer manifestReadParallelism) { String sortField = resolveSortField(sortPartitionField, partitionType); if (sortField == null) { @@ -289,11 +399,20 @@ private List tryMinorCompact( "Cannot resolve sort field for manifest sort rewrite."); } int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); - this.fieldComparator = + RecordComparator fieldComparator = CodeGenUtils.newRecordComparator( partitionType.getFieldTypes(), new int[] {sortFieldIndex}); - List lsmFiles = classifyManifests(input); + ClassifyResult classifyResult = + classifyManifests( + input, + 
fullCompaction, + manifestFile, + partitionType, + suggestedMetaSize, + manifestReadParallelism); + List lsmFiles = classifyResult.lsmFiles; + List levelRuns = lsmFiles.isEmpty() ? new ArrayList<>() @@ -303,12 +422,17 @@ private List tryMinorCompact( new ManifestPickStrategy(maxSizeAmplificationPercent, sortedRunSizeRatio); List pickedRuns = pickStrategy.pick(levelRuns); - return Pair.of(levelRuns, pickedRuns); + return new CompactionContext( + fullCompaction, + fieldComparator, + classifyResult.deleteEntries, + classifyResult.defaultCompactionMap, + levelRuns, + pickedRuns); } /** - * Classify manifest files into default-compaction group and LSM group. Sets instance fields - * {@link #deleteEntries} and {@link #defaultCompactionMap}. + * Classify manifest files into default-compaction group and LSM group. * *

    Full compaction: small files and files overlapping delete partitions go into * defaultCompactionMap; the rest are returned as lsmFiles. @@ -316,9 +440,15 @@ private List tryMinorCompact( *

    Non-full compaction: small files go to defaultCompactionMap for minor-style merge; the * rest are returned as lsmFiles. * - * @return lsmFiles that should participate in LSM-tree compaction + * @return ClassifyResult containing lsmFiles, deleteEntries, and defaultCompactionMap */ - private List classifyManifests(List input) { + private static ClassifyResult classifyManifests( + List input, + boolean fullCompaction, + ManifestFile manifestFile, + RowType partitionType, + long suggestedMetaSize, + @Nullable Integer manifestReadParallelism) { // Initialize classification containers and read delete entries Map classifiedDefaultMap = new LinkedHashMap<>(); List lsmFiles = new LinkedList<>(input); @@ -360,10 +490,7 @@ private List classifyManifests(List input) { } } - // Set instance fields - this.deleteEntries = classifiedDeleteEntries; - this.defaultCompactionMap = classifiedDefaultMap; - return lsmFiles; + return new ClassifyResult(lsmFiles, classifiedDeleteEntries, classifiedDefaultMap); } /** @@ -536,8 +663,16 @@ private static List

    mergeSmallAdjacentSections( } /** Rewrite sections with budget control. */ - private void rewriteSections( - List
    sections, RewriteOutput output, List sortNewFiles) + private static void rewriteSections( + List
    sections, + RewriteOutput output, + List sortNewFiles, + CompactionContext ctx, + ManifestFile manifestFile, + long suggestedMetaSize, + int suggestedMinMetaCount, + long maxRewriteSize, + @Nullable Integer manifestReadParallelism) throws Exception { long processedSize = 0; boolean reachedLimit = false; @@ -545,13 +680,25 @@ private void rewriteSections( for (int i = 0; i < sections.size(); i++) { Section section = sections.get(i); if (section.files.size() == 1) { - sortAndRewriteSection(section.files, output, sortNewFiles); + sortAndRewriteSection( + section.files, + output, + sortNewFiles, + ctx, + manifestFile, + manifestReadParallelism); continue; } if (processedSize + section.totalSize <= maxRewriteSize) { processedSize += section.totalSize; - sortAndRewriteSection(section.files, output, sortNewFiles); + sortAndRewriteSection( + section.files, + output, + sortNewFiles, + ctx, + manifestFile, + manifestReadParallelism); } else if (!reachedLimit) { long rewriteTotalSize = maxRewriteSize - processedSize; processedSize += section.totalSize; @@ -568,13 +715,19 @@ private void rewriteSections( } else { remainingFiles.add(file); remainingSize += file.fileSize(); - if (defaultCompactionMap.containsKey(file)) { + if (ctx.defaultCompactionMap.containsKey(file)) { remainingHasDefault = true; } } } - sortAndRewriteSection(rewriteFiles, output, sortNewFiles); + sortAndRewriteSection( + rewriteFiles, + output, + sortNewFiles, + ctx, + manifestFile, + manifestReadParallelism); if (!remainingFiles.isEmpty()) { Section remainingSection = @@ -583,7 +736,15 @@ private void rewriteSections( } reachedLimit = true; } else if (section.hasDefaultCompactMeta) { - rewriteSubSegments(section.files, output, sortNewFiles); + rewriteSubSegments( + section.files, + output, + sortNewFiles, + ctx, + manifestFile, + suggestedMetaSize, + suggestedMinMetaCount, + manifestReadParallelism); } else { output.addAllUnchanged(section.files); } @@ -591,10 +752,15 @@ private void rewriteSections( } 
/** Rewrite sub-segments within a section that exceeded the budget. */ - private void rewriteSubSegments( + private static void rewriteSubSegments( List section, RewriteOutput output, - List sortNewFiles) + List sortNewFiles, + CompactionContext ctx, + ManifestFile manifestFile, + long suggestedMetaSize, + int suggestedMinMetaCount, + @Nullable Integer manifestReadParallelism) throws Exception { List subSegment = new ArrayList<>(); long subSegmentSize = 0; @@ -603,15 +769,27 @@ private void rewriteSubSegments( subSegment.add(m); if (subSegmentSize >= suggestedMetaSize) { - sortAndRewriteSection(subSegment, output, sortNewFiles); + sortAndRewriteSection( + subSegment, + output, + sortNewFiles, + ctx, + manifestFile, + manifestReadParallelism); subSegment.clear(); subSegmentSize = 0; } } // Flush tail only if delete entries exist or file count >= minCount. if (!subSegment.isEmpty()) { - if (!deleteEntries.isEmpty() || subSegment.size() >= suggestedMinMetaCount) { - sortAndRewriteSection(subSegment, output, sortNewFiles); + if (!ctx.deleteEntries.isEmpty() || subSegment.size() >= suggestedMinMetaCount) { + sortAndRewriteSection( + subSegment, + output, + sortNewFiles, + ctx, + manifestFile, + manifestReadParallelism); } else { output.addAllUnchanged(subSegment); } @@ -624,21 +802,26 @@ private void rewriteSubSegments( *

    sortNewFiles is the same reference as newFilesForAbort, ensuring newly written files are * cleaned up on exception by the caller's catch block. */ - private void sortAndRewriteSection( + private static void sortAndRewriteSection( List section, RewriteOutput output, - List sortNewFiles) + List sortNewFiles, + CompactionContext ctx, + ManifestFile manifestFile, + @Nullable Integer manifestReadParallelism) throws Exception { // Skip rewrite for single file not in delete-range. - if (section.size() == 1 && !defaultCompactionMap.getOrDefault(section.get(0), false)) { + if (section.size() == 1 && !ctx.defaultCompactionMap.getOrDefault(section.get(0), false)) { output.addUnchanged(section.get(0)); return; } - if (fullCompaction) { - sortAndRewriteFull(section, output, sortNewFiles); + if (ctx.fullCompaction) { + sortAndRewriteFull( + section, output, sortNewFiles, ctx, manifestFile, manifestReadParallelism); } else { - sortAndRewriteMinor(section, output, sortNewFiles); + sortAndRewriteMinor( + section, output, sortNewFiles, ctx, manifestFile, manifestReadParallelism); } } @@ -646,10 +829,13 @@ private void sortAndRewriteSection( * Full compaction path: read all surviving entries (ADD merged with DELETE), sort them * together, and write to output as a single sorted stream. */ - private void sortAndRewriteFull( + private static void sortAndRewriteFull( List section, RewriteOutput output, - List sortNewFiles) + List sortNewFiles, + CompactionContext ctx, + ManifestFile manifestFile, + @Nullable Integer manifestReadParallelism) throws Exception { // Read surviving ADD entries: filter out entries cancelled by deleteEntries. 
Function> reader = @@ -661,7 +847,7 @@ private void sortAndRewriteFull( meta.fileSize(), FileEntry.addFilter(), Filter.alwaysTrue())) { - if (!deleteEntries.contains(entry.identifier())) { + if (!ctx.deleteEntries.contains(entry.identifier())) { batch.add(entry); } } @@ -675,7 +861,8 @@ private void sortAndRewriteFull( } if (!entries.isEmpty()) { - List sorted = sortAndWriteEntries(entries); + List sorted = + sortAndWriteEntries(entries, ctx.fieldComparator, manifestFile); output.addSortedFiles(sorted); sortNewFiles.addAll(sorted); } @@ -689,10 +876,13 @@ private void sortAndRewriteFull( * entries into ADD and DELETE within each file, returning a Pair. Results are merged in the * main thread. */ - private void sortAndRewriteMinor( + private static void sortAndRewriteMinor( List section, RewriteOutput output, - List sortNewFiles) + List sortNewFiles, + CompactionContext ctx, + ManifestFile manifestFile, + @Nullable Integer manifestReadParallelism) throws Exception { // Read and classify ADD/DELETE in one pass per file. Function, List>>> reader = @@ -719,20 +909,25 @@ private void sortAndRewriteMinor( } if (!addEntries.isEmpty()) { - List sorted = sortAndWriteEntries(addEntries); + List sorted = + sortAndWriteEntries(addEntries, ctx.fieldComparator, manifestFile); output.addSortedFiles(sorted); sortNewFiles.addAll(sorted); } if (!minorDeleteEntries.isEmpty()) { - List sorted = sortAndWriteEntries(minorDeleteEntries); + List sorted = + sortAndWriteEntries(minorDeleteEntries, ctx.fieldComparator, manifestFile); output.addDeleteFiles(sorted); sortNewFiles.addAll(sorted); } } /** Sort entries and write them to a new manifest file with proper error handling. 
*/ - private List sortAndWriteEntries(List entries) + private static List sortAndWriteEntries( + List entries, + RecordComparator fieldComparator, + ManifestFile manifestFile) throws Exception { entries.sort((a, b) -> compareSortKey(a, b, fieldComparator)); RollingFileWriter writer = From 2f767138aa5c46cc83ba61b2b82ee21dc669f175 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 21 May 2026 16:13:41 +0800 Subject: [PATCH 47/48] fix --- .../java/org/apache/paimon/operation/ManifestFileSorter.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 1ed980856c07..f0dfc1600126 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -61,8 +61,6 @@ public class ManifestFileSorter { private static final Logger LOG = LoggerFactory.getLogger(ManifestFileSorter.class); - private ManifestFileSorter() {} - /** Context object that carries shared state across compaction methods. 
*/ static class CompactionContext { final boolean fullCompaction; From 0b36890db5e0fb27b3a1b0c9df4c56ffa2dc5505 Mon Sep 17 00:00:00 2001 From: umi Date: Fri, 22 May 2026 11:03:27 +0800 Subject: [PATCH 48/48] minorDelete --- .../paimon/operation/ManifestFileMerger.java | 2 +- .../paimon/operation/ManifestFileSorter.java | 49 +++++++++++++------ 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index fad84521f5fb..f899aa71786f 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -78,7 +78,7 @@ public static List merge( // If manifest-sort.enabled is enabled and there are partition fields, use // trySortRewrite if (options.manifestSortEnabled() && partitionType.getFieldCount() > 0) { - return ManifestFileSorter.trySortRewrite( + return ManifestFileSorter.trySortCompaction( input, newFilesForAbort, manifestFile, partitionType, options); } else { // Otherwise try full compaction first, then minor compaction if needed diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index f0dfc1600126..87a2474b0431 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -106,10 +106,10 @@ private static class ClassifyResult { * Try to sort-rewrite the merged manifest list by a configured partition field. If the sort * field cannot be resolved, the input is returned as-is. * - *

    Dispatches to {@link #tryFullCompact} when totalDeltaFileSize >= sizeTrigger, or {@link - * #tryMinorCompact} otherwise. + *

    Dispatches to {@link #tryFullCompaction} when totalDeltaFileSize >= sizeTrigger, or {@link + * #tryMinorCompaction} otherwise. */ - static List trySortRewrite( + static List trySortCompaction( List input, List newFilesForAbort, ManifestFile manifestFile, @@ -126,7 +126,7 @@ static List trySortRewrite( Integer manifestReadParallelism = options.scanManifestParallelism(); Optional> fullCompacted = - tryFullCompact( + tryFullCompaction( input, newFilesForAbort, manifestFile, @@ -142,7 +142,7 @@ static List trySortRewrite( if (fullCompacted.isPresent()) { return fullCompacted.get(); } - return tryMinorCompact( + return tryMinorCompaction( input, newFilesForAbort, manifestFile, @@ -162,7 +162,7 @@ static List trySortRewrite( *

    Does not build index mapping. sortAndRewriteSection writes all entries (ADD+DELETE merged) * together without separating them. */ - private static Optional> tryFullCompact( + private static Optional> tryFullCompaction( List input, List newFilesForAbort, ManifestFile manifestFile, @@ -234,6 +234,11 @@ private static Optional> tryFullCompact( splitIntoSections(pickedFiles, ctx.fieldComparator, ctx.defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); + LOG.info( + "Manifest sort full compact: pickedFiles={}, sections={}.", + pickedFiles.size(), + sections.size()); + // Step 5: Rewrite sections FullCompactOutput output = new FullCompactOutput(result); rewriteSections( @@ -248,9 +253,8 @@ private static Optional> tryFullCompact( manifestReadParallelism); LOG.info( - "Manifest sort full compact completed: sections={}, newFiles={}, resultFiles={}.", - sections.size(), - newFilesForAbort.size(), + "Manifest sort full compact completed: input={}, resultFiles={}.", + input.size(), result.size()); return Optional.of(result); } @@ -261,7 +265,7 @@ private static Optional> tryFullCompact( *

    Builds index mapping to preserve original positions. sortAndRewriteSection separates ADD * and DELETE entries, placing ADD at result[minIdx] and DELETE at result[maxIdx]. */ - private static List tryMinorCompact( + private static List tryMinorCompaction( List input, List newFilesForAbort, ManifestFile manifestFile, @@ -347,6 +351,11 @@ private static List tryMinorCompact( splitIntoSections(pickedFiles, ctx.fieldComparator, ctx.defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); + LOG.info( + "Manifest sort minor compact: pickedFiles={}, sections={}.", + pickedFiles.size(), + sections.size()); + // Step 6: Rewrite sections MinorCompactOutput output = new MinorCompactOutput(result, indexRange, fileNameToIndex); rewriteSections( @@ -367,9 +376,8 @@ private static List tryMinorCompact( } LOG.info( - "Manifest sort minor compact completed: sections={}, newFiles={}, resultFiles={}.", - sections.size(), - newFilesForAbort.size(), + "Manifest sort minor compact completed: input={}, resultFiles={}.", + input.size(), flatResult.size()); return flatResult; } @@ -391,6 +399,7 @@ private static CompactionContext prepareCompaction( int sortedRunSizeRatio, @Nullable Integer manifestReadParallelism) { + // Step 1: Resolve sort field and build comparator for partition ordering. String sortField = resolveSortField(sortPartitionField, partitionType); if (sortField == null) { throw new IllegalArgumentException( @@ -401,6 +410,7 @@ private static CompactionContext prepareCompaction( CodeGenUtils.newRecordComparator( partitionType.getFieldTypes(), new int[] {sortFieldIndex}); + // Step 2: Classify manifests into LSM files and collect delete entries. ClassifyResult classifyResult = classifyManifests( input, @@ -411,11 +421,13 @@ private static CompactionContext prepareCompaction( manifestReadParallelism); List lsmFiles = classifyResult.lsmFiles; + // Step 3: Build level-sorted runs from LSM files based on partition order. 
List levelRuns = lsmFiles.isEmpty() ? new ArrayList<>() : buildLevelSortedRuns(lsmFiles, fieldComparator); + // Step 4: Pick runs for compaction using size amplification and ratio strategy. ManifestPickStrategy pickStrategy = new ManifestPickStrategy(maxSizeAmplificationPercent, sortedRunSizeRatio); List pickedRuns = pickStrategy.pick(levelRuns); @@ -898,14 +910,21 @@ private static void sortAndRewriteMinor( return singletonList(Pair.of(addBatch, deleteBatch)); }; - List addEntries = new ArrayList<>(); + Map addMap = new HashMap<>(); List minorDeleteEntries = new ArrayList<>(); for (Pair, List> pair : sequentialBatchedExecute(reader, section, manifestReadParallelism)) { - addEntries.addAll(pair.getLeft()); + for (ManifestEntry entry : pair.getLeft()) { + addMap.put(entry.identifier(), entry); + } minorDeleteEntries.addAll(pair.getRight()); } + // Cancel out ADD+DELETE pairs with the same identifier within the section. + minorDeleteEntries.removeIf( + manifestEntry -> addMap.remove(manifestEntry.identifier()) != null); + List addEntries = new ArrayList<>(addMap.values()); + if (!addEntries.isEmpty()) { List sorted = sortAndWriteEntries(addEntries, ctx.fieldComparator, manifestFile);