diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html
index 0071c264376f..af7e996c8bef 100644
--- a/docs/layouts/shortcodes/generated/core_configuration.html
+++ b/docs/layouts/shortcodes/generated/core_configuration.html
@@ -380,6 +380,12 @@
             <td>Integer</td>
             <td>Percentage flexibility while comparing sorted run size for changelog mode table. If the candidate sorted run(s) size is 1% smaller than the next sorted run's size, then include next sorted run into this candidate set.</td>
         </tr>
+        <tr>
+            <td><h5>compaction.small-file-ratio</h5></td>
+            <td style="word-wrap: break-word;">0.7</td>
+            <td>Double</td>
+            <td>The ratio of target file size. Files whose size is smaller than target-file-size * compaction.small-file-ratio will be picked for compaction rewriting. This avoids compacting the same file repeatedly due to compression inaccuracy causing output files to be slightly smaller than the target size.</td>
+        </tr>
         <tr>
             <td><h5>compaction.total-size-threshold</h5></td>
             <td style="word-wrap: break-word;">(none)</td>
diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java
index 897bebcf686c..2f24db43f4db 100644
--- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java
+++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java
@@ -722,6 +722,17 @@ public InlineElement getDescription() {
                                             text("append table: the default value is 256 MB."))
                                     .build());
 
+    public static final ConfigOption<Double> COMPACTION_SMALL_FILE_RATIO =
+            key("compaction.small-file-ratio")
+                    .doubleType()
+                    .defaultValue(0.7)
+                    .withDescription(
+                            "The ratio of target file size. Files whose size is smaller than "
+                                    + "target-file-size * compaction.small-file-ratio will be "
+                                    + "picked for compaction rewriting. This avoids compacting the same "
+                                    + "file repeatedly due to compression inaccuracy causing output files "
+                                    + "to be slightly smaller than the target size.");
+
     public static final ConfigOption<MemorySize> BLOB_TARGET_FILE_SIZE =
             key("blob.target-file-size")
                     .memoryType()
@@ -2962,7 +2973,7 @@ public long compactionFileSize(boolean hasPrimaryKey) {
         // file size to join the compaction, we don't process on middle file size to avoid
         // compact a same file twice (the compression is not calculate so accurately. the output
         // file maybe be less than target file generated by rolling file write).
-        return targetFileSize(hasPrimaryKey) / 10 * 7;
+        return (long) (targetFileSize(hasPrimaryKey) * options.get(COMPACTION_SMALL_FILE_RATIO));
     }
 
     public int numSortedRunCompactionTrigger() {