From a4d47db98291c0fa8ad66f61a971896e7fa53ca6 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sun, 26 Apr 2026 11:42:32 +0800 Subject: [PATCH] [core] Skip file index for changelog files --- .../paimon/io/KeyValueFileWriterFactory.java | 6 ++- .../paimon/io/KeyValueFileReadWriteTest.java | 52 ++++++++++++++++++- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/io/KeyValueFileWriterFactory.java b/paimon-core/src/main/java/org/apache/paimon/io/KeyValueFileWriterFactory.java index 2a741a792ada..b1ae451e5974 100644 --- a/paimon-core/src/main/java/org/apache/paimon/io/KeyValueFileWriterFactory.java +++ b/paimon-core/src/main/java/org/apache/paimon/io/KeyValueFileWriterFactory.java @@ -149,6 +149,8 @@ private KeyValueClusteringFileWriter createKvSeparatedFileWriter( private KeyValueDataFileWriter createDataFileWriter( Path path, WriteFormatKey key, FileSource fileSource, boolean isExternalPath) { + // Changelog is sequentially consumed, file index is unnecessary. + FileIndexOptions indexOptions = key.isChangelog ? new FileIndexOptions() : fileIndexOptions; return formatContext.thinModeEnabled ? new KeyValueThinDataFileWriterImpl( fileIO, @@ -161,7 +163,7 @@ private KeyValueDataFileWriter createDataFileWriter( key.level, options, fileSource, - fileIndexOptions, + indexOptions, isExternalPath) : new KeyValueDataFileWriterImpl( fileIO, @@ -174,7 +176,7 @@ private KeyValueDataFileWriter createDataFileWriter( key.level, options, fileSource, - fileIndexOptions, + indexOptions, isExternalPath); } diff --git a/paimon-core/src/test/java/org/apache/paimon/io/KeyValueFileReadWriteTest.java b/paimon-core/src/test/java/org/apache/paimon/io/KeyValueFileReadWriteTest.java index 248f3b3ac486..0169a6d5e3d6 100644 --- a/paimon-core/src/test/java/org/apache/paimon/io/KeyValueFileReadWriteTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/io/KeyValueFileReadWriteTest.java @@ -308,11 +308,16 @@ public void testFileSuffix(@TempDir java.nio.file.Path tempDir) throws Exception } protected KeyValueFileWriterFactory createWriterFactory(String pathStr, String format) { + Options options = new Options(); + options.set(CoreOptions.METADATA_STATS_MODE, "FULL"); + return createWriterFactory(pathStr, format, options); + } + + protected KeyValueFileWriterFactory createWriterFactory( + String pathStr, String format, Options options) { Path path = new Path(pathStr); int suggestedFileSize = ThreadLocalRandom.current().nextInt(8192) + 1024; FileIO fileIO = FileIOFinder.find(path); - Options options = new Options(); - options.set(CoreOptions.METADATA_STATS_MODE, "FULL"); Function pathFactoryMap = format1 -> @@ -455,6 +460,49 @@ private void checkRollingFiles( } } + @Test + void testChangelogFile() throws Exception { + Options options = new Options(); + options.set(CoreOptions.METADATA_STATS_MODE, "FULL"); + options.setString("file-index.bloom-filter.columns", "comment"); + options.setString("file-index.in-manifest-threshold", "1B"); + + KeyValueFileWriterFactory writerFactory = + createWriterFactory(tempDir.toString(), "avro", options); + + DataFileTestDataGenerator.Data data = gen.next(); + RollingFileWriter dataWriter = + writerFactory.createRollingMergeTreeFileWriter(0, FileSource.APPEND); + dataWriter.write(CloseableIterator.fromList(data.content, kv -> {})); + dataWriter.close(); + List dataFileMetas = dataWriter.result(); + + assertThat(dataFileMetas).isNotEmpty(); + assertThat( + dataFileMetas.stream() + .anyMatch( + meta -> + meta.extraFiles().stream() + .anyMatch( + f -> + f.endsWith( + DataFilePathFactory + .INDEX_PATH_SUFFIX)))) + .isTrue(); + + RollingFileWriter changelogWriter = + writerFactory.createRollingChangelogFileWriter(0); + changelogWriter.write(CloseableIterator.fromList(data.content, kv -> {})); + changelogWriter.close(); + List changelogMetas = changelogWriter.result(); + + assertThat(changelogMetas).isNotEmpty(); + for (DataFileMeta meta : changelogMetas) { + assertThat(meta.extraFiles()) + .noneMatch(f -> f.endsWith(DataFilePathFactory.INDEX_PATH_SUFFIX)); + } + } + @ParameterizedTest @ValueSource(strings = {"parquet", "orc", "avro"}) public void testReaderUseFileSizeFromMetadata(String format) throws Exception {