Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,18 @@ private CompactScanner(SnapshotReader snapshotReader) {

List<ManifestFileMeta> manifestFileMetas =
snapshotReader.manifestsReader().read(snapshot, ScanMode.ALL).filteredManifests;
RangeHelper<ManifestFileMeta> rangeHelper =
new RangeHelper<>(ManifestFileMeta::minRowId, ManifestFileMeta::maxRowId);
this.metas = new ArrayDeque<>(rangeHelper.mergeOverlappingRanges(manifestFileMetas));

boolean allManifestMetaContainsRowId =
manifestFileMetas.stream()
.allMatch(meta -> meta.minRowId() != null && meta.maxRowId() != null);
if (allManifestMetaContainsRowId) {
RangeHelper<ManifestFileMeta> rangeHelper =
new RangeHelper<>(ManifestFileMeta::minRowId, ManifestFileMeta::maxRowId);
this.metas =
new ArrayDeque<>(rangeHelper.mergeOverlappingRanges(manifestFileMetas));
} else {
this.metas = new ArrayDeque<>(Collections.singletonList(manifestFileMetas));
}
}

List<ManifestEntry> scan() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,23 @@

package org.apache.paimon.append.dataevolution;

import org.apache.paimon.CoreOptions;
import org.apache.paimon.Snapshot;
import org.apache.paimon.data.BinaryRow;
import org.apache.paimon.data.Timestamp;
import org.apache.paimon.io.DataFileMeta;
import org.apache.paimon.manifest.FileKind;
import org.apache.paimon.manifest.FileSource;
import org.apache.paimon.manifest.ManifestEntry;
import org.apache.paimon.manifest.ManifestFileMeta;
import org.apache.paimon.operation.ManifestsReader;
import org.apache.paimon.options.Options;
import org.apache.paimon.partition.PartitionPredicate;
import org.apache.paimon.stats.StatsTestUtils;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.source.ScanMode;
import org.apache.paimon.table.source.snapshot.SnapshotReader;
import org.apache.paimon.utils.SnapshotManager;

import org.junit.jupiter.api.Test;

Expand All @@ -35,6 +45,8 @@
import java.util.List;

import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

/** Tests for {@link DataEvolutionCompactCoordinator.CompactPlanner}. */
public class DataEvolutionCompactCoordinatorTest {
Expand Down Expand Up @@ -168,6 +180,74 @@ public void testCompactPlannerWithBlobFiles() {
entries.get(5).file());
}

/**
 * Plans a compaction over two manifests, one created with {@code null} as its last two
 * constructor arguments and one created with {@code 0L} and {@code 199L}, and checks that the
 * single data file read from each manifest ends up in the same compaction task.
 */
@Test
public void testPlanWithNullManifestRowId() {
    // Two manifest metas that differ only in name and in the trailing two arguments.
    // NOTE(review): the trailing two arguments are presumably minRowId / maxRowId -
    // confirm against the ManifestFileMeta constructor.
    ManifestFileMeta manifestWithoutRowIds =
            new ManifestFileMeta(
                    "manifest-1",
                    1L,
                    1L,
                    0L,
                    StatsTestUtils.newEmptySimpleStats(),
                    0L,
                    null,
                    null,
                    null,
                    null,
                    null,
                    null);
    ManifestFileMeta manifestWithRowIds =
            new ManifestFileMeta(
                    "manifest-2",
                    1L,
                    1L,
                    0L,
                    StatsTestUtils.newEmptySimpleStats(),
                    0L,
                    null,
                    null,
                    null,
                    null,
                    0L,
                    199L);
    List<ManifestFileMeta> allManifests =
            Arrays.asList(manifestWithoutRowIds, manifestWithRowIds);

    // One data file per manifest: first row ids 0 and 100, 100 rows each, file size 600.
    ManifestEntry firstFile = makeEntry("file1.parquet", 0L, 100L, 600);
    ManifestEntry secondFile = makeEntry("file2.parquet", 100L, 100L, 600);

    // Options handed to the coordinator through the mocked table.
    Options conf = new Options();
    conf.set("target-file-size", "1 kb");
    conf.set("source.split.open-file-cost", "1 b");
    conf.set("compaction.min.file-num", "2");

    // Mocked collaborators: table -> snapshot reader -> snapshot manager / manifests reader.
    Snapshot latest = mock(Snapshot.class);
    SnapshotManager manager = mock(SnapshotManager.class);
    ManifestsReader manifests = mock(ManifestsReader.class);
    SnapshotReader reader = mock(SnapshotReader.class);
    FileStoreTable tableMock = mock(FileStoreTable.class);

    when(tableMock.coreOptions()).thenReturn(new CoreOptions(conf));
    when(tableMock.newSnapshotReader()).thenReturn(reader);

    // The cast selects the PartitionPredicate overload of withPartitionFilter.
    when(reader.withPartitionFilter((PartitionPredicate) null)).thenReturn(reader);
    when(reader.snapshotManager()).thenReturn(manager);
    when(reader.manifestsReader()).thenReturn(manifests);
    when(reader.readManifest(manifestWithoutRowIds))
            .thenReturn(Collections.singletonList(firstFile));
    when(reader.readManifest(manifestWithRowIds))
            .thenReturn(Collections.singletonList(secondFile));

    when(manager.latestSnapshot()).thenReturn(latest);
    when(manifests.read(latest, ScanMode.ALL))
            .thenReturn(new ManifestsReader.Result(latest, allManifests, allManifests));

    List<DataEvolutionCompactTask> planned =
            new DataEvolutionCompactCoordinator(tableMock, false).plan();

    // Exactly one task, listing both files in manifest order.
    assertThat(planned).hasSize(1);
    DataEvolutionCompactTask onlyTask = planned.get(0);
    assertThat(onlyTask.compactBefore().stream().map(DataFileMeta::fileName))
            .containsExactly(firstFile.file().fileName(), secondFile.file().fileName());
}

private ManifestEntry makeEntry(
String fileName, long firstRowId, long rowCount, long fileSize) {
return makeEntryWithSize(
Expand Down