-
Notifications
You must be signed in to change notification settings - Fork 2.5k
[HUDI-9482] Fix the issue where the file slice was wrongly filtered out #13342
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
bfaf6c9
f7510b1
e81ad23
7cbc4dc
b35bcc9
d31cfdc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,7 +18,9 @@ | |
|
|
||
| package org.apache.hudi.common.model; | ||
|
|
||
| import org.apache.hudi.common.table.HoodieTableVersion; | ||
| import org.apache.hudi.common.table.timeline.CompletionTimeQueryView; | ||
| import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; | ||
| import org.apache.hudi.common.table.timeline.HoodieInstant; | ||
| import org.apache.hudi.common.table.timeline.HoodieTimeline; | ||
| import org.apache.hudi.common.util.Option; | ||
|
|
@@ -119,6 +121,10 @@ public void addLogFile(CompletionTimeQueryView completionTimeQueryView, HoodieLo | |
| @VisibleForTesting | ||
| public String getBaseInstantTime(CompletionTimeQueryView completionTimeQueryView, HoodieLogFile logFile) { | ||
| if (fileSlices.isEmpty()) { | ||
| if (completionTimeQueryView.getTableVersion().greaterThanOrEquals(HoodieTableVersion.EIGHT) && !completionTimeQueryView.isCompleted(logFile.getDeltaCommitTime())) { | ||
| // no base file in the file group and the smallest log file is still pending, we should use the INIT_INSTANT_TS for these uncertain cases. | ||
| return HoodieActiveTimeline.INIT_INSTANT_TS; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like a version compatibility issue?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Indeed, it is. Essentially, it is because the log file name of the old version contained the base instant time of this slice, while the log file name of the new version only contains the instant written to this file.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. update: |
||
| } | ||
| // no base file in the file group, use the log file delta commit time. | ||
| return logFile.getDeltaCommitTime(); | ||
| } | ||
|
|
@@ -130,7 +136,6 @@ public String getBaseInstantTime(CompletionTimeQueryView completionTimeQueryView | |
| return commitTime; | ||
| } | ||
| } | ||
| // no base file that starts earlier than the log delta commit completion time, | ||
| // use the log file delta commit time. | ||
| return logFile.getDeltaCommitTime(); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -590,6 +590,27 @@ private Stream<FileSlice> filterUncommittedFiles(FileSlice fileSlice, boolean in | |
| return Stream.of(fileSlice); | ||
| } | ||
|
|
||
| /** | ||
| * Ignores the empty file-slice. | ||
| * When a slice is full of pending log files, we need to filter out this situation because in fact, the file group represented by this file slice has not committed any data either | ||
| * @param fileSlice File Slice | ||
| * @return Stream of FileSlice | ||
| */ | ||
| private Stream<FileSlice> filterOutEmptyFileSlice(FileSlice fileSlice) { | ||
| // Check if the file slice is empty | ||
| // a. the base instant is a pending compaction, still return the file slice | ||
| // b. the base instant is a normal write, should filter out the file slice, because the entire file group is empty | ||
| if (fileSlice.isEmpty()) { | ||
| if (isFileSliceAfterPendingCompaction(fileSlice)) { | ||
| LOG.debug("File Slice ({}) is in pending compaction", fileSlice); | ||
| } else { | ||
| LOG.debug("File Slice ({}) is empty", fileSlice); | ||
| return Stream.of(); | ||
| } | ||
| } | ||
| return Stream.of(fileSlice); | ||
| } | ||
|
|
||
| /** | ||
| * Ignores the uncommitted log files. | ||
| * | ||
|
|
@@ -867,6 +888,10 @@ public final Stream<FileSlice> getLatestFileSlices(String partitionStr) { | |
| .flatMap(slice -> tableVersion8AndAbove() | ||
| ? this.filterUncommittedFiles(slice, true) | ||
| : this.filterBaseFileAfterPendingCompaction(slice, true)) | ||
| .flatMap(slice -> tableVersion8AndAbove() | ||
| // for table version 8 and above, we need to filter out empty file slices | ||
| ? this.filterOutEmptyFileSlice(slice) | ||
| : Stream.of(slice)) | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should filter out the |
||
| .map(this::addBootstrapBaseFileIfPresent); | ||
| } finally { | ||
| readLock.unlock(); | ||
|
|
@@ -899,7 +924,11 @@ public final Stream<FileSlice> getLatestFileSlicesStateless(String partitionStr) | |
| .filter(Option::isPresent).map(Option::get) | ||
| .flatMap(slice -> tableVersion8AndAbove() | ||
| ? this.filterUncommittedFiles(slice, true) | ||
| : this.filterBaseFileAfterPendingCompaction(slice, true)); | ||
| : this.filterBaseFileAfterPendingCompaction(slice, true)) | ||
| .flatMap(slice -> tableVersion8AndAbove() | ||
| // for table version 8 and above, we need to filter out empty file slices | ||
| ? this.filterOutEmptyFileSlice(slice) | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as above |
||
| : Stream.of(slice)); | ||
|
|
||
| if (bootstrapIndex.useIndex()) { | ||
| final Map<HoodieFileGroupId, BootstrapBaseFileMapping> bootstrapBaseFileMappings = getBootstrapBaseFileMappings(partition); | ||
|
|
@@ -930,9 +959,13 @@ public final Option<FileSlice> getLatestFileSlice(String partitionStr, String fi | |
| if (!fs.isPresent()) { | ||
| return Option.empty(); | ||
| } | ||
| Stream<FileSlice> fileSlices = tableVersion8AndAbove() | ||
| Stream<FileSlice> fileSlices = (tableVersion8AndAbove() | ||
| ? this.filterUncommittedFiles(fs.get(), true) | ||
| : this.filterBaseFileAfterPendingCompaction(fs.get(), true); | ||
| : this.filterBaseFileAfterPendingCompaction(fs.get(), true)) | ||
| .flatMap(slice -> tableVersion8AndAbove() | ||
| // for table version 8 and above, we need to filter out empty file slices | ||
| ? this.filterOutEmptyFileSlice(slice) | ||
| : Stream.of(slice)); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as above |
||
|
|
||
| return Option.ofNullable(fileSlices | ||
| .map(this::addBootstrapBaseFileIfPresent) | ||
|
|
@@ -1051,6 +1084,9 @@ public final Stream<FileSlice> getLatestMergedFileSlicesBeforeOrOn(String partit | |
| fileSlice = Option.of(fetchMergedFileSlice(fileGroup, tableVersion8AndAbove() | ||
| ? filterUncommittedLogs(fileSlice.get()) : fileSlice.get()) | ||
| ); | ||
| if (fileSlice.isPresent() && tableVersion8AndAbove()) { | ||
| fileSlice = Option.fromJavaOptional(filterOutEmptyFileSlice(fileSlice.get()).findAny()); | ||
| } | ||
| } | ||
| return fileSlice; | ||
| }).filter(Option::isPresent).map(Option::get).map(this::addBootstrapBaseFileIfPresent); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The changes here are intended to achieve the following two points: