Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HUDI-349]: Added new cleaning policy based on number of hours #3646

Merged
merged 15 commits into from
Feb 21, 2022
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 0 additions & 15 deletions hudi-cli/src/main/java/org/apache/hudi/cli/utils/CommitUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@
import org.apache.hudi.common.table.timeline.HoodieTimeline;

import java.io.IOException;
import java.text.ParseException;
import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.Date;
import java.util.List;
Expand All @@ -53,16 +50,4 @@ public static String getTimeDaysAgo(int numberOfDays) {
Date date = Date.from(ZonedDateTime.now().minusDays(numberOfDays).toInstant());
return HoodieActiveTimeline.formatDate(date);
}

/**
* Add hours to specified time. If hours <0, this acts as remove hours.
* example, say compactionCommitTime: "20200202020000"
* a) hours: +1, returns 20200202030000
* b) hours: -1, returns 20200202010000
*/
public static String addHours(String compactionCommitTime, int hours) throws ParseException {
Instant instant = HoodieActiveTimeline.parseDateFromInstantTime(compactionCommitTime).toInstant();
ZonedDateTime commitDateTime = ZonedDateTime.ofInstant(instant, ZoneId.systemDefault());
return HoodieActiveTimeline.formatDate(Date.from(commitDateTime.plusHours(hours).toInstant()));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ public class HoodieCompactionConfig extends HoodieConfig {
.withDocumentation("Number of commits to retain, without cleaning. This will be retained for num_of_commits * time_between_commits "
+ "(scheduled). This also directly translates into how much data retention the table supports for incremental queries.");

public static final ConfigProperty<String> CLEANER_HOURS_RETAINED = ConfigProperty.key("hoodie.cleaner.hours.retained")
.defaultValue("24")
.withDocumentation("Number of hours for which commits need to be retained. This config provides a more flexible option as"
+ "compared to number of commits retained for cleaning service");
pratyakshsharma marked this conversation as resolved.
Show resolved Hide resolved

public static final ConfigProperty<String> CLEANER_POLICY = ConfigProperty
.key("hoodie.cleaner.policy")
.defaultValue(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name())
Expand Down Expand Up @@ -535,6 +540,11 @@ public Builder retainCommits(int commitsRetained) {
return this;
}

public Builder retainNumberOfHours(int cleanerHoursRetained) {
pratyakshsharma marked this conversation as resolved.
Show resolved Hide resolved
compactionConfig.setValue(CLEANER_HOURS_RETAINED, String.valueOf(cleanerHoursRetained));
return this;
}

public Builder archiveCommitsWith(int minToKeep, int maxToKeep) {
compactionConfig.setValue(MIN_COMMITS_TO_KEEP, String.valueOf(minToKeep));
compactionConfig.setValue(MAX_COMMITS_TO_KEEP, String.valueOf(maxToKeep));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1065,6 +1065,10 @@ public int getCleanerCommitsRetained() {
return getInt(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED);
}

public int getCleanerHoursRetained() {
return getInt(HoodieCompactionConfig.CLEANER_HOURS_RETAINED);
}

public int getMaxCommitsToKeep() {
return getInt(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
Expand All @@ -50,8 +51,12 @@

import java.io.IOException;
import java.io.Serializable;
import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -123,6 +128,7 @@ public Stream<String> getSavepointedDataFiles(String savepointTime) {
public List<String> getPartitionPathsToClean(Option<HoodieInstant> earliestRetainedInstant) throws IOException {
switch (config.getCleanerPolicy()) {
case KEEP_LATEST_COMMITS:
case KEEP_LATEST_BY_HOURS:
return getPartitionPathsForCleanByCommits(earliestRetainedInstant);
case KEEP_LATEST_FILE_VERSIONS:
return getPartitionPathsForFullCleaning();
Expand Down Expand Up @@ -247,6 +253,10 @@ private List<CleanFileInfo> getFilesToCleanKeepingLatestVersions(String partitio
return deletePaths;
}

private List<CleanFileInfo> getFilesToCleanKeepingLatestCommits(String partitionPath) {
return getFilesToCleanKeepingLatestCommits(partitionPath, config.getCleanerCommitsRetained(), HoodieCleaningPolicy.KEEP_LATEST_COMMITS);
}

/**
* Selects the versions for file for cleaning, such that it
* <p>
Expand All @@ -261,8 +271,7 @@ private List<CleanFileInfo> getFilesToCleanKeepingLatestVersions(String partitio
* <p>
* This policy is the default.
*/
private List<CleanFileInfo> getFilesToCleanKeepingLatestCommits(String partitionPath) {
int commitsRetained = config.getCleanerCommitsRetained();
private List<CleanFileInfo> getFilesToCleanKeepingLatestCommits(String partitionPath, int commitsRetained, HoodieCleaningPolicy policy) {
LOG.info("Cleaning " + partitionPath + ", retaining latest " + commitsRetained + " commits. ");
List<CleanFileInfo> deletePaths = new ArrayList<>();

Expand Down Expand Up @@ -299,14 +308,24 @@ private List<CleanFileInfo> getFilesToCleanKeepingLatestCommits(String partition
// do not clean up a savepoint data file
continue;
}
// Dont delete the latest commit and also the last commit before the earliest commit we
// are retaining
// The window of commit retain == max query run time. So a query could be running which
// still
// uses this file.
if (fileCommitTime.equals(lastVersion) || (fileCommitTime.equals(lastVersionBeforeEarliestCommitToRetain))) {
// move on to the next file
continue;

if (policy == HoodieCleaningPolicy.KEEP_LATEST_COMMITS) {
// Dont delete the latest commit and also the last commit before the earliest commit we
// are retaining
// The window of commit retain == max query run time. So a query could be running which
// still
// uses this file.
if (fileCommitTime.equals(lastVersion) || (fileCommitTime.equals(lastVersionBeforeEarliestCommitToRetain))) {
// move on to the next file
continue;
}
} else if (policy == HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS) {
// This block corresponds to KEEP_LATEST_BY_HOURS policy
// Do not delete the latest commit.
if (fileCommitTime.equals(lastVersion)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we know why do have (fileCommitTime.equals(lastVersionBeforeEarliestCommitToRetain) in L318. I would prefer to keep the same logic unless we have strong reasons to remove. As I mentioned, cleaning based on num hours is just a diff way to conveying retain N commits. So, lets try to keep the logic similar across both. Easier to maintain.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cleaner policies are designed to enable the longest running query to succeed. Let us take a small example where ingestion is happening every 30 minutes and hoodie.cleaner.commits.retained = 2. This implies the longest running query is going to take 1 hour to complete. Now if I maintain a list of commits, when commit at index=2 in this list (i.e 3rd commit from the beginning) is completed, the query which started when commit at index=0 completed might still be running.

list of commits -> 0. 1. 2
time (in mins). -> 0. 30. 60

Hence to allow this query to finish, we take into account lastVersionBeforeEarliestCommitToRetain in L318. lastVersion in this case is going to be commit at index=1.

With the new policy KEEP_LATEST_BY_HOURS, when 3rd commit is completed, 1st commit will be retained automatically by the logic. Hence the code in its current form works fine.

// move on to the next file
continue;
}
}

// Always keep the last commit
Expand All @@ -330,6 +349,19 @@ private List<CleanFileInfo> getFilesToCleanKeepingLatestCommits(String partition
}
return deletePaths;
}

/**
* This method finds the files to be cleaned based on the number of hours. If {@code config.getCleanerHoursRetained()} is set to 5,
* all the files with commit time earlier than 5 hours will be removed. Also the latest file for any file group is retained.
* This policy gives much more flexibility to users for retaining data for running incremental queries as compared to
* KEEP_LATEST_COMMITS cleaning policy. The default number of hours is 5.
* @param partitionPath partition path to check
* @return list of files to clean
*/
private List<CleanFileInfo> getFilesToCleanKeepingLatestHours(String partitionPath) {
nsivabalan marked this conversation as resolved.
Show resolved Hide resolved
int commitsToRetain = 0;
pratyakshsharma marked this conversation as resolved.
Show resolved Hide resolved
return getFilesToCleanKeepingLatestCommits(partitionPath, commitsToRetain, HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS);
}

private List<CleanFileInfo> getReplacedFilesEligibleToClean(List<String> savepointedFiles, String partitionPath, Option<HoodieInstant> earliestCommitToRetain) {
final Stream<HoodieFileGroup> replacedGroups;
Expand Down Expand Up @@ -388,6 +420,8 @@ public List<CleanFileInfo> getDeletePaths(String partitionPath) {
deletePaths = getFilesToCleanKeepingLatestCommits(partitionPath);
} else if (policy == HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) {
deletePaths = getFilesToCleanKeepingLatestVersions(partitionPath);
} else if (policy == HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS) {
deletePaths = getFilesToCleanKeepingLatestHours(partitionPath);
} else {
throw new IllegalArgumentException("Unknown cleaning policy : " + policy.name());
}
Expand All @@ -402,9 +436,16 @@ public List<CleanFileInfo> getDeletePaths(String partitionPath) {
public Option<HoodieInstant> getEarliestCommitToRetain() {
Option<HoodieInstant> earliestCommitToRetain = Option.empty();
int commitsRetained = config.getCleanerCommitsRetained();
int hoursRetained = config.getCleanerHoursRetained();
if (config.getCleanerPolicy() == HoodieCleaningPolicy.KEEP_LATEST_COMMITS
&& commitTimeline.countInstants() > commitsRetained) {
earliestCommitToRetain = commitTimeline.nthInstant(commitTimeline.countInstants() - commitsRetained);
earliestCommitToRetain = commitTimeline.nthInstant(commitTimeline.countInstants() - commitsRetained); //15 instants total, 10 commits to retain, this gives 6th instant in the list
} else if (config.getCleanerPolicy() == HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS) {
Instant instant = Instant.now();
ZonedDateTime commitDateTime = ZonedDateTime.ofInstant(instant, ZoneId.systemDefault());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have any precedence in hudi code base for doing time based calculations. Can you explore and let me know. Wanna maintain some uniformity if we have any.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me check.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should sync this to how the instant times are generated.

Another way to think about this could be:
latestInstantTime = HoodieActiveTimeline.createNewInstantTime()
earliestCommitToRetain = HoodieActiveTimeline.findInstantInPast(latestInstantTime, N); // N = number of hours in the past.

Keeping the instant timestamp code together will prevent any surprises from a wrong time calculation cleaning data unintentionally.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nsivabalan I checked. There is no uniformity as of now. We use joda dateTime library in TimestampBasedKeyGenerator, java.time.ZonedDateTime in SlashEncodedHourPartitionValueExtractor, and java.util.Date in HoodieActiveTimeline.

@prashantwason nthInstant is what we are using throughout the codebase. I guess there is no need to introduce a new method findInstantInPast here.

pratyakshsharma marked this conversation as resolved.
Show resolved Hide resolved
String retainInstantsAfter = HoodieActiveTimeline.formatDate(Date.from(commitDateTime.minusHours(hoursRetained).toInstant()));
pratyakshsharma marked this conversation as resolved.
Show resolved Hide resolved
earliestCommitToRetain = Option.fromJavaOptional(commitTimeline.getInstants().filter(i -> HoodieTimeline.compareTimestamps(i.getTimestamp(),
HoodieTimeline.GREATER_THAN_OR_EQUALS, retainInstantsAfter)).findFirst());
}
return earliestCommitToRetain;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,13 @@
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
Expand Down Expand Up @@ -1267,6 +1271,154 @@ public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIn
assertTrue(testTable.baseFileExists(p0, "00000000000005", file3P0C2));
}

/**
* Test cleaning policy based on number of hours retained policy. This test case covers the case when files will not be cleaned.
*/
@ParameterizedTest
@MethodSource("argumentsForTestKeepLatestCommits")
public void testKeepXHoursNoCleaning(boolean simulateFailureRetry, boolean enableIncrementalClean, boolean enableBootstrapSourceClean) throws Exception {
nsivabalan marked this conversation as resolved.
Show resolved Hide resolved
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
.withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build())
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withIncrementalCleaningMode(enableIncrementalClean)
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER)
.withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean)
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS).retainNumberOfHours(2).build())
.build();

HoodieTestTable testTable = HoodieTestTable.of(metaClient);
String p0 = "2020/01/01";
String p1 = "2020/01/02";
Map<String, List<BootstrapFileMapping>> bootstrapMapping = enableBootstrapSourceClean ? generateBootstrapIndexAndSourceData(p0, p1) : null;

String file1P0C0 = enableBootstrapSourceClean ? bootstrapMapping.get(p0).get(0).getFileId()
: UUID.randomUUID().toString();
String file1P1C0 = enableBootstrapSourceClean ? bootstrapMapping.get(p1).get(0).getFileId()
: UUID.randomUUID().toString();
Instant instant = Instant.now();
ZonedDateTime commitDateTime = ZonedDateTime.ofInstant(instant, ZoneId.systemDefault());
int minutesForFirstCommit = 90;
String firstCommitTs = HoodieActiveTimeline.formatDate(Date.from(commitDateTime.minusMinutes(minutesForFirstCommit).toInstant()));
testTable.addInflightCommit(firstCommitTs).withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0);

HoodieCommitMetadata commitMetadata = generateCommitMetadata(
Collections.unmodifiableMap(new HashMap<String, List<String>>() {
{
put(p0, CollectionUtils.createImmutableList(file1P0C0));
put(p1, CollectionUtils.createImmutableList(file1P1C0));
}
})
);
metaClient.getActiveTimeline().saveAsComplete(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, firstCommitTs),
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));

metaClient = HoodieTableMetaClient.reload(metaClient);

List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config, simulateFailureRetry);
assertEquals(0, hoodieCleanStatsOne.size(), "Must not scan any partitions and clean any files");
assertTrue(testTable.baseFileExists(p0, firstCommitTs, file1P0C0));
assertTrue(testTable.baseFileExists(p1, firstCommitTs, file1P1C0));

// make next commit, with 1 insert & 1 update per partition
int minutesForSecondCommit = 40;
String secondCommitTs = HoodieActiveTimeline.formatDate(Date.from(commitDateTime.minusMinutes(minutesForSecondCommit).toInstant()));
Map<String, String> partitionAndFileId002 = testTable.addInflightCommit(secondCommitTs).getFileIdsWithBaseFilesInPartitions(p0, p1);
String file2P0C1 = partitionAndFileId002.get(p0);
String file2P1C1 = partitionAndFileId002.get(p1);
testTable.forCommit(secondCommitTs).withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0);
commitMetadata = generateCommitMetadata(new HashMap<String, List<String>>() {
{
put(p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1));
put(p1, CollectionUtils.createImmutableList(file1P1C0, file2P1C1));
}
});
metaClient.getActiveTimeline().saveAsComplete(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, secondCommitTs),
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config, simulateFailureRetry);
assertEquals(0, hoodieCleanStatsTwo.size(), "Must not scan any partitions and clean any files");
assertTrue(testTable.baseFileExists(p0, secondCommitTs, file2P0C1));
assertTrue(testTable.baseFileExists(p1, secondCommitTs, file2P1C1));
assertTrue(testTable.baseFileExists(p0, secondCommitTs, file1P0C0));
assertTrue(testTable.baseFileExists(p1, secondCommitTs, file1P1C0));
}
pratyakshsharma marked this conversation as resolved.
Show resolved Hide resolved

/**
* Tests cleaning service based on number of hours retained.
*/
@ParameterizedTest
@MethodSource("argumentsForTestKeepLatestCommits")
public void testKeepXHoursWithCleaning(boolean simulateFailureRetry, boolean enableIncrementalClean, boolean enableBootstrapSourceClean) throws Exception {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
.withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build())
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withIncrementalCleaningMode(enableIncrementalClean)
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER)
.withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean)
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS).retainNumberOfHours(2).build())
.build();

HoodieTestTable testTable = HoodieTestTable.of(metaClient);
String p0 = "2020/01/01";
String p1 = "2020/01/02";
Map<String, List<BootstrapFileMapping>> bootstrapMapping = enableBootstrapSourceClean ? generateBootstrapIndexAndSourceData(p0, p1) : null;

String file1P0C0 = enableBootstrapSourceClean ? bootstrapMapping.get(p0).get(0).getFileId()
: UUID.randomUUID().toString();
String file1P1C0 = enableBootstrapSourceClean ? bootstrapMapping.get(p1).get(0).getFileId()
: UUID.randomUUID().toString();
Instant instant = Instant.now();
ZonedDateTime commitDateTime = ZonedDateTime.ofInstant(instant, ZoneId.systemDefault());
int minutesForFirstCommit = 150;
String firstCommitTs = HoodieActiveTimeline.formatDate(Date.from(commitDateTime.minusMinutes(minutesForFirstCommit).toInstant()));
testTable.addInflightCommit(firstCommitTs).withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0);

HoodieCommitMetadata commitMetadata = generateCommitMetadata(
Collections.unmodifiableMap(new HashMap<String, List<String>>() {
{
put(p0, CollectionUtils.createImmutableList(file1P0C0));
put(p1, CollectionUtils.createImmutableList(file1P1C0));
}
})
);
metaClient.getActiveTimeline().saveAsComplete(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, firstCommitTs),
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));

metaClient = HoodieTableMetaClient.reload(metaClient);

List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config, simulateFailureRetry);
assertEquals(0, hoodieCleanStatsOne.size(), "Must not scan any partitions and clean any files");
assertTrue(testTable.baseFileExists(p0, firstCommitTs, file1P0C0));
assertTrue(testTable.baseFileExists(p1, firstCommitTs, file1P1C0));

// make next commit, with 1 insert & 1 update per partition
int minutesForSecondCommit = 90;
String secondCommitTs = HoodieActiveTimeline.formatDate(Date.from(commitDateTime.minusMinutes(minutesForSecondCommit).toInstant()));
Map<String, String> partitionAndFileId002 = testTable.addInflightCommit(secondCommitTs).getFileIdsWithBaseFilesInPartitions(p0, p1);
String file2P0C1 = partitionAndFileId002.get(p0);
String file2P1C1 = partitionAndFileId002.get(p1);
testTable.forCommit(secondCommitTs).withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0);
commitMetadata = generateCommitMetadata(new HashMap<String, List<String>>() {
{
put(p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1));
put(p1, CollectionUtils.createImmutableList(file1P1C0, file2P1C1));
}
});
metaClient.getActiveTimeline().saveAsComplete(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, secondCommitTs),
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config, simulateFailureRetry);
assertEquals(2, hoodieCleanStatsTwo.size(), "Should clean one file each from both the partitions");
assertTrue(testTable.baseFileExists(p0, secondCommitTs, file2P0C1));
assertTrue(testTable.baseFileExists(p1, secondCommitTs, file2P1C1));
assertTrue(testTable.baseFileExists(p0, secondCommitTs, file1P0C0));
assertTrue(testTable.baseFileExists(p1, secondCommitTs, file1P1C0));
assertFalse(testTable.baseFileExists(p0, firstCommitTs, file1P0C0));
assertFalse(testTable.baseFileExists(p1, firstCommitTs, file1P1C0));
}

/**
* Generate Bootstrap index, bootstrap base file and corresponding metaClient.
* @return Partition to BootstrapFileMapping Map
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,5 @@
* Hoodie cleaning policies.
*/
public enum HoodieCleaningPolicy {
KEEP_LATEST_FILE_VERSIONS, KEEP_LATEST_COMMITS;
KEEP_LATEST_FILE_VERSIONS, KEEP_LATEST_COMMITS, KEEP_LATEST_BY_HOURS;
}