Skip to content

Commit

Permalink
[HUDI-5336] Fixing log file pattern match to ignore extraneous files (#7612)
Browse files Browse the repository at this point in the history

Co-authored-by: Y Ethan Guo <ethan.guoyihua@gmail.com>
  • Loading branch information
nsivabalan and yihua committed Jan 20, 2023
1 parent f8028a4 commit 86be855
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public class FSUtils {
// Log files are of this pattern - .b5068208-e1a4-11e6-bf01-fe55135034f3_20170101134598.log.1_1-0-1
// Archive log files are of this pattern - .commits_.archive.1_1-0-1
public static final Pattern LOG_FILE_PATTERN =
Pattern.compile("\\.(.+)_(.*)\\.(.+)\\.(\\d+)(_((\\d+)-(\\d+)-(\\d+))(.cdc)?)?");
Pattern.compile("^\\.(.+)_(.*)\\.(log|archive)\\.(\\d+)(_((\\d+)-(\\d+)-(\\d+))(.cdc)?)?");
private static final int MAX_ATTEMPTS_RECOVER_LEASE = 10;
private static final long MIN_CLEAN_TO_KEEP = 10;
private static final long MIN_ROLLBACK_TO_KEEP = 10;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ public void testAppendNotSupported(@TempDir java.nio.file.Path tempDir) throws I

for (int i = 0; i < 2; i++) {
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive").overBaseCommit("")
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits").overBaseCommit("")
.withFs(localFs).build();
writer.appendBlock(dataBlock);
writer.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ public void testFailedToGetAppendStreamFromHDFSNameNode()
HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD);

Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive")
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits")
.overBaseCommit("").withFs(fs).build();

writer.appendBlock(dataBlock);
Expand Down Expand Up @@ -134,7 +134,7 @@ public void testFailedToGetAppendStreamFromHDFSNameNode()
// Opening a new Writer right now will throw IOException. The code should handle this, rollover the logfile and
// return a new writer with a bumped up logVersion
writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive")
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits")
.overBaseCommit("").withFs(fs).build();
header = new HashMap<>();
header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,12 +306,14 @@ protected void testInvalidLogFiles() throws Exception {
String fileName2 =
FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 1, TEST_WRITE_TOKEN);
// create a dummy log file mimicking cloud stores marker files
String fileName3 = "_DUMMY_" + fileName1.substring(1, fileName1.length());
String fileName3 = "_GCS_SYNCABLE_TEMPFILE_" + fileName1;
String fileName4 = "_DUMMY_" + fileName1.substring(1, fileName1.length());
// this file should not be identified as a log file.

Paths.get(basePath, partitionPath, fileName1).toFile().createNewFile();
Paths.get(basePath, partitionPath, fileName2).toFile().createNewFile();
Paths.get(basePath, partitionPath, fileName3).toFile().createNewFile();
Paths.get(basePath, partitionPath, fileName4).toFile().createNewFile();
HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();

HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime1);
Expand Down

0 comments on commit 86be855

Please sign in to comment.