Skip to content

Commit

Permalink
add base file regex, short circuit search
Browse files Browse the repository at this point in the history
  • Loading branch information
the-other-tim-brown committed May 21, 2024
1 parent 6af1525 commit ef710f1
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ public class FSUtils {
public static final Pattern LOG_FILE_PATTERN =
Pattern.compile("^\\.(.+)_(.*)\\.(log|archive)\\.(\\d+)(_((\\d+)-(\\d+)-(\\d+))(.cdc)?)?");
public static final Pattern PREFIX_BY_FILE_ID_PATTERN = Pattern.compile("^(.+)-(\\d+)");
+ private static final Pattern BASE_FILE_PATTERN = Pattern.compile("[a-zA-Z0-9]+_[a-zA-Z0-9]+_[0-9]+\\.[a-zA-Z0-9]+");

private static final String LOG_FILE_EXTENSION = ".log";

Expand Down Expand Up @@ -398,7 +399,10 @@ public static String makeLogFileName(String fileId, String logFileExtension, Str

public static boolean isBaseFile(StoragePath path) {
String extension = getFileExtension(path.getName());
- return HoodieFileFormat.BASE_FILE_EXTENSIONS.contains(extension);
+ if (HoodieFileFormat.BASE_FILE_EXTENSIONS.contains(extension)) {
+ return BASE_FILE_PATTERN.matcher(path.getName()).matches();
+ }
+ return false;
}

public static boolean isLogFile(StoragePath logPath) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2003,7 +2003,8 @@ public DirectoryInfo(String relativePath, List<StoragePathInfo> pathInfos, Strin
// Presence of partition meta file implies this is a HUDI partition
isHoodiePartition = pathInfos.stream().anyMatch(status -> status.getPath().getName().startsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX));
for (StoragePathInfo pathInfo : pathInfos) {
- if (pathInfo.isDirectory()) {
+ // Do not attempt to search for more subdirectories inside directories that are partitions
+ if (!isHoodiePartition && pathInfo.isDirectory()) {
// Ignore .hoodie directory as there cannot be any partitions inside it
if (!pathInfo.getPath().getName().equals(HoodieTableMetaClient.METAFOLDER_NAME)) {
this.subDirectories.add(pathInfo.getPath());
Expand Down

0 comments on commit ef710f1

Please sign in to comment.