diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java index cfe4fbad4c30..56d9b639cd58 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java @@ -461,6 +461,8 @@ public void testOnlyValidPartitionsAdded(HoodieTableType tableType) throws Excep // Create an empty directory which is not a partition directory (lacks partition metadata) final String nonPartitionDirectory = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[0] + "-nonpartition"; Files.createDirectories(Paths.get(basePath, nonPartitionDirectory)); + // Write random file to assert it is not added to the view + Files.createFile(Paths.get(basePath, nonPartitionDirectory, "randomFile.parquet")); // Three directories which are partitions but will be ignored due to filter final String filterDirRegex = ".*-filterDir\\d|\\..*"; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java index 4abcae863ff6..9307e56ca332 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java @@ -79,6 +79,7 @@ public class FSUtils { public static final Pattern LOG_FILE_PATTERN = Pattern.compile("^\\.(.+)_(.*)\\.(log|archive)\\.(\\d+)(_((\\d+)-(\\d+)-(\\d+))(.cdc)?)?"); public static final Pattern PREFIX_BY_FILE_ID_PATTERN = Pattern.compile("^(.+)-(\\d+)"); + private static final Pattern BASE_FILE_PATTERN = Pattern.compile("[a-zA-Z0-9-]+_[a-zA-Z0-9-]+_[0-9]+\\.[a-zA-Z0-9]+"); private static final String LOG_FILE_EXTENSION = ".log"; @@ -398,7 +399,10 @@ public static String makeLogFileName(String fileId, String logFileExtension, Str public static boolean isBaseFile(StoragePath path) { String extension = getFileExtension(path.getName()); - return HoodieFileFormat.BASE_FILE_EXTENSIONS.contains(extension); + if (HoodieFileFormat.BASE_FILE_EXTENSIONS.contains(extension)) { + return BASE_FILE_PATTERN.matcher(path.getName()).matches(); + } + return false; } public static boolean isLogFile(StoragePath logPath) { diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java index 3484fe8ae572..6843602e4016 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java @@ -2000,16 +2000,16 @@ public DirectoryInfo(String relativePath, List pathInfos, Strin // Pre-allocate with the maximum length possible filenameToSizeMap = new HashMap<>(pathInfos.size()); + // Presence of partition meta file implies this is a HUDI partition + isHoodiePartition = pathInfos.stream().anyMatch(status -> status.getPath().getName().startsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX)); for (StoragePathInfo pathInfo : pathInfos) { - if (pathInfo.isDirectory()) { + // Do not attempt to search for more subdirectories inside directories that are partitions + if (!isHoodiePartition && pathInfo.isDirectory()) { // Ignore .hoodie directory as there cannot be any partitions inside it if (!pathInfo.getPath().getName().equals(HoodieTableMetaClient.METAFOLDER_NAME)) { this.subDirectories.add(pathInfo.getPath()); } - } else if (pathInfo.getPath().getName().startsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX)) { - // Presence of partition meta file implies this is a HUDI partition - this.isHoodiePartition = true; - } else if (FSUtils.isDataFile(pathInfo.getPath())) { + } else if (isHoodiePartition && FSUtils.isDataFile(pathInfo.getPath())) { // Regular HUDI data file (base file or log file) String dataFileCommitTime = FSUtils.getCommitTime(pathInfo.getPath().getName()); // Limit the file listings to files which were created by successful commits before the maxInstant time. diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileCreateUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileCreateUtils.java index bf4d69ac249f..3ad3ce542658 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileCreateUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileCreateUtils.java @@ -381,7 +381,7 @@ public static void createInflightSavepoint(String basePath, String instantTime) createMetaFile(basePath, instantTime, HoodieTimeline.INFLIGHT_SAVEPOINT_EXTENSION); } - public static void createPartitionMetaFile(String basePath, String partitionPath) throws IOException { + public static URI createPartitionMetaFile(String basePath, String partitionPath) throws IOException { Path metaFilePath; try { Path parentPath = Paths.get(new URI(basePath).getPath(), partitionPath); @@ -390,6 +390,7 @@ public static void createPartitionMetaFile(String basePath, String partitionPath if (Files.notExists(metaFilePath)) { Files.createFile(metaFilePath); } + return metaFilePath.toUri(); } catch (URISyntaxException e) { throw new HoodieException("Error creating partition meta file", e); } diff --git a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java index b69dc94609ba..01c21d7c2086 100644 --- a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java @@ -27,6 +27,7 @@ import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.testutils.FileCreateUtils; import org.apache.hudi.common.testutils.HoodieCommonTestHarness; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestTable; @@ -40,6 +41,7 @@ import org.junit.jupiter.api.Test; import java.io.IOException; +import java.net.URI; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -98,6 +100,8 @@ public void testConvertFilesToPartitionStatsRecords() throws Exception { // Generate 10 inserts for each partition and populate partitionBaseFilePairs and recordKeys. DATE_PARTITIONS.forEach(p -> { try { + URI partitionMetaFile = FileCreateUtils.createPartitionMetaFile(basePath, p); + StoragePath partitionMetadataPath = new StoragePath(partitionMetaFile); String fileId1 = UUID.randomUUID().toString(); FileSlice fileSlice1 = new FileSlice(p, instant1, fileId1); StoragePath storagePath1 = new StoragePath(hoodieTestTable.getBaseFilePath(p, fileId1).toUri()); @@ -122,7 +126,7 @@ public void testConvertFilesToPartitionStatsRecords() throws Exception { fileSlice2.setBaseFile(baseFile2); partitionInfoList.add(new HoodieTableMetadataUtil.DirectoryInfo( p, - metaClient.getStorage().listDirectEntries(Arrays.asList(storagePath1, storagePath2)), + metaClient.getStorage().listDirectEntries(Arrays.asList(partitionMetadataPath, storagePath1, storagePath2)), instant2, Collections.emptySet())); } catch (Exception e) {