From 4d9b83be4d1657329b89cacceb96912f210fc3c7 Mon Sep 17 00:00:00 2001 From: Yongqiang YANG Date: Fri, 27 Mar 2026 23:51:57 -0700 Subject: [PATCH 1/2] [fix](s3) Add limit-aware brace expansion and fix misleading glob metrics Prevent OOM from unbounded brace expansion by adding early-stop to expandBracePatterns when the expansion exceeds s3_head_request_max_paths. Also skip LIST-path metrics logging when the HEAD/getProperties optimization was used, avoiding misleading "process 0 elements" logs. Found via review of #61775. Co-Authored-By: Claude Opus 4.6 --- .../org/apache/doris/common/util/S3Util.java | 34 +++++++++++++-- .../apache/doris/fs/obj/AzureObjStorage.java | 28 +++++++------ .../org/apache/doris/fs/obj/S3ObjStorage.java | 18 ++++---- .../apache/doris/common/util/S3UtilTest.java | 41 +++++++++++++++++++ 4 files changed, 98 insertions(+), 23 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java index 3e4f4e7a62f9f6..6bb5b7ce0294bc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java @@ -583,6 +583,15 @@ private static List expandBracketContent(String content) { return chars; } + /** + * Exception thrown when brace expansion exceeds the specified limit. + */ + public static class BraceExpansionTooLargeException extends RuntimeException { + public BraceExpansionTooLargeException(int limit) { + super("Brace expansion exceeded limit of " + limit + " paths"); + } + } + /** * Expand brace patterns in a path to generate all concrete file paths. * Handles nested and multiple brace patterns. @@ -597,11 +606,30 @@ private static List expandBracketContent(String content) { */ public static List expandBracePatterns(String pathPattern) { List result = new ArrayList<>(); - expandBracePatternsRecursive(pathPattern, result); + expandBracePatternsRecursive(pathPattern, result, 0); return result; } - private static void expandBracePatternsRecursive(String pattern, List result) { + /** + * Expand brace patterns with a limit on the number of expanded paths. + * Stops expansion early if the limit is exceeded, avoiding large allocations. + * + * @param pathPattern Path with optional brace patterns + * @param maxPaths Maximum number of expanded paths allowed (must be > 0) + * @return List of expanded concrete paths + * @throws BraceExpansionTooLargeException if expansion exceeds maxPaths + */ + public static List expandBracePatterns(String pathPattern, int maxPaths) { + List result = new ArrayList<>(); + expandBracePatternsRecursive(pathPattern, result, maxPaths); + return result; + } + + private static void expandBracePatternsRecursive(String pattern, List result, int maxPaths) { + if (maxPaths > 0 && result.size() >= maxPaths) { + throw new BraceExpansionTooLargeException(maxPaths); + } + int braceStart = pattern.indexOf('{'); if (braceStart == -1) { // No more braces, add the pattern as-is @@ -626,7 +654,7 @@ private static void expandBracePatternsRecursive(String pattern, List re for (String alt : alternatives) { // Recursively expand any remaining braces in the suffix - expandBracePatternsRecursive(prefix + alt + suffix, result); + expandBracePatternsRecursive(prefix + alt + suffix, result, maxPaths); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java index 4929e34e7f5a74..08f83ea8f60c5a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java @@ -354,6 +354,7 @@ public Status globList(String remotePath, List result, boolean fileN long elementCnt = 0; long matchCnt = 0; long startTime = System.nanoTime(); + boolean usedHeadPath = false; Status st = Status.OK; try { remotePath = AzurePropertyUtils.validateAndNormalizeUri(remotePath); @@ -370,6 +371,7 @@ public Status globList(String remotePath, List result, boolean fileN && S3Util.isDeterministicPattern(keyPattern)) { Status headStatus = globListByGetProperties(bucket, keyPattern, result, fileNameOnly, startTime); if (headStatus != null) { + usedHeadPath = true; return headStatus; } // If headStatus is null, fall through to use listing @@ -444,11 +446,13 @@ public Status globList(String remotePath, List result, boolean fileN st = new Status(Status.ErrCode.COMMON_ERROR, "errors while glob file " + remotePath + ": " + e.getMessage()); } finally { - long endTime = System.nanoTime(); - long duration = endTime - startTime; - LOG.info("process {} elements under prefix {} for {} round, match {} elements, take {} micro second", - remotePath, elementCnt, roundCnt, matchCnt, - duration / 1000); + if (!usedHeadPath) { + long endTime = System.nanoTime(); + long duration = endTime - startTime; + LOG.info("process {} elements under prefix {} for {} round, match {} elements, take {} micro second", + remotePath, elementCnt, roundCnt, matchCnt, + duration / 1000); + } } return st; } @@ -468,15 +472,15 @@ private Status globListByGetProperties(String bucket, String keyPattern, List result, boolean fileNameOnly, long startTime) { try { // First expand [...] brackets to {...} braces, then expand {..} ranges, then expand braces + // Use limit-aware expansion to avoid large allocations before checking the limit String expandedPattern = S3Util.expandBracketPatterns(keyPattern); expandedPattern = S3Util.extendGlobs(expandedPattern); - List expandedPaths = S3Util.expandBracePatterns(expandedPattern); - - // Fall back to listing if too many paths to avoid overwhelming Azure with requests - // Controlled by config: s3_head_request_max_paths - if (expandedPaths.size() > Config.s3_head_request_max_paths) { - LOG.info("Expanded path count {} exceeds limit {}, falling back to LIST", - expandedPaths.size(), Config.s3_head_request_max_paths); + List expandedPaths; + try { + expandedPaths = S3Util.expandBracePatterns(expandedPattern, Config.s3_head_request_max_paths); + } catch (S3Util.BraceExpansionTooLargeException e) { + LOG.info("Brace expansion exceeded limit {}, falling back to LIST", + Config.s3_head_request_max_paths); return null; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java index ec827c785a52b1..bb7cf945a425a1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java @@ -575,6 +575,7 @@ private GlobListResult globListInternal(String remotePath, List resu long startTime = System.nanoTime(); String currentMaxFile = ""; boolean hasLimits = fileSizeLimit > 0 || fileNumLimit > 0; + boolean usedHeadPath = false; String bucket = ""; String finalPrefix = ""; try { @@ -602,6 +603,7 @@ private GlobListResult globListInternal(String remotePath, List resu GlobListResult headResult = globListByHeadRequests( bucket, keyPattern, result, fileNameOnly, startTime); if (headResult != null) { + usedHeadPath = true; return headResult; } // If headResult is null, fall through to use listing @@ -733,7 +735,7 @@ private GlobListResult globListInternal(String remotePath, List resu } finally { long endTime = System.nanoTime(); long duration = endTime - startTime; - if (LOG.isDebugEnabled()) { + if (!usedHeadPath && LOG.isDebugEnabled()) { LOG.debug("process {} elements under prefix {} for {} round, match {} elements, take {} ms", elementCnt, remotePath, roundCnt, matchCnt, duration / 1000 / 1000); @@ -756,15 +758,15 @@ private GlobListResult globListByHeadRequests(String bucket, String keyPattern, List result, boolean fileNameOnly, long startTime) { try { // First expand [...] brackets to {...} braces, then expand {..} ranges, then expand braces + // Use limit-aware expansion to avoid large allocations before checking the limit String expandedPattern = S3Util.expandBracketPatterns(keyPattern); expandedPattern = S3Util.extendGlobs(expandedPattern); - List expandedPaths = S3Util.expandBracePatterns(expandedPattern); - - // Fall back to listing if too many paths to avoid overwhelming S3 with HEAD requests - // Controlled by config: s3_head_request_max_paths - if (expandedPaths.size() > Config.s3_head_request_max_paths) { - LOG.info("Expanded path count {} exceeds limit {}, falling back to LIST", - expandedPaths.size(), Config.s3_head_request_max_paths); + List expandedPaths; + try { + expandedPaths = S3Util.expandBracePatterns(expandedPattern, Config.s3_head_request_max_paths); + } catch (S3Util.BraceExpansionTooLargeException e) { + LOG.info("Brace expansion exceeded limit {}, falling back to LIST", + Config.s3_head_request_max_paths); return null; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java index 4b976ed86cd3c5..a17f443f080e52 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java @@ -456,5 +456,46 @@ public void testExpandBracketPatterns_malformedBracket() { // Malformed bracket (no closing ]) - [ kept as literal Assert.assertEquals("file[abc.csv", S3Util.expandBracketPatterns("file[abc.csv")); } + + // Tests for limit-aware expandBracePatterns + + @Test + public void testExpandBracePatterns_withinLimit() { + // Expansion within the limit should succeed + List result = S3Util.expandBracePatterns("file{1,2,3}.csv", 10); + Assert.assertEquals(Arrays.asList("file1.csv", "file2.csv", "file3.csv"), result); + } + + @Test + public void testExpandBracePatterns_exactlyAtLimit() { + // Expansion exactly at the limit should succeed + List result = S3Util.expandBracePatterns("file{1,2,3}.csv", 3); + Assert.assertEquals(Arrays.asList("file1.csv", "file2.csv", "file3.csv"), result); + } + + @Test(expected = S3Util.BraceExpansionTooLargeException.class) + public void testExpandBracePatterns_exceedsLimit() { + // Expansion exceeding the limit should throw + S3Util.expandBracePatterns("file{1,2,3,4,5}.csv", 3); + } + + @Test(expected = S3Util.BraceExpansionTooLargeException.class) + public void testExpandBracePatterns_oneOverLimit() { + // maxPaths+1 items must also throw (boundary case) + S3Util.expandBracePatterns("file{1,2,3,4}.csv", 3); + } + + @Test(expected = S3Util.BraceExpansionTooLargeException.class) + public void testExpandBracePatterns_cartesianExceedsLimit() { + // Cartesian product {a,b,c} x {1,2,3} = 9 paths, limit = 5 + S3Util.expandBracePatterns("dir{a,b,c}/file{1,2,3}.csv", 5); + } + + @Test + public void testExpandBracePatterns_zeroLimitMeansUnlimited() { + // maxPaths=0 means no limit (backward compatibility) + List result = S3Util.expandBracePatterns("file{1,2,3,4,5}.csv", 0); + Assert.assertEquals(5, result.size()); + } } From 3832d79d648691667b398ee7a6180c1dc8c7d6ca Mon Sep 17 00:00:00 2001 From: Yongqiang YANG Date: Sat, 28 Mar 2026 00:30:59 -0700 Subject: [PATCH 2/2] [fix](s3) Address review: cap range expansion, fix Javadoc and Azure log order - Add MAX_RANGE_EXPANSION_SIZE (10000) hard cap in extendGlobNumberRange to prevent OOM from patterns like {1..100000000} before the limit-aware brace expansion even runs. - Fix Javadoc: maxPaths=0 means unlimited (not "must be > 0"). - Fix pre-existing Azure glob metrics log argument order (remotePath and elementCnt were swapped). Co-Authored-By: Claude Opus 4.6 --- .../java/org/apache/doris/common/util/S3Util.java | 11 +++++++++-- .../java/org/apache/doris/fs/obj/AzureObjStorage.java | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java index 6bb5b7ce0294bc..73ad6c1ad4036b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java @@ -63,6 +63,9 @@ public class S3Util { private static final Logger LOG = LogManager.getLogger(Util.class); + // Hard cap on the size of a single numeric range expansion (e.g. {1..N}) + // to prevent OOM from patterns like {1..100000000} + private static final int MAX_RANGE_EXPANSION_SIZE = 10000; private static AwsCredentialsProvider getAwsCredencialsProvider(CloudCredential credential) { AwsCredentials awsCredential; @@ -348,6 +351,10 @@ public static String extendGlobNumberRange(String pathPattern) { start = end; end = temp; } + // Skip excessively large ranges to avoid OOM from patterns like {1..100000000} + if ((long) end - start + 1 > MAX_RANGE_EXPANSION_SIZE) { + continue; + } for (int i = start; i <= end; i++) { if (!allNumbers.contains(i)) { allNumbers.add(i); @@ -615,9 +622,9 @@ public static List expandBracePatterns(String pathPattern) { * Stops expansion early if the limit is exceeded, avoiding large allocations. * * @param pathPattern Path with optional brace patterns - * @param maxPaths Maximum number of expanded paths allowed (must be > 0) + * @param maxPaths Maximum number of expanded paths allowed; 0 or negative means unlimited * @return List of expanded concrete paths - * @throws BraceExpansionTooLargeException if expansion exceeds maxPaths + * @throws BraceExpansionTooLargeException if expansion exceeds maxPaths (when maxPaths > 0) */ public static List expandBracePatterns(String pathPattern, int maxPaths) { List result = new ArrayList<>(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java index 08f83ea8f60c5a..35c41b98a9f4f0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java @@ -450,7 +450,7 @@ public Status globList(String remotePath, List result, boolean fileN long endTime = System.nanoTime(); long duration = endTime - startTime; LOG.info("process {} elements under prefix {} for {} round, match {} elements, take {} micro second", - remotePath, elementCnt, roundCnt, matchCnt, + elementCnt, remotePath, roundCnt, matchCnt, duration / 1000); } }