Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 38 additions & 3 deletions fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@

public class S3Util {
// Logger is named after S3Util so that messages emitted here are attributed to this class.
private static final Logger LOG = LogManager.getLogger(S3Util.class);
// Hard cap on the size of a single numeric range expansion (e.g. {1..N})
// to prevent OOM from patterns like {1..100000000}
private static final int MAX_RANGE_EXPANSION_SIZE = 10000;

private static AwsCredentialsProvider getAwsCredencialsProvider(CloudCredential credential) {
AwsCredentials awsCredential;
Expand Down Expand Up @@ -348,6 +351,10 @@ public static String extendGlobNumberRange(String pathPattern) {
start = end;
end = temp;
}
// Skip excessively large ranges to avoid OOM from patterns like {1..100000000}
if ((long) end - start + 1 > MAX_RANGE_EXPANSION_SIZE) {
continue;
}
for (int i = start; i <= end; i++) {
if (!allNumbers.contains(i)) {
allNumbers.add(i);
Expand Down Expand Up @@ -583,6 +590,15 @@ private static List<Character> expandBracketContent(String content) {
return chars;
}

/**
 * Unchecked exception reporting that a brace expansion went past the given path limit.
 */
public static class BraceExpansionTooLargeException extends RuntimeException {
    /**
     * @param limit the path-count limit that was exceeded; it is embedded in the message
     */
    public BraceExpansionTooLargeException(int limit) {
        super(describe(limit));
    }

    // Builds the message text reported for an exceeded limit.
    private static String describe(int limit) {
        return new StringBuilder("Brace expansion exceeded limit of ")
                .append(limit)
                .append(" paths")
                .toString();
    }
}

/**
* Expand brace patterns in a path to generate all concrete file paths.
* Handles nested and multiple brace patterns.
Expand All @@ -597,11 +613,30 @@ private static List<Character> expandBracketContent(String content) {
*/
public static List<String> expandBracePatterns(String pathPattern) {
    List<String> result = new ArrayList<>();
    // Expand exactly once. A maxPaths of 0 turns off the path-count check in the
    // recursive helper, so this overload stays unbounded.
    expandBracePatternsRecursive(pathPattern, result, 0);
    return result;
}

private static void expandBracePatternsRecursive(String pattern, List<String> result) {
/**
 * Expands every brace pattern in a path while bounding how many concrete paths may be produced.
 * The bound is checked during expansion, so an oversized pattern is rejected before the
 * complete result list has been built.
 *
 * @param pathPattern path that may contain brace patterns
 * @param maxPaths upper bound on the number of expanded paths; a value of 0 or less disables the bound
 * @return the expanded concrete paths
 * @throws BraceExpansionTooLargeException if maxPaths is positive and the expansion goes past it
 */
public static List<String> expandBracePatterns(String pathPattern, int maxPaths) {
    final List<String> expanded = new ArrayList<>();
    expandBracePatternsRecursive(pathPattern, expanded, maxPaths);
    return expanded;
}

private static void expandBracePatternsRecursive(String pattern, List<String> result, int maxPaths) {
if (maxPaths > 0 && result.size() >= maxPaths) {
throw new BraceExpansionTooLargeException(maxPaths);
}

int braceStart = pattern.indexOf('{');
if (braceStart == -1) {
// No more braces, add the pattern as-is
Expand All @@ -626,7 +661,7 @@ private static void expandBracePatternsRecursive(String pattern, List<String> re

for (String alt : alternatives) {
// Recursively expand any remaining braces in the suffix
expandBracePatternsRecursive(prefix + alt + suffix, result);
expandBracePatternsRecursive(prefix + alt + suffix, result, maxPaths);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,7 @@ public Status globList(String remotePath, List<RemoteFile> result, boolean fileN
long elementCnt = 0;
long matchCnt = 0;
long startTime = System.nanoTime();
boolean usedHeadPath = false;
Status st = Status.OK;
try {
remotePath = AzurePropertyUtils.validateAndNormalizeUri(remotePath);
Expand All @@ -370,6 +371,7 @@ public Status globList(String remotePath, List<RemoteFile> result, boolean fileN
&& S3Util.isDeterministicPattern(keyPattern)) {
Status headStatus = globListByGetProperties(bucket, keyPattern, result, fileNameOnly, startTime);
if (headStatus != null) {
usedHeadPath = true;
return headStatus;
}
// If headStatus is null, fall through to use listing
Expand Down Expand Up @@ -444,11 +446,13 @@ public Status globList(String remotePath, List<RemoteFile> result, boolean fileN
st = new Status(Status.ErrCode.COMMON_ERROR,
"errors while glob file " + remotePath + ": " + e.getMessage());
} finally {
long endTime = System.nanoTime();
long duration = endTime - startTime;
LOG.info("process {} elements under prefix {} for {} round, match {} elements, take {} micro second",
remotePath, elementCnt, roundCnt, matchCnt,
duration / 1000);
if (!usedHeadPath) {
long endTime = System.nanoTime();
long duration = endTime - startTime;
LOG.info("process {} elements under prefix {} for {} round, match {} elements, take {} micro second",
elementCnt, remotePath, roundCnt, matchCnt,
duration / 1000);
}
}
return st;
}
Expand All @@ -468,15 +472,15 @@ private Status globListByGetProperties(String bucket, String keyPattern,
List<RemoteFile> result, boolean fileNameOnly, long startTime) {
try {
// First expand [...] brackets to {...} braces, then expand {..} ranges, then expand braces
// Use limit-aware expansion to avoid large allocations before checking the limit
String expandedPattern = S3Util.expandBracketPatterns(keyPattern);
expandedPattern = S3Util.extendGlobs(expandedPattern);
List<String> expandedPaths = S3Util.expandBracePatterns(expandedPattern);

// Fall back to listing if too many paths to avoid overwhelming Azure with requests
// Controlled by config: s3_head_request_max_paths
if (expandedPaths.size() > Config.s3_head_request_max_paths) {
LOG.info("Expanded path count {} exceeds limit {}, falling back to LIST",
expandedPaths.size(), Config.s3_head_request_max_paths);
List<String> expandedPaths;
try {
expandedPaths = S3Util.expandBracePatterns(expandedPattern, Config.s3_head_request_max_paths);
} catch (S3Util.BraceExpansionTooLargeException e) {
LOG.info("Brace expansion exceeded limit {}, falling back to LIST",
Config.s3_head_request_max_paths);
return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,7 @@ private GlobListResult globListInternal(String remotePath, List<RemoteFile> resu
long startTime = System.nanoTime();
String currentMaxFile = "";
boolean hasLimits = fileSizeLimit > 0 || fileNumLimit > 0;
boolean usedHeadPath = false;
String bucket = "";
String finalPrefix = "";
try {
Expand Down Expand Up @@ -602,6 +603,7 @@ private GlobListResult globListInternal(String remotePath, List<RemoteFile> resu
GlobListResult headResult = globListByHeadRequests(
bucket, keyPattern, result, fileNameOnly, startTime);
if (headResult != null) {
usedHeadPath = true;
return headResult;
}
// If headResult is null, fall through to use listing
Expand Down Expand Up @@ -733,7 +735,7 @@ private GlobListResult globListInternal(String remotePath, List<RemoteFile> resu
} finally {
long endTime = System.nanoTime();
long duration = endTime - startTime;
if (LOG.isDebugEnabled()) {
if (!usedHeadPath && LOG.isDebugEnabled()) {
LOG.debug("process {} elements under prefix {} for {} round, match {} elements, take {} ms",
elementCnt, remotePath, roundCnt, matchCnt,
duration / 1000 / 1000);
Expand All @@ -756,15 +758,15 @@ private GlobListResult globListByHeadRequests(String bucket, String keyPattern,
List<RemoteFile> result, boolean fileNameOnly, long startTime) {
try {
// First expand [...] brackets to {...} braces, then expand {..} ranges, then expand braces
// Use limit-aware expansion to avoid large allocations before checking the limit
String expandedPattern = S3Util.expandBracketPatterns(keyPattern);
expandedPattern = S3Util.extendGlobs(expandedPattern);
List<String> expandedPaths = S3Util.expandBracePatterns(expandedPattern);

// Fall back to listing if too many paths to avoid overwhelming S3 with HEAD requests
// Controlled by config: s3_head_request_max_paths
if (expandedPaths.size() > Config.s3_head_request_max_paths) {
LOG.info("Expanded path count {} exceeds limit {}, falling back to LIST",
expandedPaths.size(), Config.s3_head_request_max_paths);
List<String> expandedPaths;
try {
expandedPaths = S3Util.expandBracePatterns(expandedPattern, Config.s3_head_request_max_paths);
} catch (S3Util.BraceExpansionTooLargeException e) {
LOG.info("Brace expansion exceeded limit {}, falling back to LIST",
Config.s3_head_request_max_paths);
return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -456,5 +456,46 @@ public void testExpandBracketPatterns_malformedBracket() {
// Malformed bracket (no closing ]) - [ kept as literal
Assert.assertEquals("file[abc.csv", S3Util.expandBracketPatterns("file[abc.csv"));
}

// Tests for limit-aware expandBracePatterns

@Test
public void testExpandBracePatterns_withinLimit() {
    // A limit of 10 covers the three alternatives, so all of them come back in order
    final List<String> expected = Arrays.asList("file1.csv", "file2.csv", "file3.csv");
    final List<String> actual = S3Util.expandBracePatterns("file{1,2,3}.csv", 10);
    Assert.assertEquals(expected, actual);
}

@Test
public void testExpandBracePatterns_exactlyAtLimit() {
    // Three alternatives with a limit of three: hitting the limit exactly is still allowed
    final int limit = 3;
    final List<String> expected = Arrays.asList("file1.csv", "file2.csv", "file3.csv");
    final List<String> actual = S3Util.expandBracePatterns("file{1,2,3}.csv", limit);
    Assert.assertEquals(expected, actual);
}

@Test(expected = S3Util.BraceExpansionTooLargeException.class)
public void testExpandBracePatterns_exceedsLimit() {
    // Five alternatives against a limit of three: the call is expected to throw
    final String pattern = "file{1,2,3,4,5}.csv";
    final int limit = 3;
    S3Util.expandBracePatterns(pattern, limit);
}

@Test(expected = S3Util.BraceExpansionTooLargeException.class)
public void testExpandBracePatterns_oneOverLimit() {
    // Boundary case: four alternatives is exactly one more than the limit of three
    final String pattern = "file{1,2,3,4}.csv";
    final int limit = 3;
    S3Util.expandBracePatterns(pattern, limit);
}

@Test(expected = S3Util.BraceExpansionTooLargeException.class)
public void testExpandBracePatterns_cartesianExceedsLimit() {
    // Two brace groups of three alternatives each multiply to 9 paths, above the limit of 5
    final String pattern = "dir{a,b,c}/file{1,2,3}.csv";
    final int limit = 5;
    S3Util.expandBracePatterns(pattern, limit);
}

@Test
public void testExpandBracePatterns_zeroLimitMeansUnlimited() {
// maxPaths=0 means no limit (backward compatibility)
List<String> result = S3Util.expandBracePatterns("file{1,2,3,4,5}.csv", 0);
Assert.assertEquals(5, result.size());
Comment on lines +494 to +498
Copy link

Copilot AI Mar 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test asserts maxPaths=0 means unlimited, but the new expandBracePatterns(String, int) Javadoc says maxPaths must be > 0 and s3_head_request_max_paths is documented as a hard cap for HEAD-path expansion. Please align the test with the intended semantics (either treat 0 as “disable/fallback” like the prior behavior, or update the API/config documentation and add safeguards if 0 truly means “unlimited”).

Suggested change
@Test
public void testExpandBracePatterns_zeroLimitMeansUnlimited() {
// maxPaths=0 means no limit (backward compatibility)
List<String> result = S3Util.expandBracePatterns("file{1,2,3,4,5}.csv", 0);
Assert.assertEquals(5, result.size());
@Test(expected = IllegalArgumentException.class)
public void testExpandBracePatterns_zeroLimitIsInvalid() {
// maxPaths must be > 0; zero is an invalid argument
S3Util.expandBracePatterns("file{1,2,3,4,5}.csv", 0);

Copilot uses AI. Check for mistakes.
}
}

Loading