From 89a29852637affc34e54e1815a157562cfddb46b Mon Sep 17 00:00:00 2001 From: Ahmed Helmi Date: Thu, 20 Nov 2025 00:35:11 +0200 Subject: [PATCH 1/2] fix: Correctly interpret and apply S3 `path.style.access` configuration, including endpoint normalization and adding tests. --- native/core/src/parquet/objectstore/s3.rs | 89 +++++++++++++++++++++-- 1 file changed, 82 insertions(+), 7 deletions(-) diff --git a/native/core/src/parquet/objectstore/s3.rs b/native/core/src/parquet/objectstore/s3.rs index 6d8f011d0b..9eec400eda 100644 --- a/native/core/src/parquet/objectstore/s3.rs +++ b/native/core/src/parquet/objectstore/s3.rs @@ -169,19 +169,19 @@ fn extract_s3_config_options( } // Extract and handle path style access (virtual hosted style) - let mut virtual_hosted_style_request = false; + let mut path_style_access = false; if let Some(path_style) = get_config_trimmed(configs, bucket, "path.style.access") { - virtual_hosted_style_request = path_style.to_lowercase() == "true"; + path_style_access = path_style.to_lowercase() == "true"; s3_configs.insert( AmazonS3ConfigKey::VirtualHostedStyleRequest, - virtual_hosted_style_request.to_string(), + (!path_style_access).to_string(), ); } - // Extract endpoint configuration and modify if virtual hosted style is enabled + // Extract endpoint configuration and modify if path style access is enabled if let Some(endpoint) = get_config_trimmed(configs, bucket, "endpoint") { let normalized_endpoint = - normalize_endpoint(endpoint, bucket, virtual_hosted_style_request); + normalize_endpoint(endpoint, bucket, path_style_access); if let Some(endpoint) = normalized_endpoint { s3_configs.insert(AmazonS3ConfigKey::Endpoint, endpoint); } @@ -202,7 +202,7 @@ fn extract_s3_config_options( fn normalize_endpoint( endpoint: &str, bucket: &str, - virtual_hosted_style_request: bool, + force_path_style: bool, ) -> Option { if endpoint.is_empty() { return None; @@ -221,7 +221,7 @@ fn normalize_endpoint( endpoint.to_string() }; - if virtual_hosted_style_request { + if force_path_style { if endpoint.ends_with("/") { Some(format!("{endpoint}{bucket}")) } else { @@ -1852,6 +1852,81 @@ mod tests { ); } + #[test] + fn test_extract_s3_config_path_style_access() { + // Case 1: path.style.access is true -> VirtualHostedStyleRequest should be false + let mut configs = HashMap::new(); + configs.insert("fs.s3a.path.style.access".to_string(), "true".to_string()); + let s3_configs = extract_s3_config_options(&configs, "test-bucket"); + assert_eq!( + s3_configs.get(&AmazonS3ConfigKey::VirtualHostedStyleRequest), + Some(&"false".to_string()) + ); + + // Case 2: path.style.access is false -> VirtualHostedStyleRequest should be true + let mut configs = HashMap::new(); + configs.insert("fs.s3a.path.style.access".to_string(), "false".to_string()); + let s3_configs = extract_s3_config_options(&configs, "test-bucket"); + assert_eq!( + s3_configs.get(&AmazonS3ConfigKey::VirtualHostedStyleRequest), + Some(&"true".to_string()) + ); + + // Case 3: path.style.access is not set -> VirtualHostedStyleRequest should not be set + let configs = HashMap::new(); + let s3_configs = extract_s3_config_options(&configs, "test-bucket"); + assert_eq!( + s3_configs.get(&AmazonS3ConfigKey::VirtualHostedStyleRequest), + None + ); + } + + + #[test] + fn test_normalize_endpoint() { + let bucket = "test-bucket"; + + // Case 1: force_path_style = true -> should append bucket + assert_eq!( + normalize_endpoint("http://localhost:9000", bucket, true), + Some("http://localhost:9000/test-bucket".to_string()) + ); + assert_eq!( + normalize_endpoint("http://localhost:9000/", bucket, true), + Some("http://localhost:9000/test-bucket".to_string()) + ); + assert_eq!( + normalize_endpoint("https://my-s3-compatible.com", bucket, true), + Some("https://my-s3-compatible.com/test-bucket".to_string()) + ); + + // Case 2: force_path_style = false -> should NOT append bucket + assert_eq!( + normalize_endpoint("http://localhost:9000", bucket, false), + Some("http://localhost:9000".to_string()) + ); + assert_eq!( + normalize_endpoint("https://my-s3-compatible.com", bucket, false), + Some("https://my-s3-compatible.com".to_string()) + ); + + // Case 3: Default S3 endpoint -> should return None (ignored) + assert_eq!( + normalize_endpoint("s3.amazonaws.com", bucket, true), + None + ); + assert_eq!( + normalize_endpoint("s3.amazonaws.com", bucket, false), + None + ); + + // Case 4: Empty endpoint -> should return None + assert_eq!( + normalize_endpoint("", bucket, true), + None + ); + } + #[test] fn test_extract_s3_config_custom_endpoint() { let cases = vec![ From 016eb551030a81e4a87d9b382699f27720973b3c Mon Sep 17 00:00:00 2001 From: Ahmed Helmi Date: Thu, 20 Nov 2025 07:42:54 +0200 Subject: [PATCH 2/2] feat: Implement virtual-hosted-style S3 endpoint normalization for custom endpoints by inserting the bucket as a subdomain and update related tests. --- native/core/src/parquet/objectstore/s3.rs | 37 ++++++++++++++++++----- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/native/core/src/parquet/objectstore/s3.rs b/native/core/src/parquet/objectstore/s3.rs index 9eec400eda..cddfd9387a 100644 --- a/native/core/src/parquet/objectstore/s3.rs +++ b/native/core/src/parquet/objectstore/s3.rs @@ -222,13 +222,30 @@ fn normalize_endpoint( }; if force_path_style { + // Path-style: bucket in path + // Example: https://s3.oss-me-central-1.aliyuncs.com/my-bucket/key if endpoint.ends_with("/") { Some(format!("{endpoint}{bucket}")) } else { Some(format!("{endpoint}/{bucket}")) } } else { - Some(endpoint) // Avoid extra to_string() call since endpoint is already a String + // Virtual-hosted-style: bucket in subdomain + // For custom endpoints (non-AWS), we need to manually insert the bucket as a subdomain + // because object_store crate only does this automatically for AWS S3 endpoints. + // Example: https://my-bucket.s3.oss-me-central-1.aliyuncs.com/key + + // Parse the endpoint to insert bucket as subdomain + if let Some(protocol_end) = endpoint.find("://") { + let protocol = &endpoint[..protocol_end + 3]; // "https://" or "http://" + let rest = &endpoint[protocol_end + 3..]; // "s3.oss-me-central-1.aliyuncs.com" or "s3.oss-me-central-1.aliyuncs.com/path" + + // Insert bucket as first subdomain + Some(format!("{protocol}{bucket}.{rest}")) + } else { + // Shouldn't happen since we add protocol above, but fallback + Some(format!("{bucket}.{endpoint}")) + } } } @@ -1900,14 +1917,18 @@ mod tests { Some("https://my-s3-compatible.com/test-bucket".to_string()) ); - // Case 2: force_path_style = false -> should NOT append bucket + // Case 2: force_path_style = false -> should insert bucket as subdomain assert_eq!( normalize_endpoint("http://localhost:9000", bucket, false), - Some("http://localhost:9000".to_string()) + Some("http://test-bucket.localhost:9000".to_string()) ); assert_eq!( normalize_endpoint("https://my-s3-compatible.com", bucket, false), - Some("https://my-s3-compatible.com".to_string()) + Some("https://test-bucket.my-s3-compatible.com".to_string()) + ); + assert_eq!( + normalize_endpoint("https://s3.oss-me-central-1.aliyuncs.com", bucket, false), + Some("https://test-bucket.s3.oss-me-central-1.aliyuncs.com".to_string()) ); // Case 3: Default S3 endpoint -> should return None (ignored) @@ -1929,12 +1950,14 @@ mod tests { #[test] fn test_extract_s3_config_custom_endpoint() { + // When path.style.access is not set, it defaults to false (virtual-hosted-style) + // So bucket should be inserted as subdomain let cases = vec![ - ("custom.endpoint.com", "https://custom.endpoint.com"), - ("https://custom.endpoint.com", "https://custom.endpoint.com"), + ("custom.endpoint.com", "https://test-bucket.custom.endpoint.com"), + ("https://custom.endpoint.com", "https://test-bucket.custom.endpoint.com"), ( "https://custom.endpoint.com/path/to/resource", - "https://custom.endpoint.com/path/to/resource", + "https://test-bucket.custom.endpoint.com/path/to/resource", ), ]; for (endpoint, configured_endpoint) in cases {