-
Notifications
You must be signed in to change notification settings - Fork 253
fix: Correctly interpret and apply S3 path.style.access configurati…
#2803
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -169,19 +169,19 @@ fn extract_s3_config_options( | |||||
| } | ||||||
|
|
||||||
| // Extract and handle path style access (virtual hosted style) | ||||||
| let mut virtual_hosted_style_request = false; | ||||||
| let mut path_style_access = false; | ||||||
| if let Some(path_style) = get_config_trimmed(configs, bucket, "path.style.access") { | ||||||
| virtual_hosted_style_request = path_style.to_lowercase() == "true"; | ||||||
| path_style_access = path_style.to_lowercase() == "true"; | ||||||
| s3_configs.insert( | ||||||
| AmazonS3ConfigKey::VirtualHostedStyleRequest, | ||||||
| virtual_hosted_style_request.to_string(), | ||||||
| (!path_style_access).to_string(), | ||||||
| ); | ||||||
| } | ||||||
|
|
||||||
| // Extract endpoint configuration and modify if virtual hosted style is enabled | ||||||
| // Extract endpoint configuration and modify if path style access is enabled | ||||||
| if let Some(endpoint) = get_config_trimmed(configs, bucket, "endpoint") { | ||||||
| let normalized_endpoint = | ||||||
| normalize_endpoint(endpoint, bucket, virtual_hosted_style_request); | ||||||
| normalize_endpoint(endpoint, bucket, path_style_access); | ||||||
| if let Some(endpoint) = normalized_endpoint { | ||||||
| s3_configs.insert(AmazonS3ConfigKey::Endpoint, endpoint); | ||||||
| } | ||||||
|
|
@@ -202,7 +202,7 @@ fn extract_s3_config_options( | |||||
| fn normalize_endpoint( | ||||||
| endpoint: &str, | ||||||
| bucket: &str, | ||||||
| virtual_hosted_style_request: bool, | ||||||
| force_path_style: bool, | ||||||
| ) -> Option<String> { | ||||||
| if endpoint.is_empty() { | ||||||
| return None; | ||||||
|
|
@@ -221,14 +221,31 @@ fn normalize_endpoint( | |||||
| endpoint.to_string() | ||||||
| }; | ||||||
|
|
||||||
| if virtual_hosted_style_request { | ||||||
| if force_path_style { | ||||||
| // Path-style: bucket in path | ||||||
| // Example: https://s3.oss-me-central-1.aliyuncs.com/my-bucket/key | ||||||
| if endpoint.ends_with("/") { | ||||||
| Some(format!("{endpoint}{bucket}")) | ||||||
| } else { | ||||||
| Some(format!("{endpoint}/{bucket}")) | ||||||
| } | ||||||
| } else { | ||||||
| Some(endpoint) // Avoid extra to_string() call since endpoint is already a String | ||||||
| // Virtual-hosted-style: bucket in subdomain | ||||||
| // For custom endpoints (non-AWS), we need to manually insert the bucket as a subdomain | ||||||
| // because object_store crate only does this automatically for AWS S3 endpoints. | ||||||
| // Example: https://my-bucket.s3.oss-me-central-1.aliyuncs.com/key | ||||||
|
|
||||||
| // Parse the endpoint to insert bucket as subdomain | ||||||
| if let Some(protocol_end) = endpoint.find("://") { | ||||||
| let protocol = &endpoint[..protocol_end + 3]; // "https://" or "http://" | ||||||
| let rest = &endpoint[protocol_end + 3..]; // "s3.oss-me-central-1.aliyuncs.com" or "s3.oss-me-central-1.aliyuncs.com/path" | ||||||
|
|
||||||
| // Insert bucket as first subdomain | ||||||
| Some(format!("{protocol}{bucket}.{rest}")) | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. According to https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html#general-purpose-bucket-names usage of periods (.) in the bucket name would create several subdomains. Is that supported ? Or a check should be added and reported somehow ?! |
||||||
| } else { | ||||||
| // Shouldn't happen since we add protocol above, but fallback | ||||||
| Some(format!("{bucket}.{endpoint}")) | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -1852,14 +1869,95 @@ mod tests { | |||||
| ); | ||||||
| } | ||||||
|
|
||||||
| #[test] | ||||||
| fn test_extract_s3_config_path_style_access() { | ||||||
| // Case 1: path.style.access is true -> VirtualHostedStyleRequest should be false | ||||||
| let mut configs = HashMap::new(); | ||||||
| configs.insert("fs.s3a.path.style.access".to_string(), "true".to_string()); | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
to test |
||||||
| let s3_configs = extract_s3_config_options(&configs, "test-bucket"); | ||||||
| assert_eq!( | ||||||
| s3_configs.get(&AmazonS3ConfigKey::VirtualHostedStyleRequest), | ||||||
| Some(&"false".to_string()) | ||||||
| ); | ||||||
|
|
||||||
| // Case 2: path.style.access is false -> VirtualHostedStyleRequest should be true | ||||||
| let mut configs = HashMap::new(); | ||||||
| configs.insert("fs.s3a.path.style.access".to_string(), "false".to_string()); | ||||||
| let s3_configs = extract_s3_config_options(&configs, "test-bucket"); | ||||||
| assert_eq!( | ||||||
| s3_configs.get(&AmazonS3ConfigKey::VirtualHostedStyleRequest), | ||||||
| Some(&"true".to_string()) | ||||||
| ); | ||||||
|
|
||||||
| // Case 3: path.style.access is not set -> VirtualHostedStyleRequest should not be set | ||||||
| let configs = HashMap::new(); | ||||||
| let s3_configs = extract_s3_config_options(&configs, "test-bucket"); | ||||||
| assert_eq!( | ||||||
| s3_configs.get(&AmazonS3ConfigKey::VirtualHostedStyleRequest), | ||||||
| None | ||||||
| ); | ||||||
| } | ||||||
|
|
||||||
|
|
||||||
| #[test] | ||||||
| fn test_normalize_endpoint() { | ||||||
| let bucket = "test-bucket"; | ||||||
|
|
||||||
| // Case 1: force_path_style = true -> should append bucket | ||||||
| assert_eq!( | ||||||
| normalize_endpoint("http://localhost:9000", bucket, true), | ||||||
| Some("http://localhost:9000/test-bucket".to_string()) | ||||||
| ); | ||||||
| assert_eq!( | ||||||
| normalize_endpoint("http://localhost:9000/", bucket, true), | ||||||
| Some("http://localhost:9000/test-bucket".to_string()) | ||||||
| ); | ||||||
| assert_eq!( | ||||||
| normalize_endpoint("https://my-s3-compatible.com", bucket, true), | ||||||
| Some("https://my-s3-compatible.com/test-bucket".to_string()) | ||||||
| ); | ||||||
|
|
||||||
| // Case 2: force_path_style = false -> should insert bucket as subdomain | ||||||
| assert_eq!( | ||||||
| normalize_endpoint("http://localhost:9000", bucket, false), | ||||||
| Some("http://test-bucket.localhost:9000".to_string()) | ||||||
| ); | ||||||
| assert_eq!( | ||||||
| normalize_endpoint("https://my-s3-compatible.com", bucket, false), | ||||||
| Some("https://test-bucket.my-s3-compatible.com".to_string()) | ||||||
| ); | ||||||
| assert_eq!( | ||||||
| normalize_endpoint("https://s3.oss-me-central-1.aliyuncs.com", bucket, false), | ||||||
| Some("https://test-bucket.s3.oss-me-central-1.aliyuncs.com".to_string()) | ||||||
| ); | ||||||
|
|
||||||
| // Case 3: Default S3 endpoint -> should return None (ignored) | ||||||
| assert_eq!( | ||||||
| normalize_endpoint("s3.amazonaws.com", bucket, true), | ||||||
| None | ||||||
| ); | ||||||
| assert_eq!( | ||||||
| normalize_endpoint("s3.amazonaws.com", bucket, false), | ||||||
| None | ||||||
| ); | ||||||
|
|
||||||
| // Case 4: Empty endpoint -> should return None | ||||||
| assert_eq!( | ||||||
| normalize_endpoint("", bucket, true), | ||||||
| None | ||||||
| ); | ||||||
| } | ||||||
|
|
||||||
| #[test] | ||||||
| fn test_extract_s3_config_custom_endpoint() { | ||||||
| // When path.style.access is not set, it defaults to false (virtual-hosted-style) | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Is this what you mean ?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But https://github.com/apache/datafusion-comet/pull/2803/files#diff-e8053ba6b0c33f3c35806e207a14881282d9c8a83c2a8adeed55d6a91493fb78R1892 says |
||||||
| // So bucket should be inserted as subdomain | ||||||
| let cases = vec![ | ||||||
| ("custom.endpoint.com", "https://custom.endpoint.com"), | ||||||
| ("https://custom.endpoint.com", "https://custom.endpoint.com"), | ||||||
| ("custom.endpoint.com", "https://test-bucket.custom.endpoint.com"), | ||||||
| ("https://custom.endpoint.com", "https://test-bucket.custom.endpoint.com"), | ||||||
| ( | ||||||
| "https://custom.endpoint.com/path/to/resource", | ||||||
| "https://custom.endpoint.com/path/to/resource", | ||||||
| "https://test-bucket.custom.endpoint.com/path/to/resource", | ||||||
| ), | ||||||
| ]; | ||||||
| for (endpoint, configured_endpoint) in cases { | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not sure that it is more clear