From c398350f5ae5e72ef19d8724d21e8d2b9ca67991 Mon Sep 17 00:00:00 2001 From: Syed Shameerur Rahman Date: Mon, 7 Oct 2024 19:43:31 +0530 Subject: [PATCH] HADOOP-19286: S3A: Support cross region access when S3 region/endpoint is set --- .../org/apache/hadoop/fs/s3a/Constants.java | 13 +++++ .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 18 +++++-- .../markdown/tools/hadoop-aws/connecting.md | 10 ++++ .../hadoop/fs/s3a/ITestS3AConfiguration.java | 5 +- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 54 +++++++++++++++++++ 5 files changed, 93 insertions(+), 7 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index cf7bc3ddcf248..eff42c1050f6b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1362,6 +1362,19 @@ private Constants() { */ public static final String XA_HEADER_PREFIX = "header."; + /** + * S3 cross region access enabled ? + * Value: {@value}. + */ + + public static final String AWS_S3_CROSS_REGION_ACCESS_ENABLED = + "fs.s3a.cross.region.access.enabled"; + /** + * Default value for S3 cross region access enabled: {@value}. + */ + public static final boolean AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT = true; + + /** * AWS S3 region for the bucket. When set bypasses the construction of * region through endpoint url. 
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 4b3db99924747..c9c3eee30ea5d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -55,6 +55,8 @@ import org.apache.hadoop.fs.store.LogExactlyOnce; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; @@ -259,8 +261,10 @@ protected ClientOverrideConfiguration.Builder createClientOverrideConfiguration( *
  • If endpoint is configured via via fs.s3a.endpoint, set it. * If no region is configured, try to parse region from endpoint.
  • *
  • If no region is configured, and it could not be parsed from the endpoint, - * set the default region as US_EAST_2 and enable cross region access.
  • + * set the default region as US_EAST_2 *
  • If configured region is empty, fallback to SDK resolution chain.
  • + *
  • S3 cross region access is enabled by default, irrespective of whether a region or endpoint + * is set.
  • * * * @param builder S3 client builder. @@ -320,7 +324,6 @@ private , ClientT> void builder.endpointOverride(endpoint); LOG.debug("Setting endpoint to {}", endpoint); } else { - builder.crossRegionAccessEnabled(true); origin = "central endpoint with cross region access"; LOG.debug("Enabling cross region access for endpoint {}", endpointStr); @@ -333,7 +336,6 @@ private , ClientT> void // no region is configured, and none could be determined from the endpoint. // Use US_EAST_2 as default. region = Region.of(AWS_S3_DEFAULT_REGION); - builder.crossRegionAccessEnabled(true); builder.region(region); origin = "cross region access fallback"; } else if (configuredRegion.isEmpty()) { @@ -344,8 +346,14 @@ private , ClientT> void LOG.debug(SDK_REGION_CHAIN_IN_USE); origin = "SDK region chain"; } - - LOG.debug("Setting region to {} from {}", region, origin); + boolean isCrossRegionAccessEnabled = conf.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, + AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT); + // s3 cross region access + if (isCrossRegionAccessEnabled) { + builder.crossRegionAccessEnabled(true); + } + LOG.debug("Setting region to {} from {} with cross region access {}", + region, origin, isCrossRegionAccessEnabled); } /** diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md index d39c480b7cc5a..6fa37750ded8c 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md @@ -48,6 +48,16 @@ There are multiple ways to connect to an S3 bucket The S3A connector supports all these; S3 Endpoints are the primary mechanism used -either explicitly declared or automatically determined from the declared region of the bucket. +The S3A connector supports S3 cross region access via AWS SDK which is enabled by default. 
This allows users to access S3 buckets in a different region than the one defined in the S3 endpoint/region configuration, as long as they are within the same AWS partition. However, S3 cross-region access can be disabled by setting: +```xml +<property> + <name>fs.s3a.cross.region.access.enabled</name> + <value>false</value> + <description>S3 cross region access</description> +</property> +``` + + Not supported: * AWS [Snowball](https://aws.amazon.com/snowball/). diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index a3b994054e4d3..967ba885bc90f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -439,6 +439,7 @@ public void testCustomUserAgent() throws Exception { @Test public void testRequestTimeout() throws Exception { conf = new Configuration(); + skipIfCrossRegionClient(conf); // remove the safety check on minimum durations. 
AWSClientConfig.setMinimumOperationDuration(Duration.ZERO); try { @@ -632,8 +633,8 @@ public static boolean isSTSSignerCalled() { */ private static void skipIfCrossRegionClient( Configuration configuration) { - if (configuration.get(ENDPOINT, null) == null - && configuration.get(AWS_REGION, null) == null) { + if (configuration.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, + AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT)) { skip("Skipping test as cross region client is in use "); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index d06224df5b355..80b061de03183 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -44,8 +44,10 @@ import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; import org.apache.hadoop.fs.s3a.test.PublicDatasetTestUtils; +import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; import static org.apache.hadoop.fs.s3a.Constants.ALLOW_REQUESTER_PAYS; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; @@ -71,6 +73,8 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { private static final String US_WEST_2 = "us-west-2"; + private static final String SA_EAST_1 = "sa-east-1"; + private static final String EU_WEST_2 = "eu-west-2"; private static final String CN_NORTHWEST_1 = "cn-northwest-1"; @@ -346,6 +350,46 @@ public void testCentralEndpointAndDifferentRegionThanBucket() throws Throwable { 
assertRequesterPaysFileExistence(newConf); } + @Test + public void testWithOutCrossRegionAccess() throws Exception { + describe("Verify cross region access fails when disabled"); + // skip the test if the region is sa-east-1 + skipCrossRegionTest(); + final Configuration newConf = new Configuration(getConfiguration()); + removeBaseAndBucketOverrides(newConf, + ENDPOINT, + AWS_S3_CROSS_REGION_ACCESS_ENABLED, + AWS_REGION); + // disable cross region access + newConf.setBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, false); + newConf.set(AWS_REGION, SA_EAST_1); + try (S3AFileSystem fs = new S3AFileSystem()) { + fs.initialize(getFileSystem().getUri(), newConf); + intercept(AWSRedirectException.class, + "does not match the AWS region containing the bucket", + () -> fs.exists(getFileSystem().getWorkingDirectory())); + } + } + + @Test + public void testWithCrossRegionAccess() throws Exception { + describe("Verify cross region access succeed when enabled"); + // skip the test if the region is sa-east-1 + skipCrossRegionTest(); + final Configuration newConf = new Configuration(getConfiguration()); + removeBaseAndBucketOverrides(newConf, + ENDPOINT, + AWS_S3_CROSS_REGION_ACCESS_ENABLED, + AWS_REGION); + // enable cross region access + newConf.setBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, true); + newConf.set(AWS_REGION, SA_EAST_1); + try (S3AFileSystem fs = new S3AFileSystem()) { + fs.initialize(getFileSystem().getUri(), newConf); + fs.exists(getFileSystem().getWorkingDirectory()); + } + } + @Test public void testCentralEndpointAndSameRegionAsBucket() throws Throwable { describe("Access public bucket using central endpoint and region " @@ -478,6 +522,16 @@ public void testCentralEndpointAndNullRegionFipsWithCRUD() throws Throwable { assertOpsUsingNewFs(); } + /** + * Skip the test if the region is null or sa-east-1. 
+ */ + private void skipCrossRegionTest() throws IOException { + String region = getFileSystem().getS3AInternals().getBucketMetadata().bucketRegion(); + if (region == null || SA_EAST_1.equals(region)) { + skip("Skipping test since region is null or it is set to sa-east-1"); + } + } + private void assertOpsUsingNewFs() throws IOException { final String file = getMethodName(); final Path basePath = methodPath();