From d1100a6f6354d1d291416939914de3fc1ead8f71 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Tue, 14 May 2024 05:02:06 +0530 Subject: [PATCH] Add retries for building S3 client (#16438) * Add retries for building S3 client * Use S3Utils instead of RetryUtils * Add test --- .../org/apache/druid/storage/s3/S3Utils.java | 13 +++++++---- .../s3/ServerSideEncryptingAmazonS3.java | 10 ++++++++- .../apache/druid/storage/s3/S3UtilsTest.java | 22 +++++++++++++++++++ 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java index 65cf8e04249e..c41878ce51e1 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java @@ -92,6 +92,10 @@ public boolean apply(Throwable e) } else if (e instanceof SdkClientException && e.getMessage().contains("Unable to execute HTTP request")) { // This is likely due to a temporary DNS issue and can be retried. return true; + } else if (e instanceof SdkClientException && e.getMessage().contains("Unable to find a region via the region provider chain")) { + // This can happen sometimes when AWS isn't able to obtain the credentials for some service: + // https://github.com/aws/aws-sdk-java/issues/2285 + return true; } else if (e instanceof AmazonClientException) { return AWSClientUtil.isClientExceptionRecoverable((AmazonClientException) e); } else { @@ -101,8 +105,8 @@ public boolean apply(Throwable e) }; /** - * Retries S3 operations that fail due to io-related exceptions. Service-level exceptions (access denied, file not - * found, etc) are not retried. + * Retries S3 operations that fail intermittently (due to io-related exceptions, during obtaining credentials, etc). + * Service-level exceptions (access denied, file not found, etc) are not retried. */ public static T retryS3Operation(Task f) throws Exception { @@ -110,8 +114,9 @@ public static T retryS3Operation(Task f) throws Exception } /** - * Retries S3 operations that fail due to io-related exceptions. Service-level exceptions (access denied, file not - * found, etc) are not retried. Also provide a way to set maxRetries that can be useful, i.e. for testing. + * Retries S3 operations that fail intermittently (due to io-related exceptions, during obtaining credentials, etc). + * Service-level exceptions (access denied, file not found, etc) are not retried. + * Also provide a way to set maxRetries that can be useful, i.e. for testing. */ public static T retryS3Operation(Task f, int maxRetries) throws Exception { diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/ServerSideEncryptingAmazonS3.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/ServerSideEncryptingAmazonS3.java index 320a0b9a6f99..0fb93a208337 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/ServerSideEncryptingAmazonS3.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/ServerSideEncryptingAmazonS3.java @@ -204,7 +204,15 @@ public ServerSideEncryptingAmazonS3 build() throw new ISE("S3StorageConfig cannot be null!"); } - return new ServerSideEncryptingAmazonS3(amazonS3ClientBuilder.build(), s3StorageConfig.getServerSideEncryption()); + AmazonS3 amazonS3Client; + try { + amazonS3Client = S3Utils.retryS3Operation(() -> amazonS3ClientBuilder.build()); + } + catch (Exception e) { + throw new RuntimeException(e); + } + + return new ServerSideEncryptingAmazonS3(amazonS3Client, s3StorageConfig.getServerSideEncryption()); } } } diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java index 6667d78a8c64..0c5e5840efe1 100644 --- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java +++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java @@ -19,6 +19,7 @@ package org.apache.druid.storage.s3; +import com.amazonaws.SdkClientException; import com.amazonaws.services.s3.model.AmazonS3Exception; import org.junit.Assert; import org.junit.Test; @@ -108,4 +109,25 @@ public void testRetryWith5XXErrorsExceedingMaxRetries() ); Assert.assertEquals(maxRetries, count.get()); } + + @Test + public void testRetryWithSdkClientException() throws Exception + { + final int maxRetries = 3; + final AtomicInteger count = new AtomicInteger(); + S3Utils.retryS3Operation( + () -> { + if (count.incrementAndGet() >= maxRetries) { + return "hey"; + } else { + throw new SdkClientException( + "Unable to find a region via the region provider chain. " + + "Must provide an explicit region in the builder or setup environment to supply a region." + ); + } + }, + maxRetries + ); + Assert.assertEquals(maxRetries, count.get()); + } }