From b79d9309247069d5beea95554ebba81a5d8f48fa Mon Sep 17 00:00:00 2001 From: Rahul Prakash Date: Fri, 21 Nov 2025 10:54:15 +0530 Subject: [PATCH 1/5] Add timeout and retry logic to Azure token fetch Previously, the getAccessToken method used an unbounded blocking call which could hang indefinitely if Azure's token endpoint was slow or unresponsive. This change adds defensive timeout and retry mechanisms: - 15-second timeout per individual token request attempt - Exponential backoff retry (3 attempts: 2s, 4s, 8s) with 50% jitter to prevent thundering herd during mass failures - 90-second overall timeout as a safety net - Specific retry logic for known transient Azure AD errors (AADSTS50058, AADSTS50078, AADSTS700084, 503, 429) This makes the system more resilient to transient Azure service issues and prevents indefinite blocking that could cascade to request timeouts or service degradation. --- .../AzureCredentialsStorageIntegration.java | 73 ++++++++++++++++++- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java index a043a7daa5..6460d6d8ab 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java @@ -39,6 +39,7 @@ import com.azure.storage.file.datalake.sas.PathSasPermission; import com.google.common.annotations.VisibleForTesting; import jakarta.annotation.Nonnull; +import java.time.Duration; import java.time.Instant; import java.time.OffsetDateTime; import java.time.Period; @@ -55,6 +56,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import reactor.core.publisher.Mono; +import reactor.util.retry.Retry; /** Azure credential vendor that supports generating SAS token 
*/ public class AzureCredentialsStorageIntegration @@ -312,16 +314,83 @@ private void validateAccountAndContainer( }); } + /** + * Fetches an Azure access token with timeout and retry logic to handle transient failures. + * + *

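This method implements a defensive strategy against slow or failing token requests: + * + *
    + *
  • 15-second timeout per individual request attempt + *
  • Exponential backoff retry (3 attempts: 2s, 4s, 8s) with 50% jitter + *
  • 90-second overall timeout as a safety net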
+ * + * @param tenantId the Azure tenant ID + * @return the access token + * @throws RuntimeException if token fetch fails after all retries or times out + */ private AccessToken getAccessToken(String tenantId) { String scope = "https://storage.azure.com/.default"; AccessToken accessToken = defaultAzureCredential .getToken(new TokenRequestContext().addScopes(scope).setTenantId(tenantId)) - .blockOptional() + .timeout(Duration.ofSeconds(15)) // Per-attempt timeout + .doOnError( + error -> + LOGGER.warn( + "Error fetching Azure access token for tenant {}: {}", + tenantId, + error.getMessage())) + .retryWhen( + Retry.backoff(3, Duration.ofSeconds(2)) // 3 retries: 2s, 4s, 8s + .jitter(0.5) // ±50% jitter to prevent thundering herd + .filter( + throwable -> + throwable instanceof java.util.concurrent.TimeoutException + || isRetriableAzureException(throwable)) + .doBeforeRetry( + retrySignal -> + LOGGER.info( + "Retrying Azure token fetch for tenant {} (attempt {}/3)", + tenantId, + retrySignal.totalRetries() + 1)) + .onRetryExhaustedThrow( + (retryBackoffSpec, retrySignal) -> + new RuntimeException( + String.format( + "Azure token fetch exhausted after %d attempts for tenant %s", + retrySignal.totalRetries(), tenantId), + retrySignal.failure()))) + .blockOptional(Duration.ofSeconds(90)) // Maximum total wait time .orElse(null); + if (accessToken == null) { - throw new RuntimeException("No access token fetched!"); + throw new RuntimeException( + String.format("Failed to fetch Azure access token for tenant %s", tenantId)); } return accessToken; } + + /** + * Determines if an exception is retriable for Azure token requests. 
+ * + * @param throwable the exception to check + * @return true if the exception should trigger a retry + */ + private boolean isRetriableAzureException(Throwable throwable) { + // Retry on timeout exceptions + if (throwable instanceof java.util.concurrent.TimeoutException) { + return true; + } + // Retry on common transient Azure credential exceptions + String message = throwable.getMessage(); + if (message != null) { + return message.contains("AADSTS50058") // Token endpoint timeout + || message.contains("AADSTS50078") // Service temporarily unavailable + || message.contains("AADSTS700084") // Token refresh required + || message.contains("503") // Service unavailable + || message.contains("429"); // Too many requests + } + return false; + } } From 3ea3d6448ed284e558b1db6fea3318037d1d8ce2 Mon Sep 17 00:00:00 2001 From: Rahul Prakash Date: Mon, 24 Nov 2025 12:04:47 +0530 Subject: [PATCH 2/5] Make Azure token fetch timeout and retry configurable - Add 4 generic cloud provider API configuration constants: CLOUD_API_TIMEOUT_SECONDS (default: 15) CLOUD_API_RETRY_COUNT (default: 3) CLOUD_API_RETRY_DELAY_SECONDS (default: 2) CLOUD_API_RETRY_JITTER_MILLIS (default: 500) - Update AzureCredentialsStorageIntegration to use configurable values - Remove hardcoded 90s overall timeout (per-attempt timeout + retries sufficient) - Improve error logging and retry logic documentation - Generic naming allows future reuse by AWS/GCP storage integrations Addresses review comments from dimas-b on PR 3113 --- .../core/config/FeatureConfiguration.java | 42 ++++++++++++++ .../AzureCredentialsStorageIntegration.java | 58 +++++++++++++------ 2 files changed, 83 insertions(+), 17 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java b/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java index 1772f47256..6b4ae88179 100644 --- 
a/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java @@ -438,4 +438,46 @@ public static void enforceFeatureEnabledOrThrow( "If set to true (default), allow credential vending for external catalogs. Note this requires ALLOW_EXTERNAL_CATALOG_CREDENTIAL_VENDING to be true first.") .defaultValue(true) .buildFeatureConfiguration(); + + public static final FeatureConfiguration CLOUD_API_TIMEOUT_SECONDS = + PolarisConfiguration.builder() + .key("CLOUD_API_TIMEOUT_SECONDS") + .description( + "Timeout in seconds for cloud provider API requests. " + + "Prevents indefinite blocking when cloud provider endpoints are slow or unresponsive. " + + "Used internally by storage integrations for credential vending and other cloud operations. " + + "Currently only used by Azure storage integration (not yet implemented for AWS S3 or GCP).") + .defaultValue(15) + .buildFeatureConfiguration(); + + public static final FeatureConfiguration CLOUD_API_RETRY_COUNT = + PolarisConfiguration.builder() + .key("CLOUD_API_RETRY_COUNT") + .description( + "Number of retry attempts for cloud provider API requests. " + + "Uses exponential backoff with jitter to handle transient failures. " + + "Currently only used by Azure storage integration (not yet implemented for AWS S3 or GCP).") + .defaultValue(3) + .buildFeatureConfiguration(); + + public static final FeatureConfiguration CLOUD_API_RETRY_DELAY_SECONDS = + PolarisConfiguration.builder() + .key("CLOUD_API_RETRY_DELAY_SECONDS") + .description( + "Initial delay in seconds before first retry for cloud provider API requests. " + + "Delay doubles with each retry (exponential backoff). 
" + + "Currently only used by Azure storage integration (not yet implemented for AWS S3 or GCP).") + .defaultValue(2) + .buildFeatureConfiguration(); + + public static final FeatureConfiguration CLOUD_API_RETRY_JITTER_MILLIS = + PolarisConfiguration.builder() + .key("CLOUD_API_RETRY_JITTER_MILLIS") + .description( + "Maximum jitter in milliseconds added to retry delays for cloud provider API requests. " + + "Helps prevent thundering herd when multiple requests fail simultaneously. " + + "Actual jitter is random between 0 and this value. " + + "Currently only used by Azure storage integration (not yet implemented for AWS S3 or GCP).") + .defaultValue(500) + .buildFeatureConfiguration(); } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java index 3bd6b6f72d..3529278742 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java @@ -18,6 +18,10 @@ */ package org.apache.polaris.core.storage.azure; +import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_RETRY_DELAY_SECONDS; +import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_RETRY_JITTER_MILLIS; +import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_RETRY_COUNT; +import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_TIMEOUT_SECONDS; import static org.apache.polaris.core.config.FeatureConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; import com.azure.core.credential.AccessToken; @@ -122,7 +126,7 @@ public StorageAccessConfig getSubscopedCreds( OffsetDateTime.ofInstant( start.plusSeconds(3600), ZoneOffset.UTC); // 1 hr to sync with AWS and GCP Access token - AccessToken accessToken = 
getAccessToken(config().getTenantId()); + AccessToken accessToken = getAccessToken(realmConfig, config().getTenantId()); // Get user delegation key. // Set the new generated user delegation key expiry to 7 days and minute 1 min // Azure strictly requires the end time to be <= 7 days from the current time, -1 min to avoid @@ -315,26 +319,37 @@ private void validateAccountAndContainer( } /** - * Fetches an Azure access token with timeout and retry logic to handle transient failures. + * Fetches an Azure AD access token with timeout and retry logic to handle transient failures. * - *

This method implements a defensive strategy against slow or failing token requests: + *

This access token is used internally to obtain a user delegation key from Azure Storage, + * which is then used to generate SAS tokens for client credential vending. + * + *

This method implements a defensive strategy against slow or failing cloud provider requests: * *

    - *
  • 15-second timeout per individual request attempt - *
  • Exponential backoff retry (3 attempts: 2s, 4s, 8s) with 50% jitter - *
  • 90-second overall timeout as a safety net + *
  • Per-attempt timeout (configurable via CLOUD_API_TIMEOUT_SECONDS, default 15s) + *
  • Exponential backoff retry (configurable count and initial delay via CLOUD_API_RETRY_COUNT + * and CLOUD_API_RETRY_DELAY_SECONDS, defaults: 3 attempts starting at 2s) + *
  • Jitter to prevent thundering herd (configurable via CLOUD_API_RETRY_JITTER_MILLIS, default 500ms) *
* + * @param realmConfig the realm configuration to get timeout and retry settings * @param tenantId the Azure tenant ID * @return the access token * @throws RuntimeException if token fetch fails after all retries or times out */ - private AccessToken getAccessToken(String tenantId) { + private AccessToken getAccessToken(RealmConfig realmConfig, String tenantId) { + int timeoutSeconds = realmConfig.getConfig(CLOUD_API_TIMEOUT_SECONDS); + int retryCount = realmConfig.getConfig(CLOUD_API_RETRY_COUNT); + int initialDelaySeconds = realmConfig.getConfig(CLOUD_API_RETRY_DELAY_SECONDS); + int jitterMillis = realmConfig.getConfig(CLOUD_API_RETRY_JITTER_MILLIS); + double jitter = jitterMillis / 1000.0; // Convert millis to fraction for jitter factor + String scope = "https://storage.azure.com/.default"; AccessToken accessToken = defaultAzureCredential .getToken(new TokenRequestContext().addScopes(scope).setTenantId(tenantId)) - .timeout(Duration.ofSeconds(15)) // Per-attempt timeout + .timeout(Duration.ofSeconds(timeoutSeconds)) .doOnError( error -> LOGGER.warn( @@ -342,18 +357,16 @@ private AccessToken getAccessToken(String tenantId) { tenantId, error.getMessage())) .retryWhen( - Retry.backoff(3, Duration.ofSeconds(2)) // 3 retries: 2s, 4s, 8s - .jitter(0.5) // ±50% jitter to prevent thundering herd - .filter( - throwable -> - throwable instanceof java.util.concurrent.TimeoutException - || isRetriableAzureException(throwable)) + Retry.backoff(retryCount, Duration.ofSeconds(initialDelaySeconds)) + .jitter(jitter) + .filter(this::isRetriableAzureException) .doBeforeRetry( retrySignal -> LOGGER.info( - "Retrying Azure token fetch for tenant {} (attempt {}/3)", + "Retrying Azure token fetch for tenant {} (attempt {}/{})", tenantId, - retrySignal.totalRetries() + 1)) + retrySignal.totalRetries() + 1, + retryCount)) .onRetryExhaustedThrow( (retryBackoffSpec, retrySignal) -> new RuntimeException( @@ -361,7 +374,7 @@ private AccessToken getAccessToken(String tenantId) { "Azure 
token fetch exhausted after %d attempts for tenant %s", retrySignal.totalRetries(), tenantId), retrySignal.failure()))) - .blockOptional(Duration.ofSeconds(90)) // Maximum total wait time + .blockOptional() .orElse(null); if (accessToken == null) { @@ -374,6 +387,17 @@ private AccessToken getAccessToken(String tenantId) { /** * Determines if an exception is retriable for Azure token requests. * + *

Retries are attempted for: + * + *

    + *
  • TimeoutException - per-attempt timeout exceeded + *
  • AADSTS50058 - Silent sign-in attempted but no user session is available + *
  • AADSTS50078 - Presented multi-factor authentication has expired + *
  • AADSTS700084 - SPA-issued refresh token reached its fixed lifetime and expired + *
  • 503 - Service unavailable + *
  • 429 - Too many requests (rate limited) + *
+ * * @param throwable the exception to check * @return true if the exception should trigger a retry */ From 25a70b2fd54ed492bfa743585ae4f8c4838d4e40 Mon Sep 17 00:00:00 2001 From: Rahul Prakash Date: Tue, 25 Nov 2025 10:08:08 +0530 Subject: [PATCH 3/5] Use milliseconds for timeout and retry delay configs - Rename CLOUD_API_TIMEOUT_SECONDS to CLOUD_API_TIMEOUT_MILLIS (default: 15000ms) - Rename CLOUD_API_RETRY_DELAY_SECONDS to CLOUD_API_RETRY_DELAY_MILLIS (default: 2000ms) - Update AzureCredentialsStorageIntegration to use Duration.ofMillis() - Allows admins to configure sub-second timeouts for finer control Addresses review feedback from dimas-b --- .../core/config/FeatureConfiguration.java | 16 ++++++++-------- .../AzureCredentialsStorageIntegration.java | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java b/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java index 6b4ae88179..176d6bf488 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java @@ -439,15 +439,15 @@ public static void enforceFeatureEnabledOrThrow( .defaultValue(true) .buildFeatureConfiguration(); - public static final FeatureConfiguration CLOUD_API_TIMEOUT_SECONDS = + public static final FeatureConfiguration CLOUD_API_TIMEOUT_MILLIS = PolarisConfiguration.builder() - .key("CLOUD_API_TIMEOUT_SECONDS") + .key("CLOUD_API_TIMEOUT_MILLIS") .description( - "Timeout in seconds for cloud provider API requests. " + "Timeout in milliseconds for cloud provider API requests. " + "Prevents indefinite blocking when cloud provider endpoints are slow or unresponsive. " + "Used internally by storage integrations for credential vending and other cloud operations. 
" + "Currently only used by Azure storage integration (not yet implemented for AWS S3 or GCP).") - .defaultValue(15) + .defaultValue(15000) .buildFeatureConfiguration(); public static final FeatureConfiguration CLOUD_API_RETRY_COUNT = @@ -460,14 +460,14 @@ public static void enforceFeatureEnabledOrThrow( .defaultValue(3) .buildFeatureConfiguration(); - public static final FeatureConfiguration CLOUD_API_RETRY_DELAY_SECONDS = + public static final FeatureConfiguration CLOUD_API_RETRY_DELAY_MILLIS = PolarisConfiguration.builder() - .key("CLOUD_API_RETRY_DELAY_SECONDS") + .key("CLOUD_API_RETRY_DELAY_MILLIS") .description( - "Initial delay in seconds before first retry for cloud provider API requests. " + "Initial delay in milliseconds before first retry for cloud provider API requests. " + "Delay doubles with each retry (exponential backoff). " + "Currently only used by Azure storage integration (not yet implemented for AWS S3 or GCP).") - .defaultValue(2) + .defaultValue(2000) .buildFeatureConfiguration(); public static final FeatureConfiguration CLOUD_API_RETRY_JITTER_MILLIS = diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java index 3529278742..970acb9e22 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java @@ -18,10 +18,10 @@ */ package org.apache.polaris.core.storage.azure; -import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_RETRY_DELAY_SECONDS; +import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_RETRY_DELAY_MILLIS; import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_RETRY_JITTER_MILLIS; import static 
org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_RETRY_COUNT; -import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_TIMEOUT_SECONDS; +import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_TIMEOUT_MILLIS; import static org.apache.polaris.core.config.FeatureConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; import com.azure.core.credential.AccessToken; @@ -327,9 +327,9 @@ private void validateAccountAndContainer( *

This method implements a defensive strategy against slow or failing cloud provider requests: * *

    - *
  • Per-attempt timeout (configurable via CLOUD_API_TIMEOUT_SECONDS, default 15s) + *
  • Per-attempt timeout (configurable via CLOUD_API_TIMEOUT_MILLIS, default 15000ms) *
  • Exponential backoff retry (configurable count and initial delay via CLOUD_API_RETRY_COUNT - * and CLOUD_API_RETRY_DELAY_SECONDS, defaults: 3 attempts starting at 2s) + * and CLOUD_API_RETRY_DELAY_MILLIS, defaults: 3 attempts starting at 2000ms) *
  • Jitter to prevent thundering herd (configurable via CLOUD_API_RETRY_JITTER_MILLIS, default 500ms) *
* @@ -339,9 +339,9 @@ private void validateAccountAndContainer( * @throws RuntimeException if token fetch fails after all retries or times out */ private AccessToken getAccessToken(RealmConfig realmConfig, String tenantId) { - int timeoutSeconds = realmConfig.getConfig(CLOUD_API_TIMEOUT_SECONDS); + int timeoutMillis = realmConfig.getConfig(CLOUD_API_TIMEOUT_MILLIS); int retryCount = realmConfig.getConfig(CLOUD_API_RETRY_COUNT); - int initialDelaySeconds = realmConfig.getConfig(CLOUD_API_RETRY_DELAY_SECONDS); + int initialDelayMillis = realmConfig.getConfig(CLOUD_API_RETRY_DELAY_MILLIS); int jitterMillis = realmConfig.getConfig(CLOUD_API_RETRY_JITTER_MILLIS); double jitter = jitterMillis / 1000.0; // Convert millis to fraction for jitter factor @@ -349,7 +349,7 @@ private AccessToken getAccessToken(RealmConfig realmConfig, String tenantId) { AccessToken accessToken = defaultAzureCredential .getToken(new TokenRequestContext().addScopes(scope).setTenantId(tenantId)) - .timeout(Duration.ofSeconds(timeoutSeconds)) + .timeout(Duration.ofMillis(timeoutMillis)) .doOnError( error -> LOGGER.warn( @@ -357,7 +357,7 @@ private AccessToken getAccessToken(RealmConfig realmConfig, String tenantId) { tenantId, error.getMessage())) .retryWhen( - Retry.backoff(retryCount, Duration.ofSeconds(initialDelaySeconds)) + Retry.backoff(retryCount, Duration.ofMillis(initialDelayMillis)) .jitter(jitter) .filter(this::isRetriableAzureException) .doBeforeRetry( From 13f1c0452ec20b8d5be0a2ba7cada0d8f20def7b Mon Sep 17 00:00:00 2001 From: Rahul Prakash Date: Wed, 26 Nov 2025 19:49:51 +0530 Subject: [PATCH 4/5] Change jitter from millis to factor (0.0-1.0) as suggested by @dimas-b The jitter factor applies to the computed exponential backoff delay, not a fixed millisecond value. Using CLOUD_API_RETRY_JITTER_FACTOR (0.0-1.0 range) is clearer and conceptually correct. 
--- .../polaris/core/config/FeatureConfiguration.java | 13 +++++++------ .../azure/AzureCredentialsStorageIntegration.java | 9 ++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java b/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java index 176d6bf488..b69b760a35 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java @@ -470,14 +470,15 @@ public static void enforceFeatureEnabledOrThrow( .defaultValue(2000) .buildFeatureConfiguration(); - public static final FeatureConfiguration CLOUD_API_RETRY_JITTER_MILLIS = - PolarisConfiguration.builder() - .key("CLOUD_API_RETRY_JITTER_MILLIS") + public static final FeatureConfiguration CLOUD_API_RETRY_JITTER_FACTOR = + PolarisConfiguration.builder() + .key("CLOUD_API_RETRY_JITTER_FACTOR") .description( - "Maximum jitter in milliseconds added to retry delays for cloud provider API requests. " + "Jitter factor (0.0 to 1.0) applied to retry delays for cloud provider API requests. " + + "The jitter is applied as a random percentage of the computed exponential backoff delay. " + + "For example, 0.5 means up to 50%% random jitter will be added to each retry delay. " + "Helps prevent thundering herd when multiple requests fail simultaneously. " - + "Actual jitter is random between 0 and this value. 
" + "Currently only used by Azure storage integration (not yet implemented for AWS S3 or GCP).") - .defaultValue(500) + .defaultValue(0.5) .buildFeatureConfiguration(); } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java index 970acb9e22..fe59674ed6 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java @@ -19,7 +19,7 @@ package org.apache.polaris.core.storage.azure; import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_RETRY_DELAY_MILLIS; -import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_RETRY_JITTER_MILLIS; +import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_RETRY_JITTER_FACTOR; import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_RETRY_COUNT; import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_TIMEOUT_MILLIS; import static org.apache.polaris.core.config.FeatureConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; @@ -330,7 +330,7 @@ private void validateAccountAndContainer( *
  • Per-attempt timeout (configurable via CLOUD_API_TIMEOUT_MILLIS, default 15000ms) *
  • Exponential backoff retry (configurable count and initial delay via CLOUD_API_RETRY_COUNT * and CLOUD_API_RETRY_DELAY_MILLIS, defaults: 3 attempts starting at 2000ms) - *
  • Jitter to prevent thundering herd (configurable via CLOUD_API_RETRY_JITTER_MILLIS, default 500ms) + *
  • Jitter to prevent thundering herd (configurable via CLOUD_API_RETRY_JITTER_FACTOR, default 0.5 = 50%%) * * * @param realmConfig the realm configuration to get timeout and retry settings @@ -342,8 +342,7 @@ private AccessToken getAccessToken(RealmConfig realmConfig, String tenantId) { int timeoutMillis = realmConfig.getConfig(CLOUD_API_TIMEOUT_MILLIS); int retryCount = realmConfig.getConfig(CLOUD_API_RETRY_COUNT); int initialDelayMillis = realmConfig.getConfig(CLOUD_API_RETRY_DELAY_MILLIS); - int jitterMillis = realmConfig.getConfig(CLOUD_API_RETRY_JITTER_MILLIS); - double jitter = jitterMillis / 1000.0; // Convert millis to fraction for jitter factor + double jitter = realmConfig.getConfig(CLOUD_API_RETRY_JITTER_FACTOR); String scope = "https://storage.azure.com/.default"; AccessToken accessToken = @@ -358,7 +357,7 @@ private AccessToken getAccessToken(RealmConfig realmConfig, String tenantId) { error.getMessage())) .retryWhen( Retry.backoff(retryCount, Duration.ofMillis(initialDelayMillis)) - .jitter(jitter) + .jitter(jitter) // Apply jitter factor to computed delay .filter(this::isRetriableAzureException) .doBeforeRetry( retrySignal -> From cf3f6c099f22ee7968455e3dc7832ad115ac7822 Mon Sep 17 00:00:00 2001 From: Rahul Prakash Date: Wed, 26 Nov 2025 20:07:22 +0530 Subject: [PATCH 5/5] Apply spotless formatting --- .../storage/azure/AzureCredentialsStorageIntegration.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java index fe59674ed6..d16a3f115b 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/azure/AzureCredentialsStorageIntegration.java @@ -18,9 +18,9 @@ */ package 
org.apache.polaris.core.storage.azure; +import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_RETRY_COUNT; import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_RETRY_DELAY_MILLIS; import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_RETRY_JITTER_FACTOR; -import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_RETRY_COUNT; import static org.apache.polaris.core.config.FeatureConfiguration.CLOUD_API_TIMEOUT_MILLIS; import static org.apache.polaris.core.config.FeatureConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; @@ -330,7 +330,8 @@ private void validateAccountAndContainer( *
  • Per-attempt timeout (configurable via CLOUD_API_TIMEOUT_MILLIS, default 15000ms) *
  • Exponential backoff retry (configurable count and initial delay via CLOUD_API_RETRY_COUNT * and CLOUD_API_RETRY_DELAY_MILLIS, defaults: 3 attempts starting at 2000ms) - *
  • Jitter to prevent thundering herd (configurable via CLOUD_API_RETRY_JITTER_FACTOR, default 0.5 = 50%%) + *
  • Jitter to prevent thundering herd (configurable via CLOUD_API_RETRY_JITTER_FACTOR, + * default 0.5 = 50%) * * * @param realmConfig the realm configuration to get timeout and retry settings