From 33c85bc1ea95d807bd88778ad91c725a98c7103c Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 16 Oct 2025 15:40:42 +1100 Subject: [PATCH 01/31] Factor out non-CSP-specific retrying logic --- .../s3/S3RetryingInputStream.java | 453 +++++++----------- .../common/blobstore/RetryingInputStream.java | 322 +++++++++++++ 2 files changed, 486 insertions(+), 289 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java index ed06e7f594ca0..f73d73a578f99 100644 --- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java +++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java @@ -14,12 +14,11 @@ import software.amazon.awssdk.services.s3.model.GetObjectRequest; import software.amazon.awssdk.services.s3.model.GetObjectResponse; -import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.Version; import org.elasticsearch.common.blobstore.OperationPurpose; -import org.elasticsearch.core.IOUtils; +import org.elasticsearch.common.blobstore.RetryingInputStream; import org.elasticsearch.repositories.blobstore.RequestedRangeNotSatisfiedException; import org.elasticsearch.repositories.s3.S3BlobStore.Operation; import org.elasticsearch.rest.RestStatus; @@ -27,11 +26,8 @@ import java.io.IOException; import java.io.InputStream; import java.nio.file.NoSuchFileException; -import java.util.ArrayList; -import java.util.List; import java.util.Map; -import static org.elasticsearch.core.Strings.format; import static org.elasticsearch.repositories.s3.S3BlobStore.configureRequestForMetrics; /** @@ -41,107 +37,112 @@ * * See https://github.com/aws/aws-sdk-java/issues/856 for the related SDK issue */ -class S3RetryingInputStream extends InputStream { +class S3RetryingInputStream extends RetryingInputStream { private static final Logger logger = LogManager.getLogger(S3RetryingInputStream.class); static final int MAX_SUPPRESSED_EXCEPTIONS = 10; - private final OperationPurpose purpose; - private final S3BlobStore blobStore; - private final String blobKey; - private final long start; - private final long end; - private final List failures; - - private ResponseInputStream currentStream; - private long currentStreamFirstOffset; - private long currentStreamLastOffset; - private int attempt = 1; - private int failuresAfterMeaningfulProgress = 0; - private long currentOffset; - private boolean closed; - private boolean eof; - private boolean aborted = false; - S3RetryingInputStream(OperationPurpose purpose, S3BlobStore blobStore, String blobKey) throws IOException { this(purpose, blobStore, blobKey, 0, Long.MAX_VALUE - 1); } // both start and end are inclusive bounds, following the definition in GetObjectRequest.setRange S3RetryingInputStream(OperationPurpose purpose, S3BlobStore blobStore, String blobKey, long start, long end) throws IOException { - if (start < 0L) { - throw new IllegalArgumentException("start must be non-negative"); - } - if (end < start || end == Long.MAX_VALUE) { - throw new IllegalArgumentException("end must be >= start and not Long.MAX_VALUE"); - } - this.purpose = purpose; - this.blobStore = blobStore; - this.blobKey = blobKey; - this.failures = new ArrayList<>(MAX_SUPPRESSED_EXCEPTIONS); - this.start = start; - this.end = end; - final int initialAttempt = attempt; - openStreamWithRetry(); - maybeLogAndRecordMetricsForSuccess(initialAttempt, "open"); + super(new S3BlobStoreServices(blobStore, blobKey, purpose), purpose, start, end); } - private void openStreamWithRetry() throws IOException { - while (true) { + private record S3BlobStoreServices(S3BlobStore blobStore, String blobKey, OperationPurpose purpose) + implements + BlobStoreServices { + + @Override + public InputStreamAtVersion getInputStreamAtVersion(String version, long start, long end) throws IOException { try (AmazonS3Reference clientReference = blobStore.clientReference()) { final var getObjectRequestBuilder = GetObjectRequest.builder().bucket(blobStore.bucket()).key(blobKey); configureRequestForMetrics(getObjectRequestBuilder, blobStore, Operation.GET_OBJECT, purpose); - if (currentOffset > 0 || start > 0 || end < Long.MAX_VALUE - 1) { - assert start + currentOffset <= end - : "requesting beyond end, start = " + start + " offset=" + currentOffset + " end=" + end; - getObjectRequestBuilder.range("bytes=" + Math.addExact(start, currentOffset) + "-" + end); + if (start > 0 || end < Long.MAX_VALUE - 1) { + assert start <= end : "requesting beyond end, start = " + start + " end=" + end; + getObjectRequestBuilder.range("bytes=" + start + "-" + end); + } + if (version != null) { + // This is a second or subsequent request, ensure the object hasn't changed since the first request + getObjectRequestBuilder.ifMatch(version); } - this.currentStreamFirstOffset = Math.addExact(start, currentOffset); final var getObjectRequest = getObjectRequestBuilder.build(); final var getObjectResponse = clientReference.client().getObject(getObjectRequest); - this.currentStreamLastOffset = Math.addExact(currentStreamFirstOffset, getStreamLength(getObjectResponse.response())); - this.currentStream = getObjectResponse; - return; + return new InputStreamAtVersion<>(new S3InputStream(getObjectResponse, start, end), getObjectResponse.response().eTag()); } catch (SdkException e) { if (e instanceof SdkServiceException sdkServiceException) { if (sdkServiceException.statusCode() == RestStatus.NOT_FOUND.getStatus()) { - throw addSuppressedExceptions( - new NoSuchFileException("Blob object [" + blobKey + "] not found: " + sdkServiceException.getMessage()) - ); + throw new NoSuchFileException("Blob object [" + blobKey + "] not found: " + sdkServiceException.getMessage()); } if (sdkServiceException.statusCode() == RestStatus.REQUESTED_RANGE_NOT_SATISFIED.getStatus()) { - throw addSuppressedExceptions( - new RequestedRangeNotSatisfiedException( - blobKey, - currentStreamFirstOffset, - (end < Long.MAX_VALUE - 1) ? end - currentStreamFirstOffset + 1 : end, - sdkServiceException - ) + throw new RequestedRangeNotSatisfiedException( + blobKey, + start, + (end < Long.MAX_VALUE - 1) ? end - start + 1 : end, + sdkServiceException ); } } - - if (attempt == 1) { - blobStore.getS3RepositoriesMetrics().retryStartedCounter().incrementBy(1, metricAttributes("open")); - } - final long delayInMillis = maybeLogAndComputeRetryDelay("opening", e); - delayBeforeRetry(delayInMillis); + throw new IOException("failed to get input stream for [" + blobKey + "]", e); } } + + @Override + public void onRetryStarted(String action) { + blobStore.getS3RepositoriesMetrics().retryStartedCounter().incrementBy(1, metricAttributes(action)); + } + + @Override + public void onRetrySucceeded(String action, long numberOfRetries) { + final Map attributes = metricAttributes(action); + blobStore.getS3RepositoriesMetrics().retryCompletedCounter().incrementBy(1, attributes); + blobStore.getS3RepositoriesMetrics().retryHistogram().record(numberOfRetries, attributes); + } + + @Override + public long getMeaningfulProgressSize() { + return Math.max(1L, blobStore.bufferSizeInBytes() / 100L); + } + + @Override + public int getMaxRetries() { + return blobStore.getMaxRetries(); + } + + @Override + public String getBlobDescription() { + return blobStore.bucket() + "/" + blobKey; + } + + private Map metricAttributes(String action) { + return Map.of( + "repo_type", + S3Repository.TYPE, + "repo_name", + blobStore.getRepositoryMetadata().name(), + "operation", + Operation.GET_OBJECT.getKey(), + "purpose", + purpose.getKey(), + "action", + action + ); + } } - private long getStreamLength(final GetObjectResponse getObjectResponse) { + private static long getStreamLength(final GetObjectResponse getObjectResponse, long expectedStart, long expectedEnd) { try { - return tryGetStreamLength(getObjectResponse); + return tryGetStreamLength(getObjectResponse, expectedStart, expectedEnd); } catch (Exception e) { assert false : e; return Long.MAX_VALUE - 1L; // assume a large stream so that the underlying stream is aborted on closing, unless eof is reached } } - // exposed for testing - long tryGetStreamLength(GetObjectResponse getObjectResponse) { + private static long tryGetStreamLength(GetObjectResponse getObjectResponse, long expectedStart, long expectedEnd) { // Returns the content range of the object if response contains the Content-Range header. final var rangeString = getObjectResponse.contentRange(); if (rangeString != null) { @@ -164,14 +165,14 @@ long tryGetStreamLength(GetObjectResponse getObjectResponse) { if (rangeEnd < rangeStart) { throw new IllegalArgumentException("invalid Content-range header [" + rangeString + "]"); } - if (rangeStart != start + currentOffset) { + if (rangeStart != expectedStart) { throw new IllegalArgumentException( - "unexpected Content-range header [" + rangeString + "], should have started at " + (start + currentOffset) + "unexpected Content-range header [" + rangeString + "], should have started at " + expectedStart ); } - if (rangeEnd > end) { + if (rangeEnd > expectedEnd) { throw new IllegalArgumentException( - "unexpected Content-range header [" + rangeString + "], should have ended no later than " + end + "unexpected Content-range header [" + rangeString + "], should have ended no later than " + expectedEnd ); } return rangeEnd - rangeStart + 1L; @@ -179,250 +180,124 @@ long tryGetStreamLength(GetObjectResponse getObjectResponse) { return getObjectResponse.contentLength(); } - @Override - public int read() throws IOException { - ensureOpen(); - final int initialAttempt = attempt; - while (true) { - try { - final int result = currentStream.read(); - if (result == -1) { - eof = true; - } else { - currentOffset += 1; - } - maybeLogAndRecordMetricsForSuccess(initialAttempt, "read"); - return result; - } catch (IOException e) { - if (attempt == initialAttempt) { - blobStore.getS3RepositoriesMetrics().retryStartedCounter().incrementBy(1, metricAttributes("read")); - } - reopenStreamOrFail(e); - } + private static class S3InputStream extends InputStream { + + private final ResponseInputStream responseStream; + private final long start; + private final long end; + private final long lastOffset; + private int offset = 0; + private boolean closed; + private boolean eof; + private boolean aborted; + + private S3InputStream(ResponseInputStream responseStream, long start, long end) { + this.responseStream = responseStream; + this.start = start; + this.end = end; + lastOffset = getStreamLength(responseStream.response(), start, end); } - } - @Override - public int read(byte[] b, int off, int len) throws IOException { - ensureOpen(); - final int initialAttempt = attempt; - while (true) { - try { - final int bytesRead = currentStream.read(b, off, len); - if (bytesRead == -1) { - eof = true; - } else { - currentOffset += bytesRead; - } - maybeLogAndRecordMetricsForSuccess(initialAttempt, "read"); - return bytesRead; - } catch (IOException e) { - if (attempt == initialAttempt) { - blobStore.getS3RepositoriesMetrics().retryStartedCounter().incrementBy(1, metricAttributes("read")); - } - reopenStreamOrFail(e); + @Override + public int read() throws IOException { + ensureOpen(); + int result = responseStream.read(); + if (result == -1) { + eof = true; + } else { + offset++; } + return result; } - } - - private void ensureOpen() { - if (closed) { - assert false : "using S3RetryingInputStream after close"; - throw new IllegalStateException("using S3RetryingInputStream after close"); - } - } - private void reopenStreamOrFail(IOException e) throws IOException { - final long meaningfulProgressSize = Math.max(1L, blobStore.bufferSizeInBytes() / 100L); - if (currentStreamProgress() >= meaningfulProgressSize) { - failuresAfterMeaningfulProgress += 1; + @Override + public int read(byte[] b, int off, int len) throws IOException { + ensureOpen(); + final int bytesRead = responseStream.read(b, off, len); + if (bytesRead == -1) { + eof = true; + } else { + offset += bytesRead; + } + return bytesRead; } - final long delayInMillis = maybeLogAndComputeRetryDelay("reading", e); - maybeAbort(currentStream); - IOUtils.closeWhileHandlingException(currentStream); - delayBeforeRetry(delayInMillis); - openStreamWithRetry(); - } - - // The method throws if the operation should *not* be retried. Otherwise, it keeps a record for the attempt and associated failure - // and compute the delay before retry. - private long maybeLogAndComputeRetryDelay(String action, T e) throws T { - if (shouldRetry(attempt) == false) { - final var finalException = addSuppressedExceptions(e); - logForFailure(action, finalException); - throw finalException; + private void ensureOpen() { + if (closed) { + assert false : "using S3InputStream after close"; + throw new IllegalStateException("using S3InputStream after close"); + } } - // Log at info level for the 1st retry and then exponentially less - logForRetry(Integer.bitCount(attempt) == 1 ? Level.INFO : Level.DEBUG, action, e); - if (failures.size() < MAX_SUPPRESSED_EXCEPTIONS) { - failures.add(e); + @Override + public void close() throws IOException { + maybeAbort(responseStream); + try { + responseStream.close(); + } finally { + closed = true; + } } - final long delayInMillis = getRetryDelayInMillis(); - attempt += 1; // increment after computing delay because attempt affects the result - return delayInMillis; - } - - private void logForFailure(String action, Exception e) { - logger.warn( - () -> format( - "failed %s [%s/%s] at offset [%s] with purpose [%s]", - action, - blobStore.bucket(), - blobKey, - start + currentOffset, - purpose.getKey() - ), - e - ); - } - - private void logForRetry(Level level, String action, Exception e) { - logger.log( - level, - () -> format( - """ - failed %s [%s/%s] at offset [%s] with purpose [%s]; \ - this was attempt [%s] to read this blob which yielded [%s] bytes; in total \ - [%s] of the attempts to read this blob have made meaningful progress and do not count towards the maximum number of \ - retries; the maximum number of read attempts which do not make meaningful progress is [%s]""", - action, - blobStore.bucket(), - blobKey, - start + currentOffset, - purpose.getKey(), - attempt, - currentStreamProgress(), - failuresAfterMeaningfulProgress, - maxRetriesForNoMeaningfulProgress() - ), - e - ); - } - private void maybeLogAndRecordMetricsForSuccess(int initialAttempt, String action) { - if (attempt > initialAttempt) { - final int numberOfRetries = attempt - initialAttempt; - logger.info( - "successfully {} input stream for [{}/{}] with purpose [{}] after [{}] retries", - action, - blobStore.bucket(), - blobKey, - purpose.getKey(), - numberOfRetries - ); - final Map attributes = metricAttributes(action); - blobStore.getS3RepositoriesMetrics().retryCompletedCounter().incrementBy(1, attributes); - blobStore.getS3RepositoriesMetrics().retryHistogram().record(numberOfRetries, attributes); + /** + * Abort the {@link ResponseInputStream} if it wasn't read completely at the time this method is called, + * suppressing all thrown exceptions. + */ + private void maybeAbort(ResponseInputStream stream) { + if (isEof()) { + return; + } + try { + if (offset < lastOffset) { + stream.abort(); + aborted = true; + } + } catch (Exception e) { + logger.warn("Failed to abort stream before closing", e); + } } - } - private long currentStreamProgress() { - return Math.subtractExact(Math.addExact(start, currentOffset), currentStreamFirstOffset); - } - - private boolean shouldRetry(int attempt) { - if (purpose == OperationPurpose.REPOSITORY_ANALYSIS) { - return false; - } - if (purpose == OperationPurpose.INDICES) { - return true; + @Override + public long skip(long n) throws IOException { + // This could be optimized on a failure by re-opening stream directly to the preferred location. However, it is rarely called, + // so for now we will rely on the default implementation which just discards bytes by reading. + return super.skip(n); } - final int maxAttempts = blobStore.getMaxRetries() + 1; - return attempt < maxAttempts + failuresAfterMeaningfulProgress; - } - - private int maxRetriesForNoMeaningfulProgress() { - return purpose == OperationPurpose.INDICES ? Integer.MAX_VALUE : (blobStore.getMaxRetries() + 1); - } - private void delayBeforeRetry(long delayInMillis) { - try { - assert shouldRetry(attempt - 1) : "should not have retried"; - Thread.sleep(delayInMillis); - } catch (InterruptedException e) { - logger.info("s3 input stream delay interrupted", e); - Thread.currentThread().interrupt(); + @Override + public void reset() { + throw new UnsupportedOperationException("S3InputStream does not support seeking"); } - } - - // protected access for testing - protected long getRetryDelayInMillis() { - // Initial delay is 10 ms and cap max delay at 10 * 1024 millis, i.e. it retries every ~10 seconds at a minimum - return 10L << (Math.min(attempt - 1, 10)); - } - - private Map metricAttributes(String action) { - return Map.of( - "repo_type", - S3Repository.TYPE, - "repo_name", - blobStore.getRepositoryMetadata().name(), - "operation", - Operation.GET_OBJECT.getKey(), - "purpose", - purpose.getKey(), - "action", - action - ); - } - @Override - public void close() throws IOException { - maybeAbort(currentStream); - try { - currentStream.close(); - } finally { - closed = true; + // exposed for testing + private boolean isEof() { + return eof || offset == lastOffset; } - } - /** - * Abort the {@link ResponseInputStream} if it wasn't read completely at the time this method is called, - * suppressing all thrown exceptions. - */ - private void maybeAbort(ResponseInputStream stream) { - if (isEof()) { - return; - } - try { - if (start + currentOffset < currentStreamLastOffset) { - stream.abort(); - aborted = true; - } - } catch (Exception e) { - logger.warn("Failed to abort stream before closing", e); + // exposed for testing + private boolean isAborted() { + // just expose whether abort() was called, we cannot tell if the stream is really aborted + return aborted; } - } - - @Override - public long skip(long n) throws IOException { - // This could be optimized on a failure by re-opening stream directly to the preferred location. However, it is rarely called, - // so for now we will rely on the default implementation which just discards bytes by reading. - return super.skip(n); - } - @Override - public void reset() { - throw new UnsupportedOperationException("S3RetryingInputStream does not support seeking"); - } - - private T addSuppressedExceptions(T e) { - for (Exception failure : failures) { - e.addSuppressed(failure); + // exposed for testing + private long tryGetStreamLength(GetObjectResponse response) { + return S3RetryingInputStream.tryGetStreamLength(response, start, end); } - return e; } - // package-private for tests + // exposed for testing boolean isEof() { - return eof || start + currentOffset == currentStreamLastOffset; + return ((S3InputStream) currentStream.inputStream()).isEof(); } - // package-private for tests + // exposed for testing boolean isAborted() { - // just expose whether abort() was called, we cannot tell if the stream is really aborted - return aborted; + return ((S3InputStream) currentStream.inputStream()).isAborted(); + } + + // exposed for testing + long tryGetStreamLength(GetObjectResponse getObjectResponse) { + return ((S3InputStream) currentStream.inputStream()).tryGetStreamLength(getObjectResponse); } } diff --git a/server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java b/server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java new file mode 100644 index 0000000000000..ae77fce652695 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java @@ -0,0 +1,322 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.common.blobstore; + +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.repositories.blobstore.RequestedRangeNotSatisfiedException; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.NoSuchFileException; +import java.util.ArrayList; +import java.util.List; + +import static org.elasticsearch.core.Strings.format; + +public abstract class RetryingInputStream extends InputStream { + + private static final Logger logger = LogManager.getLogger(RetryingInputStream.class); + + static final int MAX_SUPPRESSED_EXCEPTIONS = 10; + + private final BlobStoreServices blobStoreServices; + private final OperationPurpose purpose; + private final long start; + private final long end; + private final List failures; + + protected InputStreamAtVersion currentStream; + private long currentStreamFirstOffset; + private int attempt = 1; + private int failuresAfterMeaningfulProgress = 0; + private int currentOffset = 0; + private boolean closed = false; + + protected RetryingInputStream(BlobStoreServices blobStoreServices, OperationPurpose purpose) throws IOException { + this(blobStoreServices, purpose, 0L, Long.MAX_VALUE - 1L); + } + + @SuppressWarnings("this-escape") // TODO: We can do better than this but I don't want to touch the tests for the first implementation + protected RetryingInputStream(BlobStoreServices blobStoreServices, OperationPurpose purpose, long start, long end) + throws IOException { + if (start < 0L) { + throw new IllegalArgumentException("start must be non-negative"); + } + if (end < start || end == Long.MAX_VALUE) { + throw new IllegalArgumentException("end must be >= start and not Long.MAX_VALUE"); + } + this.blobStoreServices = blobStoreServices; + this.purpose = purpose; + this.failures = new ArrayList<>(MAX_SUPPRESSED_EXCEPTIONS); + this.start = start; + this.end = end; + final int initialAttempt = attempt; + openStreamWithRetry(); + maybeLogAndRecordMetricsForSuccess(initialAttempt, "open"); + } + + private void openStreamWithRetry() throws IOException { + while (true) { + if (currentOffset > 0 || start > 0 || end < Long.MAX_VALUE - 1) { + assert start + currentOffset <= end + : "requesting beyond end, start = " + start + " offset=" + currentOffset + " end=" + end; + } + try { + currentStream = blobStoreServices.getInputStreamAtVersion( + currentStream != null ? currentStream.version : null, + start + currentOffset, + end + ); + this.currentStreamFirstOffset = Math.addExact(start, currentOffset); + return; + } catch (NoSuchFileException | RequestedRangeNotSatisfiedException e) { + throw e; + } catch (IOException e) { + if (attempt == 1) { + blobStoreServices.onRetryStarted("open"); + } + final long delayInMillis = maybeLogAndComputeRetryDelay("opening", e); + delayBeforeRetry(delayInMillis); + } + } + } + + @Override + public int read() throws IOException { + ensureOpen(); + final int initialAttempt = attempt; + while (true) { + try { + final int result = currentStream.inputStream.read(); + if (result != -1) { + currentOffset += 1; + } + maybeLogAndRecordMetricsForSuccess(initialAttempt, "read"); + return result; + } catch (IOException e) { + if (attempt == initialAttempt) { + blobStoreServices.onRetryStarted("read"); + } + reopenStreamOrFail(e); + } + } + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + ensureOpen(); + final int initialAttempt = attempt; + while (true) { + try { + final int bytesRead = currentStream.inputStream.read(b, off, len); + if (bytesRead != -1) { + currentOffset += bytesRead; + } + maybeLogAndRecordMetricsForSuccess(initialAttempt, "read"); + return bytesRead; + } catch (IOException e) { + if (attempt == initialAttempt) { + blobStoreServices.onRetryStarted("read"); + } + reopenStreamOrFail(e); + } + } + } + + private void ensureOpen() { + if (closed) { + assert false : "using RetryingInputStream after close"; + throw new IllegalStateException("Stream is closed"); + } + } + + private void reopenStreamOrFail(IOException e) throws IOException { + final long meaningfulProgressSize = blobStoreServices.getMeaningfulProgressSize(); + if (currentStreamProgress() >= meaningfulProgressSize) { + failuresAfterMeaningfulProgress += 1; + } + final long delayInMillis = maybeLogAndComputeRetryDelay("reading", e); + IOUtils.closeWhileHandlingException(currentStream.inputStream); + + delayBeforeRetry(delayInMillis); + openStreamWithRetry(); + } + + // The method throws if the operation should *not* be retried. Otherwise, it keeps a record for the attempt and associated failure + // and compute the delay before retry. + private long maybeLogAndComputeRetryDelay(String action, T e) throws T { + if (shouldRetry(attempt) == false) { + final var finalException = addSuppressedExceptions(e); + logForFailure(action, finalException); + throw finalException; + } + + // Log at info level for the 1st retry and then exponentially less + logForRetry(Integer.bitCount(attempt) == 1 ? Level.INFO : Level.DEBUG, action, e); + if (failures.size() < MAX_SUPPRESSED_EXCEPTIONS) { + failures.add(e); + } + final long delayInMillis = getRetryDelayInMillis(); + attempt += 1; // increment after computing delay because attempt affects the result + return delayInMillis; + } + + private void logForFailure(String action, Exception e) { + logger.warn( + () -> format( + "failed %s [%s] at offset [%s] with purpose [%s]", + action, + blobStoreServices.getBlobDescription(), + start + currentOffset, + purpose.getKey() + ), + e + ); + } + + private void logForRetry(Level level, String action, Exception e) { + logger.log( + level, + () -> format( + """ + failed %s [%s] at offset [%s] with purpose [%s]; \ + this was attempt [%s] to read this blob which yielded [%s] bytes; in total \ + [%s] of the attempts to read this blob have made meaningful progress and do not count towards the maximum number of \ + retries; the maximum number of read attempts which do not make meaningful progress is [%s]""", + action, + blobStoreServices.getBlobDescription(), + start + currentOffset, + purpose.getKey(), + attempt, + currentStreamProgress(), + failuresAfterMeaningfulProgress, + maxRetriesForNoMeaningfulProgress() + ), + e + ); + } + + private void maybeLogAndRecordMetricsForSuccess(int initialAttempt, String action) { + if (attempt > initialAttempt) { + final int numberOfRetries = attempt - initialAttempt; + logger.info( + "successfully {} input stream for [{}] with purpose [{}] after [{}] retries", + action, + blobStoreServices.getBlobDescription(), + purpose.getKey(), + numberOfRetries + ); + blobStoreServices.onRetrySucceeded(action, numberOfRetries); + } + } + + private long currentStreamProgress() { + return Math.subtractExact(Math.addExact(start, currentOffset), currentStreamFirstOffset); + } + + private boolean shouldRetry(int attempt) { + if (purpose == OperationPurpose.REPOSITORY_ANALYSIS) { + return false; + } + if (purpose == OperationPurpose.INDICES) { + return true; + } + final int maxAttempts = blobStoreServices.getMaxRetries() + 1; + return attempt < maxAttempts + failuresAfterMeaningfulProgress; + } + + private int maxRetriesForNoMeaningfulProgress() { + return purpose == OperationPurpose.INDICES ? Integer.MAX_VALUE : (blobStoreServices.getMaxRetries() + 1); + } + + private void delayBeforeRetry(long delayInMillis) { + try { + assert shouldRetry(attempt - 1) : "should not have retried"; + Thread.sleep(delayInMillis); + } catch (InterruptedException e) { + logger.info("s3 input stream delay interrupted", e); + Thread.currentThread().interrupt(); + } + } + + // protected access for testing + protected long getRetryDelayInMillis() { + // Initial delay is 10 ms and cap max delay at 10 * 1024 millis, i.e. it retries every ~10 seconds at a minimum + return 10L << (Math.min(attempt - 1, 10)); + } + + @Override + public void close() throws IOException { + try { + currentStream.inputStream.close(); + } finally { + closed = true; + } + } + + @Override + public long skip(long n) throws IOException { + ensureOpen(); + return currentStream.inputStream.skip(n); + } + + @Override + public void reset() { + throw new UnsupportedOperationException("RetryingInputStream does not support seeking"); + } + + private T addSuppressedExceptions(T e) { + for (Exception failure : failures) { + e.addSuppressed(failure); + } + return e; + } + + /** + * This implements all the behavior that is blob-store-specific + * + * @param The type of the version used + */ + protected interface BlobStoreServices { + + /** + * Get an input stream for the given version + * + * @param version The version to request, or null if the latest version should be used + * @param start The start of the range to read, inclusive + * @param end The end of the range to read, exclusive, or {@code Long.MAX_VALUE - 1} if the end of the blob should be used + * @return An input stream for the given version + * @throws IOException if a retryable error occurs while opening the stream + * @throws NoSuchFileException if the blob does not exist, this is not retry-able + * @throws RequestedRangeNotSatisfiedException if the requested range is not valid, this is not retry-able + */ + InputStreamAtVersion getInputStreamAtVersion(@Nullable V version, long start, long end) throws IOException; + + void onRetryStarted(String action); + + void onRetrySucceeded(String action, long numberOfRetries); + + long getMeaningfulProgressSize(); + + int getMaxRetries(); + + String getBlobDescription(); + } + + protected record InputStreamAtVersion(InputStream inputStream, V version) { + // Make the default constructor public + public InputStreamAtVersion {} + + } +} From 419ea292b8872eab7e883263dda6856f9ec62862 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Fri, 17 Oct 2025 09:39:14 +1100 Subject: [PATCH 02/31] Restore original exception throwing --- .../repositories/s3/S3RetryingInputStream.java | 2 +- .../common/blobstore/RetryingInputStream.java | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java index f73d73a578f99..2d08fd9ff605b 100644 --- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java +++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java @@ -86,7 +86,7 @@ public InputStreamAtVersion getInputStreamAtVersion(String version, long ); } } - throw new IOException("failed to get input stream for [" + blobKey + "]", e); + throw e; } } diff --git a/server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java b/server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java index ae77fce652695..471fd7c6fdd14 100644 --- a/server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java +++ b/server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java @@ -82,7 +82,7 @@ private void openStreamWithRetry() throws IOException { return; } catch (NoSuchFileException | RequestedRangeNotSatisfiedException e) { throw e; - } catch (IOException e) { + } catch (RuntimeException | IOException e) { if (attempt == 1) { blobStoreServices.onRetryStarted("open"); } @@ -155,11 +155,18 @@ private void reopenStreamOrFail(IOException e) throws IOException { // The method throws if the operation should *not* be retried. Otherwise, it keeps a record for the attempt and associated failure // and compute the delay before retry. - private long maybeLogAndComputeRetryDelay(String action, T e) throws T { + private long maybeLogAndComputeRetryDelay(String action, Exception e) throws IOException { if (shouldRetry(attempt) == false) { final var finalException = addSuppressedExceptions(e); logForFailure(action, finalException); - throw finalException; + switch (finalException) { + case RuntimeException runtimeException: + throw runtimeException; + case IOException ioException: + throw ioException; + default: + throw new IOException("Error " + action + "blob", finalException); + } } // Log at info level for the 1st retry and then exponentially less From 643883f69462232c06691473fabdc80155089d31 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Fri, 17 Oct 2025 10:52:49 +1100 Subject: [PATCH 03/31] First pass AzureRetryingInputStream --- .../azure/AzureBlobContainer.java | 2 +- .../repositories/azure/AzureBlobStore.java | 56 ++++++++++----- .../azure/AzureRetryingInputStream.java | 71 +++++++++++++++++++ 3 files changed, 109 insertions(+), 20 deletions(-) create mode 100644 modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobContainer.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobContainer.java index ab8e10ce9de27..72d5d3ba37ebc 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobContainer.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobContainer.java @@ -68,7 +68,7 @@ private InputStream openInputStream(OperationPurpose purpose, String blobName, l throw new NoSuchFileException("Blob [" + blobKey + "] not found"); } try { - return blobStore.getInputStream(purpose, blobKey, position, length); + return new AzureRetryingInputStream(blobStore, purpose, blobKey, position, length); } catch (Exception e) { if (ExceptionsHelper.unwrap(e, HttpResponseException.class) instanceof HttpResponseException httpResponseException) { final var httpStatusCode = httpResponseException.getResponse().getStatusCode(); diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java index bb330be6266d4..dde9fc79d58cc 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java @@ -17,7 +17,6 @@ import reactor.core.scheduler.Schedulers; import com.azure.core.http.HttpMethod; -import com.azure.core.http.rest.ResponseBase; import com.azure.core.util.BinaryData; import com.azure.core.util.FluxUtil; import com.azure.core.util.logging.ClientLogger; @@ -345,7 +344,13 @@ private static boolean isIgnorableBatchDeleteException(Throwable exception) { return false; } - public InputStream getInputStream(OperationPurpose purpose, String blob, long position, final @Nullable Long length) { + AzureInputStream getInputStream( + OperationPurpose purpose, + String blob, + long position, + final @Nullable Long length, + @Nullable String eTag + ) throws IOException { logger.trace(() -> format("reading container [%s], blob [%s]", container, blob)); final AzureBlobServiceClient azureBlobServiceClient = getAzureBlobServiceClientClient(purpose); final BlobServiceClient syncClient = azureBlobServiceClient.getSyncClient(); @@ -361,18 +366,15 @@ public InputStream getInputStream(OperationPurpose purpose, String blob, long po } BlobAsyncClient blobAsyncClient = asyncClient.getBlobContainerAsyncClient(container).getBlobAsyncClient(blob); int maxReadRetries = service.getMaxReadRetries(projectId, clientName); - try { - return new AzureInputStream( - blobAsyncClient, - position, - length == null ? totalSize : length, - totalSize, - maxReadRetries, - azureBlobServiceClient.getAllocator() - ); - } catch (IOException e) { - throw new UncheckedIOException(e); - } + return new AzureInputStream( + blobAsyncClient, + position, + length == null ? totalSize : length, + totalSize, + maxReadRetries, + azureBlobServiceClient.getAllocator(), + eTag + ); } public Map listBlobsByPrefix(OperationPurpose purpose, String keyPath, String prefix) throws IOException { @@ -1076,11 +1078,12 @@ RequestMetricsRecorder getMetricsRecorder() { return requestMetricsRecorder; } - private static class AzureInputStream extends InputStream { + static class AzureInputStream extends InputStream { private final CancellableRateLimitedFluxIterator cancellableRateLimitedFluxIterator; private ByteBuf byteBuf; private boolean closed; private final ByteBufAllocator allocator; + private final String eTag; private AzureInputStream( final BlobAsyncClient client, @@ -1088,15 +1091,23 @@ private AzureInputStream( long rangeLength, long contentLength, int maxRetries, - ByteBufAllocator allocator + ByteBufAllocator allocator, + @Nullable String ifMatchETag ) throws IOException { rangeLength = Math.min(rangeLength, contentLength - rangeOffset); final BlobRange range = new BlobRange(rangeOffset, rangeLength); DownloadRetryOptions downloadRetryOptions = new DownloadRetryOptions().setMaxRetryRequests(maxRetries); - Flux byteBufFlux = client.downloadWithResponse(range, downloadRetryOptions, null, false) + final BlobRequestConditions requestConditions = ifMatchETag != null + ? new BlobRequestConditions().setIfMatch(ifMatchETag) + : null; + AtomicReference eTagRef = new AtomicReference<>(); + Flux byteBufFlux = client.downloadWithResponse(range, downloadRetryOptions, requestConditions, false) .flux() - .concatMap(ResponseBase::getValue) // it's important to use concatMap, since flatMap doesn't provide ordering - // guarantees and that's not fun to debug :( + .concatMap(response -> { + eTagRef.set(response.getDeserializedHeaders().getETag()); + return response.getValue(); + }) // it's important to use concatMap, since flatMap doesn't provide ordering + // guarantees and that's not fun to debug :( .filter(Objects::nonNull) .map(this::copyBuffer); // Sadly we have to copy the buffers since the memory is released after the flux execution // ends and we need that the byte buffer outlives that lifecycle. Since the SDK provides an @@ -1112,6 +1123,9 @@ private AzureInputStream( // blob doesn't exist byteBufFlux.subscribe(cancellableRateLimitedFluxIterator); getNextByteBuf(); + assert eTagRef.get() != null : "eTag should have been set"; + assert ifMatchETag == null || eTagRef.get().equals(ifMatchETag) : "eTag mismatch"; + this.eTag = eTagRef.get(); } private ByteBuf copyBuffer(ByteBuffer buffer) { @@ -1173,6 +1187,10 @@ public void close() { } } + public String getETag() { + return eTag; + } + private void releaseByteBuf(ByteBuf buf) { ReferenceCountUtil.safeRelease(buf); this.byteBuf = null; diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java new file mode 100644 index 0000000000000..34f88ed24805b --- /dev/null +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java @@ -0,0 +1,71 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.repositories.azure; + +import org.elasticsearch.common.blobstore.OperationPurpose; +import org.elasticsearch.common.blobstore.RetryingInputStream; +import org.elasticsearch.core.Nullable; + +import java.io.IOException; + +public class AzureRetryingInputStream extends RetryingInputStream { + + protected AzureRetryingInputStream(AzureBlobStore azureBlobStore, OperationPurpose purpose, String blob) throws IOException { + this(azureBlobStore, purpose, blob, 0L, null); + } + + protected AzureRetryingInputStream(AzureBlobStore azureBlobStore, OperationPurpose purpose, String blob, long position, Long length) + throws IOException { + super( + new AzureBlobStoreServices(azureBlobStore, purpose, blob), + purpose, + position, + length == null ? Long.MAX_VALUE - 1 : position + length + ); + } + + private record AzureBlobStoreServices(AzureBlobStore blobStore, OperationPurpose purpose, String blob) + implements + RetryingInputStream.BlobStoreServices { + + @Override + public InputStreamAtVersion getInputStreamAtVersion(@Nullable String version, long start, long end) throws IOException { + Long length = end < Long.MAX_VALUE - 1 ? end - start : null; + AzureBlobStore.AzureInputStream inputStream = blobStore.getInputStream(purpose, blob, start, length, version); + return new InputStreamAtVersion<>(inputStream, inputStream.getETag()); + } + + @Override + public void onRetryStarted(String action) { + // No metrics for Azure + } + + @Override + public void onRetrySucceeded(String action, long numberOfRetries) { + // No metrics for Azure + } + + @Override + public long getMeaningfulProgressSize() { + // Any progress is meaningful for Azure + return 1; + } + + @Override + public int getMaxRetries() { + return 3; // TODO + } + + @Override + public String getBlobDescription() { + return blob; + } + } +} From 82f871aab539351c90135358aa59e0ff534e5c55 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 20 Oct 2025 16:50:36 +1100 Subject: [PATCH 04/31] Move not-found and range-not-satisfied logic into AzureRetryingInputStream --- .../azure/AzureBlobContainer.java | 20 +------------ .../azure/AzureRetryingInputStream.java | 29 +++++++++++++++++-- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobContainer.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobContainer.java index 72d5d3ba37ebc..276c7e96284c4 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobContainer.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobContainer.java @@ -9,11 +9,8 @@ package org.elasticsearch.repositories.azure; -import com.azure.core.exception.HttpResponseException; - import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.ActionListener; import org.elasticsearch.common.Strings; import org.elasticsearch.common.blobstore.BlobContainer; @@ -26,8 +23,6 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.Nullable; -import org.elasticsearch.repositories.blobstore.RequestedRangeNotSatisfiedException; -import org.elasticsearch.rest.RestStatus; import java.io.IOException; import java.io.InputStream; @@ -67,20 +62,7 @@ private InputStream openInputStream(OperationPurpose purpose, String blobName, l // stream to it. throw new NoSuchFileException("Blob [" + blobKey + "] not found"); } - try { - return new AzureRetryingInputStream(blobStore, purpose, blobKey, position, length); - } catch (Exception e) { - if (ExceptionsHelper.unwrap(e, HttpResponseException.class) instanceof HttpResponseException httpResponseException) { - final var httpStatusCode = httpResponseException.getResponse().getStatusCode(); - if (httpStatusCode == RestStatus.NOT_FOUND.getStatus()) { - throw new NoSuchFileException("Blob [" + blobKey + "] not found"); - } - if (httpStatusCode == RestStatus.REQUESTED_RANGE_NOT_SATISFIED.getStatus()) { - throw new RequestedRangeNotSatisfiedException(blobKey, position, length == null ? -1 : length, e); - } - } - throw new IOException("Unable to get input stream for blob [" + blobKey + "]", e); - } + return new AzureRetryingInputStream(blobStore, purpose, blobKey, position, length); } @Override diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java index 34f88ed24805b..fdb3da605999b 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java @@ -9,11 +9,17 @@ package org.elasticsearch.repositories.azure; +import com.azure.core.exception.HttpResponseException; + +import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.common.blobstore.OperationPurpose; import org.elasticsearch.common.blobstore.RetryingInputStream; import org.elasticsearch.core.Nullable; +import org.elasticsearch.repositories.blobstore.RequestedRangeNotSatisfiedException; +import org.elasticsearch.rest.RestStatus; import java.io.IOException; +import java.nio.file.NoSuchFileException; public class AzureRetryingInputStream extends RetryingInputStream { @@ -37,9 +43,26 @@ private record AzureBlobStoreServices(AzureBlobStore blobStore, OperationPurpose @Override public InputStreamAtVersion getInputStreamAtVersion(@Nullable String version, long start, long end) throws IOException { - Long length = end < Long.MAX_VALUE - 1 ? end - start : null; - AzureBlobStore.AzureInputStream inputStream = blobStore.getInputStream(purpose, blob, start, length, version); - return new InputStreamAtVersion<>(inputStream, inputStream.getETag()); + try { + final Long length = end < Long.MAX_VALUE - 1 ? end - start : null; + final AzureBlobStore.AzureInputStream inputStream = blobStore.getInputStream(purpose, blob, start, length, version); + return new InputStreamAtVersion<>(inputStream, inputStream.getETag()); + } catch (Exception e) { + if (ExceptionsHelper.unwrap(e, HttpResponseException.class) instanceof HttpResponseException httpResponseException) { + final var httpStatusCode = httpResponseException.getResponse().getStatusCode(); + if (httpStatusCode == RestStatus.NOT_FOUND.getStatus()) { + throw new NoSuchFileException("Blob [" + blob + "] not found"); + } + if (httpStatusCode == RestStatus.REQUESTED_RANGE_NOT_SATISFIED.getStatus()) { + throw new RequestedRangeNotSatisfiedException(blob, start, end == Long.MAX_VALUE - 1 ? -1 : end - start, e); + } + } + switch (e) { + case RuntimeException runtimeException -> throw runtimeException; + case IOException ioException -> throw ioException; + default -> throw new IOException("Unable to get input stream for blob [" + blob + "]", e); + } + } } @Override From 7e936c2e6d1451510ea92ac127a127bee9dd413b Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 20 Oct 2025 16:54:31 +1100 Subject: [PATCH 05/31] Create BlobRequestConditions eagerly --- .../elasticsearch/repositories/azure/AzureBlobStore.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java index dde9fc79d58cc..e5b961935198c 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java @@ -1096,10 +1096,11 @@ private AzureInputStream( ) throws IOException { rangeLength = Math.min(rangeLength, contentLength - rangeOffset); final BlobRange range = new BlobRange(rangeOffset, rangeLength); - DownloadRetryOptions downloadRetryOptions = new DownloadRetryOptions().setMaxRetryRequests(maxRetries); - final BlobRequestConditions requestConditions = ifMatchETag != null - ? new BlobRequestConditions().setIfMatch(ifMatchETag) - : null; + final DownloadRetryOptions downloadRetryOptions = new DownloadRetryOptions().setMaxRetryRequests(maxRetries); + final BlobRequestConditions requestConditions = new BlobRequestConditions(); + if (ifMatchETag != null) { + requestConditions.setIfMatch(ifMatchETag); + } AtomicReference eTagRef = new AtomicReference<>(); Flux byteBufFlux = client.downloadWithResponse(range, downloadRetryOptions, requestConditions, false) .flux() From 46297908d8f191e77afdfc82a61a09bb041eccec Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 20 Oct 2025 17:01:46 +1100 Subject: [PATCH 06/31] Always set ifMatch (null check is redundant) --- .../org/elasticsearch/repositories/azure/AzureBlobStore.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java index e5b961935198c..8c221a3f6f5ed 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java @@ -1097,10 +1097,7 @@ private AzureInputStream( rangeLength = Math.min(rangeLength, contentLength - rangeOffset); final BlobRange range = new BlobRange(rangeOffset, rangeLength); final DownloadRetryOptions downloadRetryOptions = new DownloadRetryOptions().setMaxRetryRequests(maxRetries); - final BlobRequestConditions requestConditions = new BlobRequestConditions(); - if (ifMatchETag != null) { - requestConditions.setIfMatch(ifMatchETag); - } + final BlobRequestConditions requestConditions = new BlobRequestConditions().setIfMatch(ifMatchETag); AtomicReference eTagRef = new AtomicReference<>(); Flux byteBufFlux = client.downloadWithResponse(range, downloadRetryOptions, requestConditions, false) .flux() From 832141e6f0b48399d3e39eb73aeedaadc1735140 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 21 Oct 2025 16:20:29 +1100 Subject: [PATCH 07/31] Use retrying stream to do retries --- .../elasticsearch/repositories/azure/AzureBlobStore.java | 7 +++++-- .../repositories/azure/AzureRetryingInputStream.java | 2 +- .../repositories/s3/S3RetryingInputStream.java | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java index 8c221a3f6f5ed..69951f8d36f6c 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java @@ -344,6 +344,10 @@ private static boolean isIgnorableBatchDeleteException(Throwable exception) { return false; } + public int getMaxReadRetries() { + return service.getMaxReadRetries(projectId, clientName); + } + AzureInputStream getInputStream( OperationPurpose purpose, String blob, @@ -365,13 +369,12 @@ AzureInputStream getInputStream( totalSize = position + length; } BlobAsyncClient blobAsyncClient = asyncClient.getBlobContainerAsyncClient(container).getBlobAsyncClient(blob); - int maxReadRetries = service.getMaxReadRetries(projectId, clientName); return new AzureInputStream( blobAsyncClient, position, length == null ? totalSize : length, totalSize, - maxReadRetries, + 0, azureBlobServiceClient.getAllocator(), eTag ); diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java index fdb3da605999b..e3407a7c9620e 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java @@ -83,7 +83,7 @@ public long getMeaningfulProgressSize() { @Override public int getMaxRetries() { - return 3; // TODO + return blobStore.getMaxReadRetries(); } @Override diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java index 2d08fd9ff605b..a84e91b2d36ea 100644 --- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java +++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java @@ -19,6 +19,7 @@ import org.elasticsearch.Version; import org.elasticsearch.common.blobstore.OperationPurpose; import org.elasticsearch.common.blobstore.RetryingInputStream; +import org.elasticsearch.core.Nullable; import org.elasticsearch.repositories.blobstore.RequestedRangeNotSatisfiedException; import org.elasticsearch.repositories.s3.S3BlobStore.Operation; import org.elasticsearch.rest.RestStatus; @@ -57,7 +58,7 @@ private record S3BlobStoreServices(S3BlobStore blobStore, String blobKey, Operat BlobStoreServices { @Override - public InputStreamAtVersion getInputStreamAtVersion(String version, long start, long end) throws IOException { + public InputStreamAtVersion getInputStreamAtVersion(@Nullable String version, long start, long end) throws IOException { try (AmazonS3Reference clientReference = blobStore.clientReference()) { final var getObjectRequestBuilder = GetObjectRequest.builder().bucket(blobStore.bucket()).key(blobKey); configureRequestForMetrics(getObjectRequestBuilder, blobStore, Operation.GET_OBJECT, purpose); From 968d89ba689d95cefadc4ce66d807039d5954c77 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 21 Oct 2025 16:22:26 +1100 Subject: [PATCH 08/31] visibility --- .../org/elasticsearch/repositories/azure/AzureBlobStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java index 69951f8d36f6c..ba755e00e2a6f 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java @@ -344,7 +344,7 @@ private static boolean isIgnorableBatchDeleteException(Throwable exception) { return false; } - public int getMaxReadRetries() { + int getMaxReadRetries() { return service.getMaxReadRetries(projectId, clientName); } From e06d79ab69d92283e0d0375506172e5df6e2c210 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 21 Oct 2025 16:24:05 +1100 Subject: [PATCH 09/31] tidy --- .../org/elasticsearch/repositories/azure/AzureBlobStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java index ba755e00e2a6f..fc13c65a25e92 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java @@ -1101,7 +1101,7 @@ private AzureInputStream( final BlobRange range = new BlobRange(rangeOffset, rangeLength); final DownloadRetryOptions downloadRetryOptions = new DownloadRetryOptions().setMaxRetryRequests(maxRetries); final BlobRequestConditions requestConditions = new BlobRequestConditions().setIfMatch(ifMatchETag); - AtomicReference eTagRef = new AtomicReference<>(); + final AtomicReference eTagRef = new AtomicReference<>(); Flux byteBufFlux = client.downloadWithResponse(range, downloadRetryOptions, requestConditions, false) .flux() .concatMap(response -> { From 972a505b7cfb8ffa88a8bbd9f8daca70fe3edf9c Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 21 Oct 2025 17:30:00 +1100 Subject: [PATCH 10/31] Add test --- .../azure/AzureBlobContainerRetriesTests.java | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerRetriesTests.java b/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerRetriesTests.java index 9e9313249e967..72a29f2cde01e 100644 --- a/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerRetriesTests.java +++ b/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerRetriesTests.java @@ -10,6 +10,7 @@ import fixture.azure.AzureHttpHandler; +import com.sun.net.httpserver.HttpExchange; import com.sun.net.httpserver.HttpHandler; import org.elasticsearch.common.Strings; @@ -121,6 +122,66 @@ public void testReadBlobWithRetries() throws Exception { } } + public void testReadBlobWithFailuresMidDownload() throws IOException { + final int responsesToSend = randomIntBetween(3, 5); + final AtomicInteger responseCounter = new AtomicInteger(responsesToSend); + final byte[] blobContents = randomBlobContent(); + final String eTag = UUIDs.base64UUID(); + httpServer.createContext("/account/container/read_blob_fail_mid_stream", exchange -> { + try { + Streams.readFully(exchange.getRequestBody()); + if ("HEAD".equals(exchange.getRequestMethod())) { + exchange.getResponseHeaders().add("Content-Type", "application/octet-stream"); + exchange.getResponseHeaders().add("x-ms-blob-content-length", String.valueOf(blobContents.length)); + exchange.getResponseHeaders().add("Content-Length", String.valueOf(blobContents.length)); + exchange.getResponseHeaders().add("x-ms-blob-type", "blockblob"); + exchange.sendResponseHeaders(RestStatus.OK.getStatus(), -1); + } else if ("GET".equals(exchange.getRequestMethod())) { + if (responseCounter.decrementAndGet() > 0) { + switch (randomIntBetween(1, 3)) { + case 1 -> { + final Integer rCode = randomFrom( + RestStatus.INTERNAL_SERVER_ERROR.getStatus(), + RestStatus.SERVICE_UNAVAILABLE.getStatus(), + RestStatus.TOO_MANY_REQUESTS.getStatus() + ); + logger.info("---> sending error: {}", rCode); + exchange.sendResponseHeaders(rCode, -1); + } + case 2 -> logger.info("---> sending no response"); + case 3 -> sendResponse(eTag, blobContents, exchange, true); + } + } else { + sendResponse(eTag, blobContents, exchange, false); + } + } + } finally { + exchange.close(); + } + }); + + final BlobContainer blobContainer = createBlobContainer(responsesToSend * 2); + try (InputStream inputStream = blobContainer.readBlob(randomPurpose(), "read_blob_fail_mid_stream")) { + assertArrayEquals(blobContents, BytesReference.toBytes(Streams.readFully(inputStream))); + } + } + + private void sendResponse(String eTag, byte[] blobContents, HttpExchange exchange, boolean partial) throws IOException { + final var ranges = getRanges(exchange); + final int start = ranges.v1().intValue(); + final int end = partial ? randomIntBetween(start, ranges.v2().intValue()) : ranges.v2().intValue(); + final var contents = Arrays.copyOfRange(blobContents, start, end + 1); + + logger.info("---> responding to: {} -> {} (sending chunk of size {})", ranges.v1(), ranges.v2(), contents.length); + exchange.getResponseHeaders().add("Content-Type", "application/octet-stream"); + exchange.getResponseHeaders().add("x-ms-blob-content-length", String.valueOf(blobContents.length)); + exchange.getResponseHeaders().add("Content-Length", String.valueOf(blobContents.length)); + exchange.getResponseHeaders().add("x-ms-blob-type", "blockblob"); + exchange.getResponseHeaders().add("ETag", eTag); + exchange.sendResponseHeaders(RestStatus.OK.getStatus(), blobContents.length - ranges.v1().intValue()); + exchange.getResponseBody().write(contents, 0, contents.length); + } + public void testReadRangeBlobWithRetries() throws Exception { final int maxRetries = randomIntBetween(1, 5); final CountDown countDownGet = new CountDown(maxRetries); From c4fcaa130c6e1265ac2eaa10c7fe9acb6662477f Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Fri, 7 Nov 2025 14:15:28 +1100 Subject: [PATCH 11/31] Use common retry logic for GCS --- .../azure/AzureRetryingInputStream.java | 4 - .../gcs/GoogleCloudStorageBlobStore.java | 6 +- ...GoogleCloudStorageRetryingInputStream.java | 286 +++++++----------- ...eCloudStorageRetryingInputStreamTests.java | 9 +- 4 files changed, 123 insertions(+), 182 deletions(-) diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java index e3407a7c9620e..dd40e479a15ba 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java @@ -23,10 +23,6 @@ public class AzureRetryingInputStream extends RetryingInputStream { - protected AzureRetryingInputStream(AzureBlobStore azureBlobStore, OperationPurpose purpose, String blob) throws IOException { - this(azureBlobStore, purpose, blob, 0L, null); - } - protected AzureRetryingInputStream(AzureBlobStore azureBlobStore, OperationPurpose purpose, String blob, long position, Long length) throws IOException { super( diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStore.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStore.java index 2af940c19bbac..399026ae5c066 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStore.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStore.java @@ -139,7 +139,7 @@ class GoogleCloudStorageBlobStore implements BlobStore { this.casBackoffPolicy = casBackoffPolicy; } - private MeteredStorage client() throws IOException { + MeteredStorage client() throws IOException { return storageService.client(projectId, clientName, repositoryName, statsCollector); } @@ -229,7 +229,7 @@ boolean blobExists(OperationPurpose purpose, String blobName) throws IOException * @return the InputStream used to read the blob's content */ InputStream readBlob(OperationPurpose purpose, String blobName) throws IOException { - return new GoogleCloudStorageRetryingInputStream(purpose, client(), BlobId.of(bucketName, blobName)); + return new GoogleCloudStorageRetryingInputStream(this, purpose, BlobId.of(bucketName, blobName)); } /** @@ -252,8 +252,8 @@ InputStream readBlob(OperationPurpose purpose, String blobName, long position, l return new ByteArrayInputStream(new byte[0]); } else { return new GoogleCloudStorageRetryingInputStream( + this, purpose, - client(), BlobId.of(bucketName, blobName), position, Math.addExact(position, length - 1) diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java index a74e86d8ee677..618cafeae3d61 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java @@ -9,15 +9,14 @@ package org.elasticsearch.repositories.gcs; import com.google.api.client.http.HttpResponse; -import com.google.cloud.BaseService; -import com.google.cloud.RetryHelper; import com.google.cloud.storage.BlobId; import com.google.cloud.storage.StorageException; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.common.blobstore.OperationPurpose; -import org.elasticsearch.core.IOUtils; +import org.elasticsearch.common.blobstore.RetryingInputStream; +import org.elasticsearch.core.Nullable; import org.elasticsearch.repositories.blobstore.RequestedRangeNotSatisfiedException; import org.elasticsearch.rest.RestStatus; @@ -25,129 +24,149 @@ import java.io.IOException; import java.io.InputStream; import java.nio.file.NoSuchFileException; -import java.util.ArrayList; -import java.util.List; - -import static org.elasticsearch.core.Strings.format; /** * Wrapper around reads from GCS that will retry blob downloads that fail part-way through, resuming from where the failure occurred. * This should be handled by the SDK but it isn't today. This should be revisited in the future (e.g. before removing * the {@link org.elasticsearch.Version#V_7_0_0} version constant) and removed if the SDK handles retries itself in the future. */ -class GoogleCloudStorageRetryingInputStream extends InputStream { +class GoogleCloudStorageRetryingInputStream extends RetryingInputStream { private static final Logger logger = LogManager.getLogger(GoogleCloudStorageRetryingInputStream.class); - static final int MAX_SUPPRESSED_EXCEPTIONS = 10; - - private final OperationPurpose purpose; - private final MeteredStorage client; - private final BlobId blobId; - private final long start; - private final long end; - private final int maxAttempts; - private InputStream currentStream; - private int attempt = 1; - private List failures = new ArrayList<>(MAX_SUPPRESSED_EXCEPTIONS); - private long currentOffset; - private boolean closed; - private Long lastGeneration; - // Used for testing only - GoogleCloudStorageRetryingInputStream(OperationPurpose purpose, MeteredStorage client, BlobId blobId) throws IOException { - this(purpose, client, blobId, 0, Long.MAX_VALUE - 1); + GoogleCloudStorageRetryingInputStream(GoogleCloudStorageBlobStore blobStore, OperationPurpose purpose, BlobId blobId) + throws IOException { + this(blobStore, purpose, blobId, 0, Long.MAX_VALUE - 1); } // Used for testing only - GoogleCloudStorageRetryingInputStream(OperationPurpose purpose, MeteredStorage client, BlobId blobId, long start, long end) - throws IOException { - if (start < 0L) { - throw new IllegalArgumentException("start must be non-negative"); - } - if (end < start || end == Long.MAX_VALUE) { - throw new IllegalArgumentException("end must be >= start and not Long.MAX_VALUE"); - } - this.purpose = purpose; - this.client = client; - this.blobId = blobId; - this.start = start; - this.end = end; - this.maxAttempts = client.getOptions().getRetrySettings().getMaxAttempts(); - this.currentStream = openStream(); + GoogleCloudStorageRetryingInputStream( + GoogleCloudStorageBlobStore blobStore, + OperationPurpose purpose, + BlobId blobId, + long start, + long end + ) throws IOException { + super( + new GoogleCloudStorageBlobStoreServices( + blobStore, + purpose, + blobId, + blobStore.client().getOptions().getRetrySettings().getMaxAttempts() + ), + purpose, + start, + end + ); } - private InputStream openStream() throws IOException { - try { + private static class GoogleCloudStorageBlobStoreServices implements BlobStoreServices { + + private final GoogleCloudStorageBlobStore blobStore; + private final OperationPurpose purpose; + private final BlobId blobId; + private final int maxRetries; + + private GoogleCloudStorageBlobStoreServices( + GoogleCloudStorageBlobStore blobStore, + OperationPurpose purpose, + BlobId blobId, + int maxRetries + ) { + this.blobStore = blobStore; + this.purpose = purpose; + this.blobId = blobId; + this.maxRetries = maxRetries; + } + + @Override + public InputStreamAtVersion getInputStreamAtVersion(@Nullable Long lastGeneration, long start, long end) throws IOException { + final MeteredStorage client = blobStore.client(); try { - return RetryHelper.runWithRetries(() -> { - try { - final var meteredGet = client.meteredObjectsGet(purpose, blobId.getBucket(), blobId.getName()); - meteredGet.setReturnRawInputStream(true); - if (lastGeneration != null) { - meteredGet.setGeneration(lastGeneration); - } + try { + final var meteredGet = client.meteredObjectsGet(purpose, blobId.getBucket(), blobId.getName()); + meteredGet.setReturnRawInputStream(true); + if (lastGeneration != null) { + meteredGet.setGeneration(lastGeneration); + } - if (currentOffset > 0 || start > 0 || end < Long.MAX_VALUE - 1) { - if (meteredGet.getRequestHeaders() != null) { - meteredGet.getRequestHeaders().setRange("bytes=" + Math.addExact(start, currentOffset) + "-" + end); - } - } - final HttpResponse resp = meteredGet.executeMedia(); - // Store the generation of the first response we received, so we can detect - // if the file has changed if we need to resume - if (lastGeneration == null) { - lastGeneration = parseGenerationHeader(resp); + if (start > 0 || end < Long.MAX_VALUE - 1) { + if (meteredGet.getRequestHeaders() != null) { + meteredGet.getRequestHeaders().setRange("bytes=" + start + "-" + end); } + } + final HttpResponse resp = meteredGet.executeMedia(); + // Store the generation of the first response we received, so we can detect + // if the file has changed if we need to resume + if (lastGeneration == null) { + lastGeneration = parseGenerationHeader(resp); + } - final Long contentLength = resp.getHeaders().getContentLength(); - InputStream content = resp.getContent(); - if (contentLength != null) { - content = new ContentLengthValidatingInputStream(content, contentLength); - } - return content; - } catch (IOException e) { - throw StorageException.translate(e); + final Long contentLength = resp.getHeaders().getContentLength(); + InputStream content = resp.getContent(); + if (contentLength != null) { + content = new ContentLengthValidatingInputStream(content, contentLength); } - }, client.getOptions().getRetrySettings(), BaseService.EXCEPTION_HANDLER, client.getOptions().getClock()); - } catch (RetryHelper.RetryHelperException e) { - throw StorageException.translateAndThrow(e); - } - } catch (StorageException storageException) { - if (storageException.getCode() == RestStatus.NOT_FOUND.getStatus()) { - if (lastGeneration != null) { - throw addSuppressedExceptions( - new NoSuchFileException( + return new InputStreamAtVersion<>(content, lastGeneration); + } catch (IOException e) { + throw StorageException.translate(e); + } + } catch (StorageException storageException) { + if (storageException.getCode() == RestStatus.NOT_FOUND.getStatus()) { + if (lastGeneration != null) { + throw new NoSuchFileException( "Blob object [" + blobId.getName() + "] generation [" + lastGeneration + "] unavailable on resume (contents changed, or object deleted): " + storageException.getMessage() - ) - ); - } else { - throw addSuppressedExceptions( - new NoSuchFileException("Blob object [" + blobId.getName() + "] not found: " + storageException.getMessage()) - ); + ); + } else { + throw new NoSuchFileException("Blob object [" + blobId.getName() + "] not found: " + storageException.getMessage()); + } } - } - if (storageException.getCode() == RestStatus.REQUESTED_RANGE_NOT_SATISFIED.getStatus()) { - long currentPosition = Math.addExact(start, currentOffset); - throw addSuppressedExceptions( - new RequestedRangeNotSatisfiedException( + if (storageException.getCode() == RestStatus.REQUESTED_RANGE_NOT_SATISFIED.getStatus()) { + throw new RequestedRangeNotSatisfiedException( blobId.getName(), - currentPosition, - (end < Long.MAX_VALUE - 1) ? end - currentPosition + 1 : end, + start, + (end < Long.MAX_VALUE - 1) ? end - start + 1 : end, storageException - ) - ); + ); + } + throw storageException; } - throw addSuppressedExceptions(storageException); + } + + @Override + public void onRetryStarted(String action) { + // No retry metrics for GCS + } + + @Override + public void onRetrySucceeded(String action, long numberOfRetries) { + // No retry metrics for GCS + } + + @Override + public long getMeaningfulProgressSize() { + // Any progress is meaningful for GCS + return 1; + } + + @Override + public int getMaxRetries() { + return maxRetries; + } + + @Override + public String getBlobDescription() { + return blobId.toString(); } } - private Long parseGenerationHeader(HttpResponse response) { + private static Long parseGenerationHeader(HttpResponse response) { final String generationHeader = response.getHeaders().getFirstHeaderStringValue("x-goog-generation"); if (generationHeader != null) { try { @@ -213,90 +232,11 @@ private void checkContentLengthOnEOF() throws IOException { } } - @Override - public int read() throws IOException { - ensureOpen(); - while (true) { - try { - final int result = currentStream.read(); - currentOffset += 1; - return result; - } catch (IOException e) { - reopenStreamOrFail(StorageException.translate(e)); - } - } - } - - @Override - public int read(byte[] b, int off, int len) throws IOException { - ensureOpen(); - while (true) { - try { - final int bytesRead = currentStream.read(b, off, len); - if (bytesRead == -1) { - return -1; - } - currentOffset += bytesRead; - return bytesRead; - } catch (IOException e) { - reopenStreamOrFail(StorageException.translate(e)); - } - } - } - /** * Close the current stream, used to test resume */ // @VisibleForTesting void closeCurrentStream() throws IOException { - currentStream.close(); - } - - private void ensureOpen() { - if (closed) { - assert false : "using GoogleCloudStorageRetryingInputStream after close"; - throw new IllegalStateException("using GoogleCloudStorageRetryingInputStream after close"); - } - } - - private void reopenStreamOrFail(StorageException e) throws IOException { - if (attempt >= maxAttempts) { - throw addSuppressedExceptions(e); - } - logger.debug( - () -> format("failed reading [%s] at offset [%s], attempt [%s] of [%s], retrying", blobId, currentOffset, attempt, maxAttempts), - e - ); - attempt += 1; - if (failures.size() < MAX_SUPPRESSED_EXCEPTIONS) { - failures.add(e); - } - IOUtils.closeWhileHandlingException(currentStream); - currentStream = openStream(); - } - - @Override - public void close() throws IOException { - currentStream.close(); - closed = true; - } - - @Override - public long skip(long n) throws IOException { - // This could be optimized on a failure by re-opening stream directly to the preferred location. However, it is rarely called, - // so for now we will rely on the default implementation which just discards bytes by reading. - return super.skip(n); - } - - @Override - public void reset() { - throw new UnsupportedOperationException("GoogleCloudStorageRetryingInputStream does not support seeking"); - } - - private T addSuppressedExceptions(T e) { - for (StorageException failure : failures) { - e.addSuppressed(failure); - } - return e; + currentStream.inputStream().close(); } } diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStreamTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStreamTests.java index 2b223c7a16342..c3dbaf79672d0 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStreamTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStreamTests.java @@ -131,8 +131,10 @@ private GoogleCloudStorageRetryingInputStream createRetryingInputStream(byte[] d HttpRequest httpRequest = transport.createRequestFactory().buildGetRequest(HttpTesting.SIMPLE_GENERIC_URL); HttpResponse httpResponse = httpRequest.execute(); when(get.executeMedia()).thenReturn(httpResponse); + final GoogleCloudStorageBlobStore mockBlobStore = mock(GoogleCloudStorageBlobStore.class); + when(mockBlobStore.client()).thenReturn(meteredStorage); - return new GoogleCloudStorageRetryingInputStream(OperationPurpose.SNAPSHOT_DATA, meteredStorage, blobId); + return new GoogleCloudStorageRetryingInputStream(mockBlobStore, OperationPurpose.SNAPSHOT_DATA, blobId); } private GoogleCloudStorageRetryingInputStream createRetryingInputStream(byte[] data, int position, int length) throws IOException { @@ -148,9 +150,12 @@ private GoogleCloudStorageRetryingInputStream createRetryingInputStream(byte[] d when(get.executeMedia()).thenReturn(httpResponse); } + final GoogleCloudStorageBlobStore mockBlobStore = mock(GoogleCloudStorageBlobStore.class); + when(mockBlobStore.client()).thenReturn(meteredStorage); + return new GoogleCloudStorageRetryingInputStream( + mockBlobStore, OperationPurpose.SNAPSHOT_DATA, - meteredStorage, blobId, position, position + length - 1 From 3670b9927f7efc51a43614816962f3653a107070 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Fri, 7 Nov 2025 17:20:17 +1100 Subject: [PATCH 12/31] Fix tests --- ...GoogleCloudStorageRetryingInputStream.java | 2 +- .../s3/S3RetryingInputStream.java | 2 -- .../s3/S3BlobContainerRetriesTests.java | 24 ------------------- .../common/blobstore/RetryingInputStream.java | 2 +- .../AbstractBlobContainerRetriesTestCase.java | 16 +++++++++---- 5 files changed, 13 insertions(+), 33 deletions(-) diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java index 618cafeae3d61..14c6cc3d88406 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java @@ -53,7 +53,7 @@ class GoogleCloudStorageRetryingInputStream extends RetryingInputStream { blobStore, purpose, blobId, - blobStore.client().getOptions().getRetrySettings().getMaxAttempts() + blobStore.client().getOptions().getRetrySettings().getMaxAttempts() - 1 ), purpose, start, diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java index a84e91b2d36ea..4eb973f4ad0a1 100644 --- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java +++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RetryingInputStream.java @@ -42,8 +42,6 @@ class S3RetryingInputStream extends RetryingInputStream { private static final Logger logger = LogManager.getLogger(S3RetryingInputStream.class); - static final int MAX_SUPPRESSED_EXCEPTIONS = 10; - S3RetryingInputStream(OperationPurpose purpose, S3BlobStore blobStore, String blobKey) throws IOException { this(purpose, blobStore, blobKey, 0, Long.MAX_VALUE - 1); } diff --git a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3BlobContainerRetriesTests.java b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3BlobContainerRetriesTests.java index 4da5cb43e7f44..5bec24797185c 100644 --- a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3BlobContainerRetriesTests.java +++ b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3BlobContainerRetriesTests.java @@ -51,7 +51,6 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.blobstore.AbstractBlobContainerRetriesTestCase; -import org.elasticsearch.repositories.blobstore.BlobStoreTestUtil; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.telemetry.InstrumentType; import org.elasticsearch.telemetry.Measurement; @@ -60,7 +59,6 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.MockLog; import org.elasticsearch.watcher.ResourceWatcherService; -import org.hamcrest.Matcher; import org.junit.After; import org.junit.Before; import org.mockito.Mockito; @@ -102,19 +100,16 @@ import static org.elasticsearch.repositories.s3.S3ClientSettings.MAX_RETRIES_SETTING; import static org.elasticsearch.repositories.s3.S3ClientSettings.READ_TIMEOUT_SETTING; import static org.elasticsearch.repositories.s3.S3ClientSettingsTests.DEFAULT_REGION_UNAVAILABLE; -import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; -import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.lessThan; -import static org.hamcrest.Matchers.lessThanOrEqualTo; /** * This class tests how a {@link S3BlobContainer} and its underlying AWS S3 client are retrying requests when reading or writing blobs. @@ -1360,25 +1355,6 @@ private static void assertBase16MD5Digest(String input, String expectedDigestStr assertEquals(expectedDigestString, getBase16MD5Digest(new BytesArray(input))); } - @Override - protected Matcher getMaxRetriesMatcher(int maxRetries) { - // some attempts make meaningful progress and do not count towards the max retry limit - return allOf(greaterThanOrEqualTo(maxRetries), lessThanOrEqualTo(S3RetryingInputStream.MAX_SUPPRESSED_EXCEPTIONS)); - } - - @Override - protected OperationPurpose randomRetryingPurpose() { - return randomValueOtherThan(OperationPurpose.REPOSITORY_ANALYSIS, BlobStoreTestUtil::randomPurpose); - } - - @Override - protected OperationPurpose randomFiniteRetryingPurpose() { - return randomValueOtherThanMany( - purpose -> purpose == OperationPurpose.REPOSITORY_ANALYSIS || purpose == OperationPurpose.INDICES, - BlobStoreTestUtil::randomPurpose - ); - } - private void assertMetricsForOpeningStream() { final long numberOfOperations = getOperationMeasurements(); // S3 client sdk internally also retries within the configured maxRetries for retryable errors. diff --git a/server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java b/server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java index 471fd7c6fdd14..84ec852b8add4 100644 --- a/server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java +++ b/server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java @@ -28,7 +28,7 @@ public abstract class RetryingInputStream extends InputStream { private static final Logger logger = LogManager.getLogger(RetryingInputStream.class); - static final int MAX_SUPPRESSED_EXCEPTIONS = 10; + public static final int MAX_SUPPRESSED_EXCEPTIONS = 10; private final BlobStoreServices blobStoreServices; private final OperationPurpose purpose; diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java index a70ce9340ad00..82c907807e0cb 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java @@ -17,6 +17,7 @@ import org.elasticsearch.common.blobstore.BlobContainer; import org.elasticsearch.common.blobstore.BlobPath; import org.elasticsearch.common.blobstore.OperationPurpose; +import org.elasticsearch.common.blobstore.RetryingInputStream; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.Streams; import org.elasticsearch.common.unit.ByteSizeValue; @@ -27,6 +28,7 @@ import org.elasticsearch.mocksocket.MockHttpServer; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.fixture.HttpHeaderParser; +import org.hamcrest.Matcher; import org.junit.After; import org.junit.Before; @@ -44,10 +46,10 @@ import static org.elasticsearch.repositories.blobstore.BlobStoreTestUtil.randomPurpose; import static org.elasticsearch.test.NeverMatcher.never; +import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.either; -import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.lessThan; @@ -291,16 +293,20 @@ public void testReadBlobWithReadTimeouts() { assertThat(exception.getSuppressed().length, getMaxRetriesMatcher(maxRetries)); } - protected org.hamcrest.Matcher getMaxRetriesMatcher(int maxRetries) { - return equalTo(maxRetries); + protected Matcher getMaxRetriesMatcher(int maxRetries) { + // some attempts make meaningful progress and do not count towards the max retry limit + return allOf(greaterThanOrEqualTo(maxRetries), lessThanOrEqualTo(RetryingInputStream.MAX_SUPPRESSED_EXCEPTIONS)); } protected OperationPurpose randomRetryingPurpose() { - return randomPurpose(); + return randomValueOtherThan(OperationPurpose.REPOSITORY_ANALYSIS, BlobStoreTestUtil::randomPurpose); } protected OperationPurpose randomFiniteRetryingPurpose() { - return randomPurpose(); + return randomValueOtherThanMany( + purpose -> purpose == OperationPurpose.REPOSITORY_ANALYSIS || purpose == OperationPurpose.INDICES, + BlobStoreTestUtil::randomPurpose + ); } public void testReadBlobWithNoHttpResponse() { From 66900b3e08eca06af52075a7127d75768f966170 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 10 Nov 2025 14:21:09 +1100 Subject: [PATCH 13/31] Fix GCS tests --- ...eCloudStorageBlobStoreRepositoryTests.java | 2 +- .../gcs/GoogleCloudStorageClientSettings.java | 25 +++++++++++++++++-- .../gcs/GoogleCloudStoragePlugin.java | 3 ++- ...GoogleCloudStorageRetryingInputStream.java | 25 +++++++++++-------- .../gcs/GoogleCloudStorageService.java | 4 ++- ...leCloudStorageBlobContainerStatsTests.java | 4 ++- ...GoogleCloudStorageClientSettingsTests.java | 17 ++++++++++--- 7 files changed, 60 insertions(+), 20 deletions(-) diff --git a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java index 2532a9212f9e2..4b76ddc3e1afa 100644 --- a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java +++ b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java @@ -255,7 +255,7 @@ StorageOptions createStorageOptions( .setInitialRetryDelay(Duration.ofMillis(10L)) .setRetryDelayMultiplier(options.getRetrySettings().getRetryDelayMultiplier()) .setMaxRetryDelay(Duration.ofSeconds(1L)) - .setMaxAttempts(0) + .setMaxAttempts(6) .setJittered(false) .setInitialRpcTimeout(options.getRetrySettings().getInitialRpcTimeout()) .setRpcTimeoutMultiplier(options.getRetrySettings().getRpcTimeoutMultiplier()) diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientSettings.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientSettings.java index 582e0b48121fa..562213ba77f84 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientSettings.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientSettings.java @@ -120,6 +120,18 @@ public class GoogleCloudStorageClientSettings { () -> PROXY_HOST_SETTING ); + /** + * The maximum number of retries to use when a GCS request fails. + *

+ * Default to 5 to match {@link com.google.cloud.ServiceOptions#getDefaultRetrySettings()} + */ + static final Setting.AffixSetting MAX_RETRIES_SETTING = Setting.affixKeySetting( + PREFIX, + "max_retries", + (key) -> Setting.intSetting(key, 5, 0, Setting.Property.NodeScope), + () -> PROXY_HOST_SETTING + ); + /** The credentials used by the client to connect to the Storage endpoint. */ private final ServiceAccountCredentials credential; @@ -144,6 +156,8 @@ public class GoogleCloudStorageClientSettings { @Nullable private final Proxy proxy; + private final int maxRetries; + GoogleCloudStorageClientSettings( final ServiceAccountCredentials credential, final String endpoint, @@ -152,7 +166,8 @@ public class GoogleCloudStorageClientSettings { final TimeValue readTimeout, final String applicationName, final URI tokenUri, - final Proxy proxy + final Proxy proxy, + final int maxRetries ) { this.credential = credential; this.endpoint = endpoint; @@ -162,6 +177,7 @@ public class GoogleCloudStorageClientSettings { this.applicationName = applicationName; this.tokenUri = tokenUri; this.proxy = proxy; + this.maxRetries = maxRetries; } public ServiceAccountCredentials getCredential() { @@ -197,6 +213,10 @@ public Proxy getProxy() { return proxy; } + public int getMaxRetries() { + return maxRetries; + } + @Override public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; @@ -249,7 +269,8 @@ static GoogleCloudStorageClientSettings getClientSettings(final Settings setting getConfigValue(settings, clientName, READ_TIMEOUT_SETTING), getConfigValue(settings, clientName, APPLICATION_NAME_SETTING), getConfigValue(settings, clientName, TOKEN_URI_SETTING), - proxy + proxy, + getConfigValue(settings, clientName, MAX_RETRIES_SETTING) ); } diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java index 2c35aac1a46e8..5b930337bc447 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java @@ -93,7 +93,8 @@ public List> getSettings() { GoogleCloudStorageClientSettings.TOKEN_URI_SETTING, GoogleCloudStorageClientSettings.PROXY_TYPE_SETTING, GoogleCloudStorageClientSettings.PROXY_HOST_SETTING, - GoogleCloudStorageClientSettings.PROXY_PORT_SETTING + GoogleCloudStorageClientSettings.PROXY_PORT_SETTING, + GoogleCloudStorageClientSettings.MAX_RETRIES_SETTING ); } diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java index 14c6cc3d88406..e33a9029e5026 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java @@ -16,6 +16,7 @@ import org.apache.logging.log4j.Logger; import org.elasticsearch.common.blobstore.OperationPurpose; import org.elasticsearch.common.blobstore.RetryingInputStream; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Nullable; import org.elasticsearch.repositories.blobstore.RequestedRangeNotSatisfiedException; import org.elasticsearch.rest.RestStatus; @@ -25,6 +26,8 @@ import java.io.InputStream; import java.nio.file.NoSuchFileException; +import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.MAX_RETRIES_SETTING; + /** * Wrapper around reads from GCS that will retry blob downloads that fail part-way through, resuming from where the failure occurred. * This should be handled by the SDK but it isn't today. This should be revisited in the future (e.g. before removing @@ -48,17 +51,17 @@ class GoogleCloudStorageRetryingInputStream extends RetryingInputStream { long start, long end ) throws IOException { - super( - new GoogleCloudStorageBlobStoreServices( - blobStore, - purpose, - blobId, - blobStore.client().getOptions().getRetrySettings().getMaxAttempts() - 1 - ), - purpose, - start, - end - ); + super(new GoogleCloudStorageBlobStoreServices(blobStore, purpose, blobId, calculateMaxRetries(blobStore)), purpose, start, end); + } + + private static int calculateMaxRetries(GoogleCloudStorageBlobStore blobStore) throws IOException { + final int maxAttempts = blobStore.client().getOptions().getRetrySettings().getMaxAttempts(); + if (maxAttempts == 0) { + assert false : "Time-based retrying not supported"; + return MAX_RETRIES_SETTING.getDefault(Settings.EMPTY); + } + // GCS client is configured by specifying the number of attempts, the number of retries is that minus one + return maxAttempts - 1; } private static class GoogleCloudStorageBlobStoreServices implements BlobStoreServices { diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageService.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageService.java index cb6b8b334181e..c1853eff44b34 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageService.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageService.java @@ -15,6 +15,7 @@ import com.google.api.client.http.javanet.NetHttpTransport; import com.google.api.client.util.SecurityUtils; import com.google.api.gax.retrying.ResultRetryAlgorithm; +import com.google.api.gax.retrying.RetrySettings; import com.google.auth.oauth2.GoogleCredentials; import com.google.auth.oauth2.ServiceAccountCredentials; import com.google.cloud.ServiceOptions; @@ -198,7 +199,8 @@ StorageOptions createStorageOptions( return Strings.hasLength(gcsClientSettings.getApplicationName()) ? Map.of("user-agent", gcsClientSettings.getApplicationName()) : Map.of(); - }); + }) + .setRetrySettings(RetrySettings.newBuilder().setMaxAttempts(gcsClientSettings.getMaxRetries() + 1).build()); if (Strings.hasLength(gcsClientSettings.getHost())) { storageOptionsBuilder.setHost(gcsClientSettings.getHost()); } diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerStatsTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerStatsTests.java index 7e2b4348823fd..6938393077b0b 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerStatsTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerStatsTests.java @@ -53,6 +53,7 @@ import static org.elasticsearch.repositories.blobstore.BlobStoreTestUtil.randomPurpose; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.APPLICATION_NAME_SETTING; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.CONNECT_TIMEOUT_SETTING; +import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.MAX_RETRIES_SETTING; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.PROJECT_ID_SETTING; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.READ_TIMEOUT_SETTING; import static org.elasticsearch.repositories.gcs.StorageOperation.GET; @@ -253,7 +254,8 @@ private ContainerAndBlobStore createBlobContainer(final String repositoryName) t READ_TIMEOUT_SETTING.getDefault(Settings.EMPTY), APPLICATION_NAME_SETTING.getDefault(Settings.EMPTY), new URI(getEndpointForServer(httpServer) + "/token"), - null + null, + MAX_RETRIES_SETTING.getDefault(Settings.EMPTY) ); googleCloudStorageService.refreshAndClearCache(Map.of(clientName, clientSettings)); final GoogleCloudStorageBlobStore blobStore = new GoogleCloudStorageBlobStore( diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientSettingsTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientSettingsTests.java index ac69afe7def6a..e0dcb5040ff8d 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientSettingsTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientSettingsTests.java @@ -40,6 +40,7 @@ import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.CONNECT_TIMEOUT_SETTING; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.CREDENTIALS_FILE_SETTING; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.ENDPOINT_SETTING; +import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.MAX_RETRIES_SETTING; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.PROJECT_ID_SETTING; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.READ_TIMEOUT_SETTING; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.getClientSettings; @@ -123,7 +124,8 @@ public void testProjectIdDefaultsToCredentials() throws Exception { READ_TIMEOUT_SETTING.getDefault(Settings.EMPTY), APPLICATION_NAME_SETTING.getDefault(Settings.EMPTY), new URI(""), - null + null, + MAX_RETRIES_SETTING.getDefault(Settings.EMPTY) ); assertEquals(credential.getProjectId(), googleCloudStorageClientSettings.getProjectId()); } @@ -140,7 +142,8 @@ public void testLoadsProxySettings() throws Exception { READ_TIMEOUT_SETTING.getDefault(Settings.EMPTY), APPLICATION_NAME_SETTING.getDefault(Settings.EMPTY), new URI(""), - proxy + proxy, + MAX_RETRIES_SETTING.getDefault(Settings.EMPTY) ); assertEquals(proxy, googleCloudStorageClientSettings.getProxy()); } @@ -278,6 +281,13 @@ private static GoogleCloudStorageClientSettings randomClient( applicationName = APPLICATION_NAME_SETTING.getDefault(Settings.EMPTY); } + int maxRetries; + if (randomBoolean()) { + maxRetries = randomIntBetween(0, 5); + } else { + maxRetries = MAX_RETRIES_SETTING.getDefault(Settings.EMPTY); + } + return new GoogleCloudStorageClientSettings( credential, endpoint, @@ -286,7 +296,8 @@ private static GoogleCloudStorageClientSettings randomClient( readTimeout, applicationName, new URI(""), - null + null, + maxRetries ); } From d775068deee9d8eb2e2a04ae434f2346009bce9a Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 10 Nov 2025 16:48:35 +1100 Subject: [PATCH 14/31] Fix test for exposed settings --- .../repositories/gcs/GoogleCloudStoragePluginTests.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePluginTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePluginTests.java index 7b9ef189b7459..e96746dabb536 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePluginTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePluginTests.java @@ -42,7 +42,8 @@ public void testExposedSettings() { "gcs.client.*.token_uri", "gcs.client.*.proxy.type", "gcs.client.*.proxy.host", - "gcs.client.*.proxy.port" + "gcs.client.*.proxy.port", + "gcs.client.*.max_retries" ), settings.stream().map(Setting::getKey).toList() ); From 6ccd45855de85a0be6ecada914508067ac85f4ca Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 11 Nov 2025 12:50:38 +1100 Subject: [PATCH 15/31] Use randomRetryingPurpose whenever we readBlob and care about the results --- .../azure/AzureBlobStoreRepositoryTests.java | 8 ++++++-- .../azure/AzureBlobContainerRetriesTests.java | 11 ++++++----- .../repositories/azure/AzureSasTokenTests.java | 4 ++-- .../GoogleCloudStorageBlobStoreRepositoryTests.java | 5 +++-- .../gcs/GoogleCloudStorageThirdPartyTests.java | 3 ++- .../GoogleCloudStorageBlobContainerRetriesTests.java | 6 +++--- .../repositories/s3/S3RepositoryThirdPartyTests.java | 5 +++-- .../AbstractBlobContainerRetriesTestCase.java | 2 +- .../blobstore/ESBlobStoreRepositoryIntegTestCase.java | 7 ++++--- 9 files changed, 30 insertions(+), 21 deletions(-) diff --git a/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java index f852284d21b2a..48f286737b4da 100644 --- a/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java +++ b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java @@ -64,6 +64,7 @@ import java.util.stream.Collectors; import static org.elasticsearch.repositories.RepositoriesMetrics.METRIC_REQUESTS_TOTAL; +import static org.elasticsearch.repositories.blobstore.AbstractBlobContainerRetriesTestCase.randomRetryingPurpose; import static org.elasticsearch.repositories.blobstore.BlobStoreTestUtil.randomPurpose; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; @@ -347,7 +348,10 @@ public void testDeleteBlobsIgnoringIfNotExists() throws Exception { public void testNotFoundErrorMessageContainsFullKey() throws Exception { try (BlobStore store = newBlobStore()) { BlobContainer container = store.blobContainer(BlobPath.EMPTY.add("nested").add("dir")); - NoSuchFileException exception = expectThrows(NoSuchFileException.class, () -> container.readBlob(randomPurpose(), "blob")); + NoSuchFileException exception = expectThrows( + NoSuchFileException.class, + () -> container.readBlob(randomRetryingPurpose(), "blob") + ); assertThat(exception.getMessage(), containsString("nested/dir/blob] not found")); } } @@ -360,7 +364,7 @@ public void testReadByteByByte() throws Exception { container.writeBlob(randomPurpose(), blobName, new ByteArrayInputStream(data), data.length, true); var originalDataInputStream = new ByteArrayInputStream(data); - try (var azureInputStream = container.readBlob(randomPurpose(), blobName)) { + try (var azureInputStream = container.readBlob(randomRetryingPurpose(), blobName)) { for (int i = 0; i < data.length; i++) { assertThat(originalDataInputStream.read(), is(equalTo(azureInputStream.read()))); } diff --git a/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerRetriesTests.java b/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerRetriesTests.java index 72a29f2cde01e..c25565d09b1df 100644 --- a/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerRetriesTests.java +++ b/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerRetriesTests.java @@ -44,6 +44,7 @@ import java.util.stream.Collectors; import static java.nio.charset.StandardCharsets.UTF_8; +import static org.elasticsearch.repositories.blobstore.AbstractBlobContainerRetriesTestCase.randomRetryingPurpose; import static org.elasticsearch.repositories.blobstore.BlobStoreTestUtil.randomPurpose; import static org.elasticsearch.repositories.blobstore.ESBlobStoreRepositoryIntegTestCase.randomBytes; import static org.elasticsearch.test.hamcrest.OptionalMatchers.isPresentWith; @@ -64,11 +65,11 @@ public void testReadNonexistentBlobThrowsNoSuchFileException() { final BlobContainer blobContainer = createBlobContainer(between(1, 5)); final Exception exception = expectThrows(NoSuchFileException.class, () -> { if (randomBoolean()) { - blobContainer.readBlob(randomPurpose(), "read_nonexistent_blob"); + blobContainer.readBlob(randomRetryingPurpose(), "read_nonexistent_blob"); } else { final long position = randomLongBetween(0, MAX_RANGE_VAL - 1L); final long length = randomLongBetween(1, MAX_RANGE_VAL - position); - blobContainer.readBlob(randomPurpose(), "read_nonexistent_blob", position, length); + blobContainer.readBlob(randomRetryingPurpose(), "read_nonexistent_blob", position, length); } }); assertThat(exception.toString(), exception.getMessage().toLowerCase(Locale.ROOT), containsString("not found")); @@ -115,7 +116,7 @@ public void testReadBlobWithRetries() throws Exception { }); final BlobContainer blobContainer = createBlobContainer(maxRetries); - try (InputStream inputStream = blobContainer.readBlob(randomPurpose(), "read_blob_max_retries")) { + try (InputStream inputStream = blobContainer.readBlob(randomRetryingPurpose(), "read_blob_max_retries")) { assertArrayEquals(bytes, BytesReference.toBytes(Streams.readFully(inputStream))); assertThat(countDownHead.isCountedDown(), is(true)); assertThat(countDownGet.isCountedDown(), is(true)); @@ -161,7 +162,7 @@ public void testReadBlobWithFailuresMidDownload() throws IOException { }); final BlobContainer blobContainer = createBlobContainer(responsesToSend * 2); - try (InputStream inputStream = blobContainer.readBlob(randomPurpose(), "read_blob_fail_mid_stream")) { + try (InputStream inputStream = blobContainer.readBlob(randomRetryingPurpose(), "read_blob_fail_mid_stream")) { assertArrayEquals(blobContents, BytesReference.toBytes(Streams.readFully(inputStream))); } } @@ -527,7 +528,7 @@ public void testRetryFromSecondaryLocationPolicies() throws Exception { } final BlobContainer blobContainer = createBlobContainer(maxRetries, secondaryHost, locationMode); - try (InputStream inputStream = blobContainer.readBlob(randomPurpose(), "read_blob_from_secondary")) { + try (InputStream inputStream = blobContainer.readBlob(randomRetryingPurpose(), "read_blob_from_secondary")) { assertArrayEquals(bytes, BytesReference.toBytes(Streams.readFully(inputStream))); // It does round robin, first tries on the primary, then on the secondary diff --git a/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureSasTokenTests.java b/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureSasTokenTests.java index b10472688e3b0..0144345ce489a 100644 --- a/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureSasTokenTests.java +++ b/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureSasTokenTests.java @@ -24,7 +24,7 @@ import static org.elasticsearch.repositories.azure.AzureStorageSettings.ACCOUNT_SETTING; import static org.elasticsearch.repositories.azure.AzureStorageSettings.SAS_TOKEN_SETTING; -import static org.elasticsearch.repositories.blobstore.BlobStoreTestUtil.randomPurpose; +import static org.elasticsearch.repositories.blobstore.AbstractBlobContainerRetriesTestCase.randomRetryingPurpose; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.lessThan; @@ -78,7 +78,7 @@ public void testSasTokenIsUsedAsProvidedInSettings() throws Exception { }); final BlobContainer blobContainer = createBlobContainer(maxRetries, null, LocationMode.PRIMARY_ONLY, clientName, secureSettings); - try (InputStream inputStream = blobContainer.readBlob(randomPurpose(), "sas_test")) { + try (InputStream inputStream = blobContainer.readBlob(randomRetryingPurpose(), "sas_test")) { assertArrayEquals(bytes, BytesReference.toBytes(Streams.readFully(inputStream))); } } diff --git a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java index 4b76ddc3e1afa..55b1a41e7d18b 100644 --- a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java +++ b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java @@ -60,6 +60,7 @@ import java.util.Map; import static org.elasticsearch.common.io.Streams.readFully; +import static org.elasticsearch.repositories.blobstore.AbstractBlobContainerRetriesTestCase.randomRetryingPurpose; import static org.elasticsearch.repositories.blobstore.BlobStoreTestUtil.randomPurpose; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.CREDENTIALS_FILE_SETTING; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.ENDPOINT_SETTING; @@ -195,7 +196,7 @@ public void testWriteReadLarge() throws IOException { random().nextBytes(data); writeBlob(container, "foobar", new BytesArray(data), false); } - try (InputStream stream = container.readBlob(randomPurpose(), "foobar")) { + try (InputStream stream = container.readBlob(randomRetryingPurpose(), "foobar")) { BytesRefBuilder target = new BytesRefBuilder(); while (target.length() < data.length) { byte[] buffer = new byte[scaledRandomIntBetween(1, data.length - target.length())]; @@ -218,7 +219,7 @@ public void testWriteFileMultipleOfChunkSize() throws IOException { byte[] initialValue = randomByteArrayOfLength(uploadSize); container.writeBlob(randomPurpose(), key, new BytesArray(initialValue), true); - BytesReference reference = readFully(container.readBlob(randomPurpose(), key)); + BytesReference reference = readFully(container.readBlob(randomRetryingPurpose(), key)); assertEquals(new BytesArray(initialValue), reference); container.deleteBlobsIgnoringIfNotExists(randomPurpose(), Iterators.single(key)); diff --git a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageThirdPartyTests.java b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageThirdPartyTests.java index 520f922280eb0..0572912e96429 100644 --- a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageThirdPartyTests.java +++ b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageThirdPartyTests.java @@ -33,6 +33,7 @@ import java.util.Collection; import static org.elasticsearch.common.io.Streams.readFully; +import static org.elasticsearch.repositories.blobstore.AbstractBlobContainerRetriesTestCase.randomRetryingPurpose; import static org.elasticsearch.repositories.blobstore.BlobStoreTestUtil.randomPurpose; import static org.hamcrest.Matchers.blankOrNullString; import static org.hamcrest.Matchers.containsString; @@ -115,7 +116,7 @@ public void testResumeAfterUpdate() { executeOnBlobStore(repo, container -> { container.writeBlob(randomPurpose(), blobKey, new BytesArray(initialValue), true); - try (InputStream inputStream = container.readBlob(randomPurpose(), blobKey)) { + try (InputStream inputStream = container.readBlob(randomRetryingPurpose(), blobKey)) { // Trigger the first request for the blob, partially read it int read = inputStream.read(); assert read != -1; diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerRetriesTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerRetriesTests.java index a09aa8142ad63..54420b2634466 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerRetriesTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerRetriesTests.java @@ -273,7 +273,7 @@ public void testReadLargeBlobWithRetries() throws Exception { exchange.close(); }); - try (InputStream inputStream = blobContainer.readBlob(randomPurpose(), "large_blob_retries")) { + try (InputStream inputStream = blobContainer.readBlob(randomRetryingPurpose(), "large_blob_retries")) { assertArrayEquals(bytes, BytesReference.toBytes(Streams.readFully(inputStream))); } } @@ -600,10 +600,10 @@ public void testContentsChangeWhileStreaming() throws IOException { byte[] initialValue = randomByteArrayOfLength(enoughBytesToNotBeEntirelyBuffered); container.writeBlob(randomPurpose(), key, new BytesArray(initialValue), true); - BytesReference reference = readFully(container.readBlob(randomPurpose(), key)); + BytesReference reference = readFully(container.readBlob(randomRetryingPurpose(), key)); assertEquals(new BytesArray(initialValue), reference); - try (InputStream inputStream = container.readBlob(randomPurpose(), key)) { + try (InputStream inputStream = container.readBlob(randomRetryingPurpose(), key)) { // Trigger the first chunk to load int read = inputStream.read(); assert read != -1; diff --git a/modules/repository-s3/qa/third-party/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryThirdPartyTests.java b/modules/repository-s3/qa/third-party/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryThirdPartyTests.java index f6b37b3f34c89..0f113e8253cc6 100644 --- a/modules/repository-s3/qa/third-party/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryThirdPartyTests.java +++ b/modules/repository-s3/qa/third-party/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryThirdPartyTests.java @@ -49,6 +49,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; +import static org.elasticsearch.repositories.blobstore.AbstractBlobContainerRetriesTestCase.randomRetryingPurpose; import static org.elasticsearch.repositories.blobstore.BlobStoreTestUtil.randomPurpose; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.blankOrNullString; @@ -253,7 +254,7 @@ public void testCopy() { blobBytes.length() ); - return destinationBlobContainer.readBlob(randomPurpose(), destinationBlobName).readAllBytes(); + return destinationBlobContainer.readBlob(randomRetryingPurpose(), destinationBlobName).readAllBytes(); }); assertArrayEquals(BytesReference.toBytes(blobBytes), targetBytes); @@ -280,7 +281,7 @@ public void testMultipartCopy() { blobBytes.length() ); - return destinationBlobContainer.readBlob(randomPurpose(), destinationBlobName).readAllBytes(); + return destinationBlobContainer.readBlob(randomRetryingPurpose(), destinationBlobName).readAllBytes(); }); assertArrayEquals(BytesReference.toBytes(blobBytes), targetBytes); diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java index 82c907807e0cb..4bfa12224765d 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java @@ -298,7 +298,7 @@ protected Matcher getMaxRetriesMatcher(int maxRetries) { return allOf(greaterThanOrEqualTo(maxRetries), lessThanOrEqualTo(RetryingInputStream.MAX_SUPPRESSED_EXCEPTIONS)); } - protected OperationPurpose randomRetryingPurpose() { + public static OperationPurpose randomRetryingPurpose() { return randomValueOtherThan(OperationPurpose.REPOSITORY_ANALYSIS, BlobStoreTestUtil::randomPurpose); } diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/ESBlobStoreRepositoryIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/ESBlobStoreRepositoryIntegTestCase.java index 8870c79218780..309133dbaddb9 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/ESBlobStoreRepositoryIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/ESBlobStoreRepositoryIntegTestCase.java @@ -60,6 +60,7 @@ import java.util.stream.IntStream; import java.util.stream.Stream; +import static org.elasticsearch.repositories.blobstore.AbstractBlobContainerRetriesTestCase.randomRetryingPurpose; import static org.elasticsearch.repositories.blobstore.BlobStoreRepository.READONLY_SETTING_KEY; import static org.elasticsearch.repositories.blobstore.BlobStoreRepository.SNAPSHOT_INDEX_NAME_FORMAT; import static org.elasticsearch.repositories.blobstore.BlobStoreRepository.SNAPSHOT_NAME_FORMAT; @@ -130,7 +131,7 @@ public void testReadNonExistingPath() throws IOException { try (BlobStore store = newBlobStore()) { final BlobContainer container = store.blobContainer(BlobPath.EMPTY); expectThrows(NoSuchFileException.class, () -> { - try (InputStream is = container.readBlob(randomPurpose(), "non-existing")) { + try (InputStream is = container.readBlob(randomRetryingPurpose(), "non-existing")) { is.read(); } }); @@ -157,7 +158,7 @@ public void testWriteMaybeCopyRead() throws IOException { readBlobName = destinationBlobName; } catch (UnsupportedOperationException ignored) {} } - try (InputStream stream = container.readBlob(randomPurpose(), readBlobName)) { + try (InputStream stream = container.readBlob(randomRetryingPurpose(), readBlobName)) { BytesRefBuilder target = new BytesRefBuilder(); while (target.length() < data.length) { byte[] buffer = new byte[scaledRandomIntBetween(1, data.length - target.length())]; @@ -285,7 +286,7 @@ public static byte[] writeRandomBlob(BlobContainer container, String name, int l public static byte[] readBlobFully(BlobContainer container, String name, int length) throws IOException { byte[] data = new byte[length]; - try (InputStream inputStream = container.readBlob(randomPurpose(), name)) { + try (InputStream inputStream = container.readBlob(randomRetryingPurpose(), name)) { assertThat(Streams.readFully(inputStream, data), CoreMatchers.equalTo(length)); assertThat(inputStream.read(), CoreMatchers.equalTo(-1)); } From eecda281df7a5bb0ff1cf9b3093b0405759e84d6 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 18 Nov 2025 17:44:38 +1100 Subject: [PATCH 16/31] Opt out of GCS client retries when reading via retryable stream --- ...eCloudStorageBlobStoreRepositoryTests.java | 9 +- .../gcs/GoogleCloudStorageBlobStore.java | 29 ++++- .../gcs/GoogleCloudStorageClientSettings.java | 3 +- ...GoogleCloudStorageRetryingInputStream.java | 30 +---- .../gcs/GoogleCloudStorageService.java | 106 +++++++++++++----- ...CloudStorageBlobContainerRetriesTests.java | 19 +++- ...leCloudStorageBlobStoreContainerTests.java | 11 +- ...GoogleCloudStorageClientsManagerTests.java | 5 +- .../gcs/GoogleCloudStorageServiceTests.java | 35 ++++-- 9 files changed, 168 insertions(+), 79 deletions(-) diff --git a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java index 55b1a41e7d18b..74ca917c014fc 100644 --- a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java +++ b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java @@ -64,6 +64,7 @@ import static org.elasticsearch.repositories.blobstore.BlobStoreTestUtil.randomPurpose; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.CREDENTIALS_FILE_SETTING; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.ENDPOINT_SETTING; +import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.MAX_RETRIES_SETTING; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.TOKEN_URI_SETTING; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageRepository.BASE_PATH; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageRepository.BUCKET; @@ -119,6 +120,7 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { settings.put(super.nodeSettings(nodeOrdinal, otherSettings)); settings.put(ENDPOINT_SETTING.getConcreteSettingForNamespace("test").getKey(), httpServerUrl()); settings.put(TOKEN_URI_SETTING.getConcreteSettingForNamespace("test").getKey(), httpServerUrl() + "/token"); + settings.put(MAX_RETRIES_SETTING.getConcreteSettingForNamespace("test").getKey(), 5); final MockSecureSettings secureSettings = new MockSecureSettings(); final byte[] serviceAccount = TestUtils.createServiceAccount(random()); @@ -243,9 +245,10 @@ protected GoogleCloudStorageService createStorageService(ClusterService clusterS @Override StorageOptions createStorageOptions( final GoogleCloudStorageClientSettings gcsClientSettings, - final HttpTransportOptions httpTransportOptions + final HttpTransportOptions httpTransportOptions, + final RetryBehaviour retryBehaviour ) { - StorageOptions options = super.createStorageOptions(gcsClientSettings, httpTransportOptions); + StorageOptions options = super.createStorageOptions(gcsClientSettings, httpTransportOptions, retryBehaviour); return options.toBuilder() .setStorageRetryStrategy(StorageRetryStrategy.getLegacyStorageRetryStrategy()) .setHost(options.getHost()) @@ -256,7 +259,7 @@ StorageOptions createStorageOptions( .setInitialRetryDelay(Duration.ofMillis(10L)) .setRetryDelayMultiplier(options.getRetrySettings().getRetryDelayMultiplier()) .setMaxRetryDelay(Duration.ofSeconds(1L)) - .setMaxAttempts(6) + .setMaxAttempts(options.getRetrySettings().getMaxAttempts()) .setJittered(false) .setInitialRpcTimeout(options.getRetrySettings().getInitialRpcTimeout()) .setRpcTimeoutMultiplier(options.getRetrySettings().getRpcTimeoutMultiplier()) diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStore.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStore.java index 399026ae5c066..3f76ac5f1a667 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStore.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStore.java @@ -139,8 +139,35 @@ class GoogleCloudStorageBlobStore implements BlobStore { this.casBackoffPolicy = casBackoffPolicy; } + /** + * Get a client that will retry according to its configured settings + * + * @return A client + */ MeteredStorage client() throws IOException { - return storageService.client(projectId, clientName, repositoryName, statsCollector); + return storageService.client( + projectId, + clientName, + repositoryName, + statsCollector, + GoogleCloudStorageService.RetryBehaviour.ClientConfigured + ); + } + + /** + * Get a client that will not retry on failure + * + * @return A client with max retries configured to zero + */ + MeteredStorage clientNoRetries() throws IOException { + return storageService.client(projectId, clientName, repositoryName, statsCollector, GoogleCloudStorageService.RetryBehaviour.None); + } + + /** + * @return The settings for the configured client + */ + GoogleCloudStorageClientSettings clientSettings() { + return storageService.clientSettings(projectId, clientName); } @Override diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientSettings.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientSettings.java index 562213ba77f84..10d557ce2a8aa 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientSettings.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientSettings.java @@ -128,8 +128,7 @@ public class GoogleCloudStorageClientSettings { static final Setting.AffixSetting MAX_RETRIES_SETTING = Setting.affixKeySetting( PREFIX, "max_retries", - (key) -> Setting.intSetting(key, 5, 0, Setting.Property.NodeScope), - () -> PROXY_HOST_SETTING + (key) -> Setting.intSetting(key, 5, 0, Setting.Property.NodeScope) ); /** The credentials used by the client to connect to the Storage endpoint. */ diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java index e33a9029e5026..da2ea8af35084 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java @@ -16,7 +16,6 @@ import org.apache.logging.log4j.Logger; import org.elasticsearch.common.blobstore.OperationPurpose; import org.elasticsearch.common.blobstore.RetryingInputStream; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Nullable; import org.elasticsearch.repositories.blobstore.RequestedRangeNotSatisfiedException; import org.elasticsearch.rest.RestStatus; @@ -26,8 +25,6 @@ import java.io.InputStream; import java.nio.file.NoSuchFileException; -import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.MAX_RETRIES_SETTING; - /** * Wrapper around reads from GCS that will retry blob downloads that fail part-way through, resuming from where the failure occurred. * This should be handled by the SDK but it isn't today. This should be revisited in the future (e.g. before removing @@ -51,17 +48,7 @@ class GoogleCloudStorageRetryingInputStream extends RetryingInputStream { long start, long end ) throws IOException { - super(new GoogleCloudStorageBlobStoreServices(blobStore, purpose, blobId, calculateMaxRetries(blobStore)), purpose, start, end); - } - - private static int calculateMaxRetries(GoogleCloudStorageBlobStore blobStore) throws IOException { - final int maxAttempts = blobStore.client().getOptions().getRetrySettings().getMaxAttempts(); - if (maxAttempts == 0) { - assert false : "Time-based retrying not supported"; - return MAX_RETRIES_SETTING.getDefault(Settings.EMPTY); - } - // GCS client is configured by specifying the number of attempts, the number of retries is that minus one - return maxAttempts - 1; + super(new GoogleCloudStorageBlobStoreServices(blobStore, purpose, blobId), purpose, start, end); } private static class GoogleCloudStorageBlobStoreServices implements BlobStoreServices { @@ -69,23 +56,16 @@ private static class GoogleCloudStorageBlobStoreServices implements BlobStoreSer private final GoogleCloudStorageBlobStore blobStore; private final OperationPurpose purpose; private final BlobId blobId; - private final int maxRetries; - - private GoogleCloudStorageBlobStoreServices( - GoogleCloudStorageBlobStore blobStore, - OperationPurpose purpose, - BlobId blobId, - int maxRetries - ) { + + private GoogleCloudStorageBlobStoreServices(GoogleCloudStorageBlobStore blobStore, OperationPurpose purpose, BlobId blobId) { this.blobStore = blobStore; this.purpose = purpose; this.blobId = blobId; - this.maxRetries = maxRetries; } @Override public InputStreamAtVersion getInputStreamAtVersion(@Nullable Long lastGeneration, long start, long end) throws IOException { - final MeteredStorage client = blobStore.client(); + final MeteredStorage client = blobStore.clientNoRetries(); try { try { final var meteredGet = client.meteredObjectsGet(purpose, blobId.getBucket(), blobId.getName()); @@ -160,7 +140,7 @@ public long getMeaningfulProgressSize() { @Override public int getMaxRetries() { - return maxRetries; + return blobStore.clientSettings().getMaxRetries(); } @Override diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageService.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageService.java index c1853eff44b34..40baeac8c249e 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageService.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageService.java @@ -95,6 +95,23 @@ public void refreshAndClearCache(Map c clientsManager.refreshAndClearCacheForClusterClients(clientsSettings); } + public enum RetryBehaviour { + ClientConfigured { + @Override + public RetrySettings createRetrySettings(GoogleCloudStorageClientSettings settings) { + return RetrySettings.newBuilder().setMaxAttempts(settings.getMaxRetries() + 1).build(); + } + }, + None { + @Override + public RetrySettings createRetrySettings(GoogleCloudStorageClientSettings settings) { + return RetrySettings.newBuilder().setMaxAttempts(1).build(); + } + }; + + public abstract RetrySettings createRetrySettings(GoogleCloudStorageClientSettings settings); + } + /** * Attempts to retrieve a client from the cache. If the client does not exist it * will be created from the latest settings and will populate the cache. The @@ -112,9 +129,14 @@ public MeteredStorage client( @Nullable final ProjectId projectId, final String clientName, final String repositoryName, - final GcsRepositoryStatsCollector statsCollector + final GcsRepositoryStatsCollector statsCollector, + final RetryBehaviour retryBehaviour ) throws IOException { - return clientsManager.client(projectId, clientName, repositoryName, statsCollector); + return clientsManager.client(projectId, clientName, repositoryName, statsCollector, retryBehaviour); + } + + GoogleCloudStorageClientSettings clientSettings(@Nullable ProjectId projectId, final String clientName) { + return clientsManager.clientSettings(projectId, clientName); } /** @@ -139,8 +161,11 @@ GoogleCloudStorageClientsManager getClientsManager() { * @return a new client storage instance that can be used to manage objects * (blobs) */ - private MeteredStorage createClient(GoogleCloudStorageClientSettings gcsClientSettings, GcsRepositoryStatsCollector statsCollector) - throws IOException { + private MeteredStorage createClient( + GoogleCloudStorageClientSettings gcsClientSettings, + GcsRepositoryStatsCollector statsCollector, + RetryBehaviour retryBehaviour + ) throws IOException { final NetHttpTransport.Builder builder = new NetHttpTransport.Builder(); // requires java.lang.RuntimePermission "setFactory" @@ -184,13 +209,14 @@ public HttpRequestInitializer getHttpRequestInitializer(ServiceOptions ser } }; - final StorageOptions storageOptions = createStorageOptions(gcsClientSettings, httpTransportOptions); + final StorageOptions storageOptions = createStorageOptions(gcsClientSettings, httpTransportOptions, retryBehaviour); return new MeteredStorage(storageOptions.getService(), statsCollector); } StorageOptions createStorageOptions( final GoogleCloudStorageClientSettings gcsClientSettings, - final HttpTransportOptions httpTransportOptions + final HttpTransportOptions httpTransportOptions, + final RetryBehaviour retryBehaviour ) { final StorageOptions.Builder storageOptionsBuilder = StorageOptions.newBuilder() .setStorageRetryStrategy(getRetryStrategy()) @@ -200,7 +226,7 @@ StorageOptions createStorageOptions( ? Map.of("user-agent", gcsClientSettings.getApplicationName()) : Map.of(); }) - .setRetrySettings(RetrySettings.newBuilder().setMaxAttempts(gcsClientSettings.getMaxRetries() + 1).build()); + .setRetrySettings(retryBehaviour.createRetrySettings(gcsClientSettings)); if (Strings.hasLength(gcsClientSettings.getHost())) { storageOptionsBuilder.setHost(gcsClientSettings.getHost()); } @@ -405,12 +431,25 @@ void refreshAndClearCacheForClusterClients(Map clientCache = emptyMap(); + protected volatile Map clientCache = emptyMap(); /** * Get the current client settings for all clients in this holder. @@ -478,46 +519,57 @@ abstract class ClientsHolder { * @return a cached client storage instance that can be used to manage objects * (blobs) */ - MeteredStorage client(final String clientName, final String repositoryName, final GcsRepositoryStatsCollector statsCollector) - throws IOException { + MeteredStorage client( + final String clientName, + final String repositoryName, + final GcsRepositoryStatsCollector statsCollector, + final RetryBehaviour retryBehaviour + ) throws IOException { + final var cacheKey = new CacheKey(repositoryName, retryBehaviour); { - final MeteredStorage storage = clientCache.get(repositoryName); + final MeteredStorage storage = clientCache.get(cacheKey); if (storage != null) { return storage; } } synchronized (this) { - final MeteredStorage existing = clientCache.get(repositoryName); + final MeteredStorage existing = clientCache.get(cacheKey); if (existing != null) { return existing; } - final GoogleCloudStorageClientSettings settings = allClientSettings().get(clientName); - - if (settings == null) { - throw new IllegalArgumentException( - "Unknown client name [" + clientName + "]. Existing client configs: " + allClientSettings().keySet() - ); - } + final GoogleCloudStorageClientSettings settings = clientSettings(clientName); logger.debug(() -> format("creating GCS client with client_name [%s], endpoint [%s]", clientName, settings.getHost())); - final MeteredStorage storage = createClient(settings, statsCollector); - clientCache = Maps.copyMapWithAddedEntry(clientCache, repositoryName, storage); + final MeteredStorage storage = createClient(settings, statsCollector, retryBehaviour); + clientCache = Maps.copyMapWithAddedEntry(clientCache, cacheKey, storage); return storage; } } + public GoogleCloudStorageClientSettings clientSettings(String clientName) { + final GoogleCloudStorageClientSettings settings = allClientSettings().get(clientName); + + if (settings == null) { + throw new IllegalArgumentException( + "Unknown client name [" + clientName + "]. Existing client configs: " + allClientSettings().keySet() + ); + } + + return settings; + } + synchronized void closeRepositoryClients(String repositoryName) { clientCache = clientCache.entrySet() .stream() - .filter(entry -> entry.getKey().equals(repositoryName) == false) + .filter(entry -> entry.getKey().repositoryName().equals(repositoryName) == false) .collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue)); } // package private for tests final boolean hasCachedClientForRepository(String repositoryName) { - return clientCache.containsKey(repositoryName); + return clientCache.keySet().stream().anyMatch(key -> key.repositoryName().equals(repositoryName)); } } diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerRetriesTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerRetriesTests.java index 54420b2634466..48cf6b5d96d90 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerRetriesTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerRetriesTests.java @@ -85,6 +85,7 @@ import static org.elasticsearch.repositories.gcs.GoogleCloudStorageBlobStore.MAX_DELETES_PER_BATCH; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.CREDENTIALS_FILE_SETTING; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.ENDPOINT_SETTING; +import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.MAX_RETRIES_SETTING; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.READ_TIMEOUT_SETTING; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.TOKEN_URI_SETTING; import static org.hamcrest.Matchers.anyOf; @@ -146,6 +147,9 @@ protected BlobContainer createBlobContainer( if (readTimeout != null) { clientSettings.put(READ_TIMEOUT_SETTING.getConcreteSettingForNamespace(client).getKey(), readTimeout); } + if (maxRetries != null) { + clientSettings.put(MAX_RETRIES_SETTING.getConcreteSettingForNamespace(client).getKey(), maxRetries); + } final MockSecureSettings secureSettings = new MockSecureSettings(); secureSettings.setFile(CREDENTIALS_FILE_SETTING.getConcreteSettingForNamespace(client).getKey(), createServiceAccount(random())); @@ -155,7 +159,8 @@ protected BlobContainer createBlobContainer( @Override StorageOptions createStorageOptions( final GoogleCloudStorageClientSettings gcsClientSettings, - final HttpTransportOptions httpTransportOptions + final HttpTransportOptions httpTransportOptions, + final RetryBehaviour retryBehaviour ) { final HttpTransportOptions requestCountingHttpTransportOptions = new HttpTransportOptions( HttpTransportOptions.newBuilder() @@ -182,7 +187,11 @@ public HttpRequestInitializer getHttpRequestInitializer(ServiceOptions ser }; } }; - final StorageOptions options = super.createStorageOptions(gcsClientSettings, requestCountingHttpTransportOptions); + final StorageOptions options = super.createStorageOptions( + gcsClientSettings, + requestCountingHttpTransportOptions, + retryBehaviour + ); final RetrySettings.Builder retrySettingsBuilder = RetrySettings.newBuilder() .setTotalTimeout(options.getRetrySettings().getTotalTimeout()) .setInitialRetryDelay(Duration.ofMillis(10L)) @@ -191,10 +200,8 @@ public HttpRequestInitializer getHttpRequestInitializer(ServiceOptions ser .setJittered(false) .setInitialRpcTimeout(Duration.ofSeconds(1)) .setRpcTimeoutMultiplier(options.getRetrySettings().getRpcTimeoutMultiplier()) - .setMaxRpcTimeout(Duration.ofSeconds(1)); - if (maxRetries != null) { - retrySettingsBuilder.setMaxAttempts(maxRetries + 1); - } + .setMaxRpcTimeout(Duration.ofSeconds(1)) + .setMaxAttempts(options.getRetrySettings().getMaxAttempts()); return options.toBuilder() .setStorageRetryStrategy(getRetryStrategy()) .setHost(options.getHost()) diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreContainerTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreContainerTests.java index 849c09c506078..e6c854c23be9d 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreContainerTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreContainerTests.java @@ -82,8 +82,15 @@ public void testDeleteBlobsIgnoringIfNotExistsThrowsIOException() throws Excepti final MeteredStorage meteredStorage = new MeteredStorage(storage, storageRpc, new GcsRepositoryStatsCollector()); final GoogleCloudStorageService storageService = mock(GoogleCloudStorageService.class); - when(storageService.client(eq(ProjectId.DEFAULT), any(String.class), any(String.class), any(GcsRepositoryStatsCollector.class))) - .thenReturn(meteredStorage); + when( + storageService.client( + eq(ProjectId.DEFAULT), + any(String.class), + any(String.class), + any(GcsRepositoryStatsCollector.class), + any(GoogleCloudStorageService.RetryBehaviour.class) + ) + ).thenReturn(meteredStorage); try ( BlobStore store = new GoogleCloudStorageBlobStore( diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java index a3a538b82aa6f..8f9415132083b 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java @@ -43,6 +43,7 @@ import java.util.stream.Stream; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.CREDENTIALS_FILE_SETTING; +import static org.elasticsearch.repositories.gcs.GoogleCloudStorageService.RetryBehaviour.ClientConfigured; import static org.hamcrest.Matchers.anEmptyMap; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.containsInAnyOrder; @@ -294,11 +295,11 @@ public void testProjectClientsDisabled() throws IOException { } private MeteredStorage getClientFromManager(ProjectId projectId, String clientName) throws IOException { - return gcsClientsManager.client(projectId, clientName, repoNameForClient(clientName), statsCollector); + return gcsClientsManager.client(projectId, clientName, repoNameForClient(clientName), statsCollector, ClientConfigured); } private MeteredStorage getClientFromService(ProjectId projectId, String clientName) throws IOException { - return googleCloudStorageService.client(projectId, clientName, repoNameForClient(clientName), statsCollector); + return googleCloudStorageService.client(projectId, clientName, repoNameForClient(clientName), statsCollector, ClientConfigured); } private ProjectId projectIdForClusterClient() { diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageServiceTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageServiceTests.java index bf4745a16b41c..369d820a638a0 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageServiceTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageServiceTests.java @@ -40,6 +40,7 @@ import java.util.Locale; import java.util.UUID; +import static org.elasticsearch.repositories.gcs.GoogleCloudStorageService.RetryBehaviour.ClientConfigured; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -58,6 +59,7 @@ public void testClientInitializer() throws Exception { + ":" + randomIntBetween(1, 65535); final String projectIdName = randomAlphaOfLength(randomIntBetween(1, 10)).toLowerCase(Locale.ROOT); + final int maxRetries = randomIntBetween(0, 10); final Settings settings = Settings.builder() .put( GoogleCloudStorageClientSettings.CONNECT_TIMEOUT_SETTING.getConcreteSettingForNamespace(clientName).getKey(), @@ -76,6 +78,7 @@ public void testClientInitializer() throws Exception { .put(GoogleCloudStorageClientSettings.PROXY_TYPE_SETTING.getConcreteSettingForNamespace(clientName).getKey(), "HTTP") .put(GoogleCloudStorageClientSettings.PROXY_HOST_SETTING.getConcreteSettingForNamespace(clientName).getKey(), "192.168.52.15") .put(GoogleCloudStorageClientSettings.PROXY_PORT_SETTING.getConcreteSettingForNamespace(clientName).getKey(), 8080) + .put(GoogleCloudStorageClientSettings.MAX_RETRIES_SETTING.getConcreteSettingForNamespace(clientName).getKey(), maxRetries) .build(); SetOnce proxy = new SetOnce<>(); final var clusterService = ClusterServiceUtils.createClusterService(new DeterministicTaskQueue().getThreadPool()); @@ -89,17 +92,18 @@ void notifyProxyIsSet(Proxy p) { var statsCollector = new GcsRepositoryStatsCollector(); final IllegalArgumentException e = expectThrows( IllegalArgumentException.class, - () -> service.client(projectIdForClusterClient(), "another_client", "repo", statsCollector) + () -> service.client(projectIdForClusterClient(), "another_client", "repo", statsCollector, randomRetryBehaviour()) ); assertThat(e.getMessage(), Matchers.startsWith("Unknown client name")); assertSettingDeprecationsAndWarnings( new Setting[] { GoogleCloudStorageClientSettings.APPLICATION_NAME_SETTING.getConcreteSettingForNamespace(clientName) } ); - final var storage = service.client(projectIdForClusterClient(), clientName, "repo", statsCollector); + final var storage = service.client(projectIdForClusterClient(), clientName, "repo", statsCollector, ClientConfigured); assertThat(storage.getOptions().getApplicationName(), Matchers.containsString(applicationName)); assertThat(storage.getOptions().getHost(), Matchers.is(endpoint)); assertThat(storage.getOptions().getProjectId(), Matchers.is(projectIdName)); assertThat(storage.getOptions().getTransportOptions(), Matchers.instanceOf(HttpTransportOptions.class)); + assertThat(storage.getOptions().getRetrySettings().getMaxAttempts(), equalTo(maxRetries + 1)); assertThat( ((HttpTransportOptions) storage.getOptions().getTransportOptions()).getConnectTimeout(), Matchers.is((int) connectTimeValue.millis()) @@ -125,18 +129,19 @@ public void testReinitClientSettings() throws Exception { ClusterServiceUtils.createClusterService(new DeterministicTaskQueue().getThreadPool()) ); when(pluginServices.projectResolver()).thenReturn(TestProjectResolvers.DEFAULT_PROJECT_ONLY); + final GoogleCloudStorageService.RetryBehaviour retryBehaviour = randomRetryBehaviour(); try (GoogleCloudStoragePlugin plugin = new GoogleCloudStoragePlugin(settings1)) { plugin.createComponents(pluginServices); final GoogleCloudStorageService storageService = plugin.storageService.get(); var statsCollector = new GcsRepositoryStatsCollector(); - final var client11 = storageService.client(projectIdForClusterClient(), "gcs1", "repo1", statsCollector); + final var client11 = storageService.client(projectIdForClusterClient(), "gcs1", "repo1", statsCollector, retryBehaviour); assertThat(client11.getOptions().getProjectId(), equalTo("project_gcs11")); - final var client12 = storageService.client(projectIdForClusterClient(), "gcs2", "repo2", statsCollector); + final var client12 = storageService.client(projectIdForClusterClient(), "gcs2", "repo2", statsCollector, retryBehaviour); assertThat(client12.getOptions().getProjectId(), equalTo("project_gcs12")); // client 3 is missing final IllegalArgumentException e1 = expectThrows( IllegalArgumentException.class, - () -> storageService.client(projectIdForClusterClient(), "gcs3", "repo3", statsCollector) + () -> storageService.client(projectIdForClusterClient(), "gcs3", "repo3", statsCollector, retryBehaviour) ); assertThat(e1.getMessage(), containsString("Unknown client name [gcs3].")); // update client settings @@ -144,18 +149,18 @@ public void testReinitClientSettings() throws Exception { // old client 1 not changed assertThat(client11.getOptions().getProjectId(), equalTo("project_gcs11")); // new client 1 is changed - final var client21 = storageService.client(projectIdForClusterClient(), "gcs1", "repo1", statsCollector); + final var client21 = storageService.client(projectIdForClusterClient(), "gcs1", "repo1", statsCollector, retryBehaviour); assertThat(client21.getOptions().getProjectId(), equalTo("project_gcs21")); // old client 2 not changed assertThat(client12.getOptions().getProjectId(), equalTo("project_gcs12")); // new client2 is gone final IllegalArgumentException e2 = expectThrows( IllegalArgumentException.class, - () -> storageService.client(projectIdForClusterClient(), "gcs2", "repo2", statsCollector) + () -> storageService.client(projectIdForClusterClient(), "gcs2", "repo2", statsCollector, retryBehaviour) ); assertThat(e2.getMessage(), containsString("Unknown client name [gcs2].")); // client 3 emerged - final var client23 = storageService.client(projectIdForClusterClient(), "gcs3", "repo3", statsCollector); + final var client23 = storageService.client(projectIdForClusterClient(), "gcs3", "repo3", statsCollector, retryBehaviour); assertThat(client23.getOptions().getProjectId(), equalTo("project_gcs23")); } } @@ -173,23 +178,27 @@ public void testClientsAreNotSharedAcrossRepositories() throws Exception { plugin.createComponents(pluginServices); final GoogleCloudStorageService storageService = plugin.storageService.get(); + final GoogleCloudStorageService.RetryBehaviour retryBehaviour = randomRetryBehaviour(); final MeteredStorage repo1Client = storageService.client( projectIdForClusterClient(), "gcs1", "repo1", - new GcsRepositoryStatsCollector() + new GcsRepositoryStatsCollector(), + retryBehaviour ); final MeteredStorage repo2Client = storageService.client( projectIdForClusterClient(), "gcs1", "repo2", - new GcsRepositoryStatsCollector() + new GcsRepositoryStatsCollector(), + retryBehaviour ); final MeteredStorage repo1ClientSecondInstance = storageService.client( projectIdForClusterClient(), "gcs1", "repo1", - new GcsRepositoryStatsCollector() + new GcsRepositoryStatsCollector(), + retryBehaviour ); assertNotSame(repo1Client, repo2Client); @@ -240,4 +249,8 @@ public void handle(HttpRequest request, HttpResponse response, HttpContext conte private ProjectId projectIdForClusterClient() { return randomBoolean() ? ProjectId.DEFAULT : null; } + + private static GoogleCloudStorageService.RetryBehaviour randomRetryBehaviour() { + return randomFrom(GoogleCloudStorageService.RetryBehaviour.values()); + } } From 6fcef8071fb0bae3c58fb237adb0ecae19fbf446 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Wed, 19 Nov 2025 13:53:14 +1100 Subject: [PATCH 17/31] Increase max retries to accommodate random failures --- ...GoogleCloudStorageBlobStoreRepositoryTests.java | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java index 74ca917c014fc..3fec77c962b87 100644 --- a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java +++ b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java @@ -13,7 +13,6 @@ import fixture.gcs.GoogleCloudStorageHttpHandler; import fixture.gcs.TestUtils; -import com.google.api.gax.retrying.RetrySettings; import com.google.cloud.http.HttpTransportOptions; import com.google.cloud.storage.StorageOptions; import com.google.cloud.storage.StorageRetryStrategy; @@ -120,7 +119,7 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { settings.put(super.nodeSettings(nodeOrdinal, otherSettings)); settings.put(ENDPOINT_SETTING.getConcreteSettingForNamespace("test").getKey(), httpServerUrl()); settings.put(TOKEN_URI_SETTING.getConcreteSettingForNamespace("test").getKey(), httpServerUrl() + "/token"); - settings.put(MAX_RETRIES_SETTING.getConcreteSettingForNamespace("test").getKey(), 5); + settings.put(MAX_RETRIES_SETTING.getConcreteSettingForNamespace("test").getKey(), 6); final MockSecureSettings secureSettings = new MockSecureSettings(); final byte[] serviceAccount = TestUtils.createServiceAccount(random()); @@ -251,19 +250,12 @@ StorageOptions createStorageOptions( StorageOptions options = super.createStorageOptions(gcsClientSettings, httpTransportOptions, retryBehaviour); return options.toBuilder() .setStorageRetryStrategy(StorageRetryStrategy.getLegacyStorageRetryStrategy()) - .setHost(options.getHost()) - .setCredentials(options.getCredentials()) .setRetrySettings( - RetrySettings.newBuilder() - .setTotalTimeout(options.getRetrySettings().getTotalTimeout()) + options.getRetrySettings() + .toBuilder() .setInitialRetryDelay(Duration.ofMillis(10L)) - .setRetryDelayMultiplier(options.getRetrySettings().getRetryDelayMultiplier()) .setMaxRetryDelay(Duration.ofSeconds(1L)) - .setMaxAttempts(options.getRetrySettings().getMaxAttempts()) .setJittered(false) - .setInitialRpcTimeout(options.getRetrySettings().getInitialRpcTimeout()) - .setRpcTimeoutMultiplier(options.getRetrySettings().getRpcTimeoutMultiplier()) - .setMaxRpcTimeout(options.getRetrySettings().getMaxRpcTimeout()) .build() ) .build(); From 7634e66a70fb7bc74af94a3ad1b998c3632ec6e9 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Wed, 19 Nov 2025 15:02:19 +1100 Subject: [PATCH 18/31] Fix test --- .../repositories/gcs/GoogleCloudStorageBlobStore.java | 7 ++----- .../gcs/GoogleCloudStorageRetryingInputStream.java | 2 +- .../gcs/GoogleCloudStorageRetryingInputStreamTests.java | 3 ++- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStore.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStore.java index 3f76ac5f1a667..f02692a742a29 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStore.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStore.java @@ -163,11 +163,8 @@ MeteredStorage clientNoRetries() throws IOException { return storageService.client(projectId, clientName, repositoryName, statsCollector, GoogleCloudStorageService.RetryBehaviour.None); } - /** - * @return The settings for the configured client - */ - GoogleCloudStorageClientSettings clientSettings() { - return storageService.clientSettings(projectId, clientName); + int getMaxRetries() { + return storageService.clientSettings(projectId, clientName).getMaxRetries(); } @Override diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java index da2ea8af35084..2518d1294669d 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java @@ -140,7 +140,7 @@ public long getMeaningfulProgressSize() { @Override public int getMaxRetries() { - return blobStore.clientSettings().getMaxRetries(); + return blobStore.getMaxRetries(); } @Override diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStreamTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStreamTests.java index c3dbaf79672d0..51cd1f32bdb78 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStreamTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStreamTests.java @@ -151,7 +151,8 @@ private GoogleCloudStorageRetryingInputStream createRetryingInputStream(byte[] d } final GoogleCloudStorageBlobStore mockBlobStore = mock(GoogleCloudStorageBlobStore.class); - when(mockBlobStore.client()).thenReturn(meteredStorage); + when(mockBlobStore.clientNoRetries()).thenReturn(meteredStorage); + when(mockBlobStore.getMaxRetries()).thenReturn(6); return new GoogleCloudStorageRetryingInputStream( mockBlobStore, From e4b18ef968a7585e0bac0ada111e9a5486aadda1 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Wed, 19 Nov 2025 15:41:16 +1100 Subject: [PATCH 19/31] Fix tests again --- .../gcs/GoogleCloudStorageRetryingInputStreamTests.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStreamTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStreamTests.java index 51cd1f32bdb78..5908f29bb194d 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStreamTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStreamTests.java @@ -132,7 +132,8 @@ private GoogleCloudStorageRetryingInputStream createRetryingInputStream(byte[] d HttpResponse httpResponse = httpRequest.execute(); when(get.executeMedia()).thenReturn(httpResponse); final GoogleCloudStorageBlobStore mockBlobStore = mock(GoogleCloudStorageBlobStore.class); - when(mockBlobStore.client()).thenReturn(meteredStorage); + when(mockBlobStore.clientNoRetries()).thenReturn(meteredStorage); + when(mockBlobStore.getMaxRetries()).thenReturn(6); return new GoogleCloudStorageRetryingInputStream(mockBlobStore, OperationPurpose.SNAPSHOT_DATA, blobId); } From 3d2c2460084c5b0ff7d9b13414bb740228300fb3 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Wed, 19 Nov 2025 21:41:55 +1100 Subject: [PATCH 20/31] First pass on unit test --- .../blobstore/RetryingInputStreamTests.java | 318 ++++++++++++++++++ .../AbstractBlobContainerRetriesTestCase.java | 2 +- 2 files changed, 319 insertions(+), 1 deletion(-) create mode 100644 server/src/test/java/org/elasticsearch/common/blobstore/RetryingInputStreamTests.java diff --git a/server/src/test/java/org/elasticsearch/common/blobstore/RetryingInputStreamTests.java b/server/src/test/java/org/elasticsearch/common/blobstore/RetryingInputStreamTests.java new file mode 100644 index 0000000000000..78b6b26cd194d --- /dev/null +++ b/server/src/test/java/org/elasticsearch/common/blobstore/RetryingInputStreamTests.java @@ -0,0 +1,318 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.common.blobstore; + +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.Streams; +import org.elasticsearch.repositories.blobstore.RequestedRangeNotSatisfiedException; +import org.elasticsearch.test.ESTestCase; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.NoSuchFileException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Stream; + +import static org.elasticsearch.repositories.blobstore.AbstractBlobContainerRetriesTestCase.randomFiniteRetryingPurpose; +import static org.elasticsearch.repositories.blobstore.AbstractBlobContainerRetriesTestCase.randomRetryingPurpose; +import static org.hamcrest.Matchers.empty; + +public class RetryingInputStreamTests extends ESTestCase { + + public void testRetryableErrorsWhenReadingAreRetried() throws IOException { + final var attemptCounter = new AtomicInteger(); + final var retryableFailures = randomIntBetween(1, 5); + final var failureCounter = new AtomicInteger(retryableFailures); + final var retryStarted = new ArrayList(); + final var retrySucceeded = new ArrayList(); + final var resourceSize = ByteSizeValue.ofKb(randomIntBetween(5, 200)); + logger.info("--> resource size: {}", resourceSize); + final var resourceBytes = randomBytesReference((int) resourceSize.getBytes()); + final var eTag = randomUUID(); + + final var stream = new RetryingInputStream<>(new BlobStoreServicesAdapter(retryStarted, retrySucceeded) { + @Override + public RetryingInputStream.InputStreamAtVersion getInputStreamAtVersion(String version, long start, long end) + throws IOException { + attemptCounter.incrementAndGet(); + final var inputStream = new FailureAtIndexInputStream(resourceBytes, (int) start, failureCounter.getAndDecrement() > 0); + logger.info("--> reading from stream at version [{} -> {}] {}", start, end, inputStream); + return new RetryingInputStream.InputStreamAtVersion<>(inputStream, eTag); + } + + @Override + public int getMaxRetries() { + return retryableFailures * 2; + } + }, randomRetryingPurpose()) { + }; + + final var out = new ByteArrayOutputStream(); + Streams.copy(stream, out); + assertEquals(resourceBytes.length(), out.size()); + assertEquals(resourceBytes, new BytesArray(out.toByteArray())); + assertEquals(retryableFailures + 1, attemptCounter.get()); + assertEquals(Stream.generate(() -> "read").limit(retryableFailures).toList(), retryStarted); + } + + public void testReadWillFailWhenRetryableErrorsExceedMaxRetries() throws IOException { + final var attemptCounter = new AtomicInteger(); + final var maxRetries = randomIntBetween(1, 5); + final var retryStarted = new ArrayList(); + final var retrySucceeded = new ArrayList(); + final var resourceSize = ByteSizeValue.ofKb(randomIntBetween(10, 100)); + logger.info("--> resource size: {}", resourceSize); + final var resourceBytes = randomBytesReference((int) resourceSize.getBytes()); + final var eTag = randomUUID(); + + final var stream = new RetryingInputStream<>(new BlobStoreServicesAdapter(retryStarted, retrySucceeded) { + @Override + public RetryingInputStream.InputStreamAtVersion getInputStreamAtVersion(String version, long start, long end) + throws IOException { + attemptCounter.incrementAndGet(); + final var inputStream = new FailureAtIndexInputStream(resourceBytes, (int) start, true); + logger.info("--> reading from stream at version [{} -> {}] {}", start, end, inputStream); + return new RetryingInputStream.InputStreamAtVersion<>(inputStream, eTag); + } + + @Override + public int getMaxRetries() { + return maxRetries; + } + }, randomFiniteRetryingPurpose()) { + }; + + final var out = new ByteArrayOutputStream(); + final var ioException = assertThrows(IOException.class, () -> copy(stream, out)); + assertEquals("This is retry-able", ioException.getMessage()); + assertEquals(maxRetries + 1, attemptCounter.get()); + assertEquals(Stream.generate(() -> "read").limit(maxRetries + 1).toList(), retryStarted); + } + + public void testReadWillFailWhenRetryableErrorsOccurDuringRepositoryAnalysis() throws IOException { + final var attemptCounter = new AtomicInteger(); + final var maxRetries = randomIntBetween(2, 5); + final var retryStarted = new ArrayList(); + final var retrySucceeded = new ArrayList(); + final var resourceSize = ByteSizeValue.ofKb(randomIntBetween(5, 200)); + logger.info("--> resource size: {}", resourceSize); + final var resourceBytes = randomBytesReference((int) resourceSize.getBytes()); + final var eTag = randomUUID(); + + final var stream = new RetryingInputStream<>(new BlobStoreServicesAdapter(retryStarted, retrySucceeded) { + @Override + public RetryingInputStream.InputStreamAtVersion getInputStreamAtVersion(String version, long start, long end) + throws IOException { + attemptCounter.incrementAndGet(); + final var inputStream = new FailureAtIndexInputStream(resourceBytes, (int) start, true); + logger.info("--> reading from stream at version [{} -> {}] {}", start, end, inputStream); + return new RetryingInputStream.InputStreamAtVersion<>(inputStream, eTag); + } + + @Override + public int getMaxRetries() { + return maxRetries; + } + }, OperationPurpose.REPOSITORY_ANALYSIS) { + }; + + final var ioException = assertThrows(IOException.class, () -> copy(stream, OutputStream.nullOutputStream())); + assertEquals("This is retry-able", ioException.getMessage()); + assertEquals(1, attemptCounter.get()); + assertEquals(List.of("read"), retryStarted); + } + + public void testRetriesWillBeExtendedWhenMeaningfulProgressIsMade() throws IOException { + final var attemptCounter = new AtomicInteger(); + final var meaningfulProgressAttempts = randomIntBetween(1, 3); + final var maxRetries = randomIntBetween(1, 5); + final var meaningfulProgressSize = randomIntBetween(1024, 4096); + final var retryStarted = new ArrayList(); + final var retrySucceeded = new ArrayList(); + final var resourceSize = ByteSizeValue.ofKb(randomIntBetween(100, 150)); + logger.info("--> resource size: {}", resourceSize); + final var resourceBytes = randomBytesReference((int) resourceSize.getBytes()); + final var eTag = randomUUID(); + final AtomicInteger meaningfulProgressAttemptsCounter = new AtomicInteger(meaningfulProgressAttempts); + + final var stream = new RetryingInputStream<>(new BlobStoreServicesAdapter(retryStarted, retrySucceeded) { + @Override + public RetryingInputStream.InputStreamAtVersion getInputStreamAtVersion(String version, long start, long end) + throws IOException { + attemptCounter.incrementAndGet(); + final var inputStream = meaningfulProgressAttemptsCounter.decrementAndGet() > 0 + ? new FailureAtIndexInputStream(resourceBytes, (int) start, true, meaningfulProgressSize, Integer.MAX_VALUE) + : new FailureAtIndexInputStream(resourceBytes, (int) start, true, 1, meaningfulProgressSize - 1); + logger.info("--> reading from stream at version [{} -> {}] {}", start, end, inputStream); + return new RetryingInputStream.InputStreamAtVersion<>(inputStream, eTag); + } + + @Override + public int getMaxRetries() { + return maxRetries; + } + + @Override + public long getMeaningfulProgressSize() { + return meaningfulProgressSize; + } + }, randomFiniteRetryingPurpose()) { + }; + + final var out = new ByteArrayOutputStream(); + final var ioException = assertThrows(IOException.class, () -> copy(stream, out)); + assertEquals("This is retry-able", ioException.getMessage()); + assertEquals(maxRetries + meaningfulProgressAttempts, attemptCounter.get()); + assertEquals(Stream.generate(() -> "read").limit(maxRetries + meaningfulProgressAttempts).toList(), retryStarted); + } + + public void testNoSuchFileExceptionAndRangeNotSatisfiedTerminatesWithoutRetry() { + final var notRetriableException = randomBoolean() + ? new NoSuchFileException("This is not retry-able") + : new RequestedRangeNotSatisfiedException("This is not retry-able", randomLong(), randomLong()); + final var attemptCounter = new AtomicInteger(); + final var retryableFailures = randomIntBetween(1, 5); + final var failureCounter = new AtomicInteger(retryableFailures); + final var retryStarted = new ArrayList(); + final var retrySucceeded = new ArrayList(); + + final IOException ioException = assertThrows(IOException.class, () -> { + final var stream = new RetryingInputStream<>(new BlobStoreServicesAdapter(retryStarted, retrySucceeded) { + @Override + public RetryingInputStream.InputStreamAtVersion getInputStreamAtVersion(String version, long start, long end) + throws IOException { + attemptCounter.incrementAndGet(); + if (failureCounter.getAndDecrement() > 0) { + if (randomBoolean()) { + throw new RuntimeException("This is retry-able"); + } else { + throw new IOException("This is retry-able"); + } + } + throw notRetriableException; + } + + @Override + public int getMaxRetries() { + return retryableFailures * 2; + } + }, randomRetryingPurpose()) { + }; + copy(stream, OutputStream.nullOutputStream()); + }); + assertSame(notRetriableException, ioException); + assertEquals(retryableFailures + 1, attemptCounter.get()); + assertEquals(List.of("open"), retryStarted); + assertThat(retrySucceeded, empty()); + } + + private static void copy(InputStream inputStream, OutputStream outputStream) throws IOException { + if (randomBoolean()) { + Streams.copy(inputStream, outputStream); + } else { + while (true) { + final int read = inputStream.read(); + if (read == -1) { + break; + } + outputStream.write(read); + } + } + } + + private abstract static class BlobStoreServicesAdapter implements RetryingInputStream.BlobStoreServices { + + private final List retryStarted; + private final List retrySucceeded; + + BlobStoreServicesAdapter(List retryStarted, List retrySucceeded) { + this.retryStarted = retryStarted; + this.retrySucceeded = retrySucceeded; + } + + BlobStoreServicesAdapter() { + this(new ArrayList<>(), new ArrayList<>()); + } + + @Override + public void onRetryStarted(String action) { + retryStarted.add(action); + } + + @Override + public void onRetrySucceeded(String action, long numberOfRetries) { + retrySucceeded.add(new Success(action, numberOfRetries)); + } + + @Override + public long getMeaningfulProgressSize() { + return Long.MAX_VALUE; + } + + @Override + public int getMaxRetries() { + return 0; + } + + @Override + public String getBlobDescription() { + return ""; + } + + record Success(String action, long numberOfRetries) {}; + } + + private static class FailureAtIndexInputStream extends InputStream { + + private final InputStream delegate; + private int readRemaining; + + private FailureAtIndexInputStream(BytesReference bytesReference, int startIndex, boolean failBeforeEnd) throws IOException { + this(bytesReference, startIndex, failBeforeEnd, 1, Integer.MAX_VALUE); + } + + private FailureAtIndexInputStream( + BytesReference bytesReference, + int startIndex, + boolean failBeforeEnd, + int minimumSuccess, + int maximumSuccess + ) throws IOException { + final int remainingBytes = bytesReference.length() - startIndex; + this.delegate = bytesReference.slice(startIndex, remainingBytes).streamInput(); + if (failBeforeEnd) { + this.readRemaining = randomIntBetween(Math.max(1, minimumSuccess), Math.min(maximumSuccess, remainingBytes / 2)); + } else { + this.readRemaining = Integer.MAX_VALUE; + } + } + + @Override + public int read() throws IOException { + if (readRemaining > 0) { + readRemaining--; + return delegate.read(); + } else { + throw new IOException("This is retry-able"); + } + } + + @Override + public String toString() { + return "Failing after " + readRemaining; + } + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java index 4bfa12224765d..dda77b31f9982 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java @@ -302,7 +302,7 @@ public static OperationPurpose randomRetryingPurpose() { return randomValueOtherThan(OperationPurpose.REPOSITORY_ANALYSIS, BlobStoreTestUtil::randomPurpose); } - protected OperationPurpose randomFiniteRetryingPurpose() { + public static OperationPurpose randomFiniteRetryingPurpose() { return randomValueOtherThanMany( purpose -> purpose == OperationPurpose.REPOSITORY_ANALYSIS || purpose == OperationPurpose.INDICES, BlobStoreTestUtil::randomPurpose From c88f8e27acaaea7fe06873b6141eed41db1a6271 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 20 Nov 2025 10:46:40 +1100 Subject: [PATCH 21/31] Tidy up tests, fix log message --- .../common/blobstore/RetryingInputStream.java | 2 +- .../blobstore/RetryingInputStreamTests.java | 240 +++++++++--------- 2 files changed, 117 insertions(+), 125 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java b/server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java index 84ec852b8add4..87da8d05cd8c6 100644 --- a/server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java +++ b/server/src/main/java/org/elasticsearch/common/blobstore/RetryingInputStream.java @@ -252,7 +252,7 @@ private void delayBeforeRetry(long delayInMillis) { assert shouldRetry(attempt - 1) : "should not have retried"; Thread.sleep(delayInMillis); } catch (InterruptedException e) { - logger.info("s3 input stream delay interrupted", e); + logger.info("retrying input stream delay interrupted", e); Thread.currentThread().interrupt(); } } diff --git a/server/src/test/java/org/elasticsearch/common/blobstore/RetryingInputStreamTests.java b/server/src/test/java/org/elasticsearch/common/blobstore/RetryingInputStreamTests.java index 78b6b26cd194d..82ed1bbb5059e 100644 --- a/server/src/test/java/org/elasticsearch/common/blobstore/RetryingInputStreamTests.java +++ b/server/src/test/java/org/elasticsearch/common/blobstore/RetryingInputStreamTests.java @@ -19,7 +19,6 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.OutputStream; import java.nio.file.NoSuchFileException; import java.util.ArrayList; import java.util.List; @@ -33,193 +32,166 @@ public class RetryingInputStreamTests extends ESTestCase { public void testRetryableErrorsWhenReadingAreRetried() throws IOException { - final var attemptCounter = new AtomicInteger(); final var retryableFailures = randomIntBetween(1, 5); final var failureCounter = new AtomicInteger(retryableFailures); - final var retryStarted = new ArrayList(); - final var retrySucceeded = new ArrayList(); - final var resourceSize = ByteSizeValue.ofKb(randomIntBetween(5, 200)); - logger.info("--> resource size: {}", resourceSize); - final var resourceBytes = randomBytesReference((int) resourceSize.getBytes()); + final var resourceBytes = randomBytesReference((int) ByteSizeValue.ofKb(randomIntBetween(5, 200)).getBytes()); final var eTag = randomUUID(); - final var stream = new RetryingInputStream<>(new BlobStoreServicesAdapter(retryStarted, retrySucceeded) { + final var services = new BlobStoreServicesAdapter(retryableFailures * 2) { @Override - public RetryingInputStream.InputStreamAtVersion getInputStreamAtVersion(String version, long start, long end) + public RetryingInputStream.InputStreamAtVersion doGetInputStream(String version, long start, long end) throws IOException { - attemptCounter.incrementAndGet(); final var inputStream = new FailureAtIndexInputStream(resourceBytes, (int) start, failureCounter.getAndDecrement() > 0); - logger.info("--> reading from stream at version [{} -> {}] {}", start, end, inputStream); return new RetryingInputStream.InputStreamAtVersion<>(inputStream, eTag); } - - @Override - public int getMaxRetries() { - return retryableFailures * 2; - } - }, randomRetryingPurpose()) { }; - final var out = new ByteArrayOutputStream(); - Streams.copy(stream, out); - assertEquals(resourceBytes.length(), out.size()); - assertEquals(resourceBytes, new BytesArray(out.toByteArray())); - assertEquals(retryableFailures + 1, attemptCounter.get()); - assertEquals(Stream.generate(() -> "read").limit(retryableFailures).toList(), retryStarted); + byte[] results = copyToBytes(new RetryingInputStream<>(services, randomRetryingPurpose()) { + }); + assertEquals(resourceBytes.length(), results.length); + assertEquals(resourceBytes, new BytesArray(results)); + assertEquals(retryableFailures + 1, services.getAttempts()); + assertEquals(Stream.generate(() -> "read").limit(retryableFailures).toList(), services.getRetryStarted()); } - public void testReadWillFailWhenRetryableErrorsExceedMaxRetries() throws IOException { - final var attemptCounter = new AtomicInteger(); + public void testReadWillFailWhenRetryableErrorsExceedMaxRetries() { final var maxRetries = randomIntBetween(1, 5); - final var retryStarted = new ArrayList(); - final var retrySucceeded = new ArrayList(); - final var resourceSize = ByteSizeValue.ofKb(randomIntBetween(10, 100)); - logger.info("--> resource size: {}", resourceSize); - final var resourceBytes = randomBytesReference((int) resourceSize.getBytes()); + final var resourceBytes = randomBytesReference((int) ByteSizeValue.ofKb(randomIntBetween(10, 100)).getBytes()); final var eTag = randomUUID(); - final var stream = new RetryingInputStream<>(new BlobStoreServicesAdapter(retryStarted, retrySucceeded) { + final var services = new BlobStoreServicesAdapter(maxRetries) { @Override - public RetryingInputStream.InputStreamAtVersion getInputStreamAtVersion(String version, long start, long end) + public RetryingInputStream.InputStreamAtVersion doGetInputStream(String version, long start, long end) throws IOException { - attemptCounter.incrementAndGet(); final var inputStream = new FailureAtIndexInputStream(resourceBytes, (int) start, true); - logger.info("--> reading from stream at version [{} -> {}] {}", start, end, inputStream); return new RetryingInputStream.InputStreamAtVersion<>(inputStream, eTag); } - - @Override - public int getMaxRetries() { - return maxRetries; - } - }, randomFiniteRetryingPurpose()) { }; - final var out = new ByteArrayOutputStream(); - final var ioException = assertThrows(IOException.class, () -> copy(stream, out)); + final var ioException = assertThrows( + IOException.class, + () -> copyToBytes(new RetryingInputStream<>(services, randomFiniteRetryingPurpose()) { + }) + ); assertEquals("This is retry-able", ioException.getMessage()); - assertEquals(maxRetries + 1, attemptCounter.get()); - assertEquals(Stream.generate(() -> "read").limit(maxRetries + 1).toList(), retryStarted); + assertEquals(maxRetries + 1, services.getAttempts()); + assertEquals(Stream.generate(() -> "read").limit(maxRetries + 1).toList(), services.getRetryStarted()); } - public void testReadWillFailWhenRetryableErrorsOccurDuringRepositoryAnalysis() throws IOException { - final var attemptCounter = new AtomicInteger(); + public void testReadWillFailWhenRetryableErrorsOccurDuringRepositoryAnalysis() { final var maxRetries = randomIntBetween(2, 5); - final var retryStarted = new ArrayList(); - final var retrySucceeded = new ArrayList(); - final var resourceSize = ByteSizeValue.ofKb(randomIntBetween(5, 200)); - logger.info("--> resource size: {}", resourceSize); - final var resourceBytes = randomBytesReference((int) resourceSize.getBytes()); + final var resourceBytes = randomBytesReference((int) ByteSizeValue.ofKb(randomIntBetween(5, 200)).getBytes()); final var eTag = randomUUID(); - final var stream = new RetryingInputStream<>(new BlobStoreServicesAdapter(retryStarted, retrySucceeded) { + final var services = new BlobStoreServicesAdapter(maxRetries) { @Override - public RetryingInputStream.InputStreamAtVersion getInputStreamAtVersion(String version, long start, long end) + public RetryingInputStream.InputStreamAtVersion doGetInputStream(String version, long start, long end) throws IOException { - attemptCounter.incrementAndGet(); final var inputStream = new FailureAtIndexInputStream(resourceBytes, (int) start, true); - logger.info("--> reading from stream at version [{} -> {}] {}", start, end, inputStream); return new RetryingInputStream.InputStreamAtVersion<>(inputStream, eTag); } + }; + + final var ioException = assertThrows( + IOException.class, + () -> copyToBytes(new RetryingInputStream<>(services, OperationPurpose.REPOSITORY_ANALYSIS) { + }) + ); + assertEquals("This is retry-able", ioException.getMessage()); + assertEquals(1, services.getAttempts()); + assertEquals(List.of("read"), services.getRetryStarted()); + } + + public void testReadWillRetryIndefinitelyWhenErrorsOccurDuringIndicesOperation() throws IOException { + final var resourceBytes = randomBytesReference((int) ByteSizeValue.ofKb(randomIntBetween(5, 200)).getBytes()); + final int numberOfFailures = randomIntBetween(1, 10); + final AtomicInteger failureCounter = new AtomicInteger(numberOfFailures); + final var eTag = randomUUID(); + final var services = new BlobStoreServicesAdapter(0) { @Override - public int getMaxRetries() { - return maxRetries; + public RetryingInputStream.InputStreamAtVersion doGetInputStream(String version, long start, long end) + throws IOException { + final var inputStream = new FailureAtIndexInputStream(resourceBytes, (int) start, failureCounter.getAndDecrement() > 0); + return new RetryingInputStream.InputStreamAtVersion<>(inputStream, eTag); } - }, OperationPurpose.REPOSITORY_ANALYSIS) { }; - final var ioException = assertThrows(IOException.class, () -> copy(stream, OutputStream.nullOutputStream())); - assertEquals("This is retry-able", ioException.getMessage()); - assertEquals(1, attemptCounter.get()); - assertEquals(List.of("read"), retryStarted); + byte[] result = copyToBytes(new RetryingInputStream<>(services, OperationPurpose.INDICES) { + }); + assertEquals(resourceBytes, new BytesArray(result)); + assertEquals(numberOfFailures + 1, services.getAttempts()); + assertEquals(Stream.generate(() -> "read").limit(numberOfFailures).toList(), services.getRetryStarted()); } - public void testRetriesWillBeExtendedWhenMeaningfulProgressIsMade() throws IOException { - final var attemptCounter = new AtomicInteger(); - final var meaningfulProgressAttempts = randomIntBetween(1, 3); + public void testRetriesWillBeExtendedWhenMeaningfulProgressIsMade() { final var maxRetries = randomIntBetween(1, 5); + final var resourceBytes = randomBytesReference((int) ByteSizeValue.ofKb(randomIntBetween(100, 150)).getBytes()); final var meaningfulProgressSize = randomIntBetween(1024, 4096); - final var retryStarted = new ArrayList(); - final var retrySucceeded = new ArrayList(); - final var resourceSize = ByteSizeValue.ofKb(randomIntBetween(100, 150)); - logger.info("--> resource size: {}", resourceSize); - final var resourceBytes = randomBytesReference((int) resourceSize.getBytes()); + final var meaningfulProgressAttempts = randomIntBetween(1, 3); + final var meaningfulProgressAttemptsCounter = new AtomicInteger(meaningfulProgressAttempts); final var eTag = randomUUID(); - final AtomicInteger meaningfulProgressAttemptsCounter = new AtomicInteger(meaningfulProgressAttempts); - final var stream = new RetryingInputStream<>(new BlobStoreServicesAdapter(retryStarted, retrySucceeded) { + final var services = new BlobStoreServicesAdapter(maxRetries) { @Override - public RetryingInputStream.InputStreamAtVersion getInputStreamAtVersion(String version, long start, long end) + public RetryingInputStream.InputStreamAtVersion doGetInputStream(String version, long start, long end) throws IOException { - attemptCounter.incrementAndGet(); final var inputStream = meaningfulProgressAttemptsCounter.decrementAndGet() > 0 ? new FailureAtIndexInputStream(resourceBytes, (int) start, true, meaningfulProgressSize, Integer.MAX_VALUE) : new FailureAtIndexInputStream(resourceBytes, (int) start, true, 1, meaningfulProgressSize - 1); - logger.info("--> reading from stream at version [{} -> {}] {}", start, end, inputStream); return new RetryingInputStream.InputStreamAtVersion<>(inputStream, eTag); } - @Override - public int getMaxRetries() { - return maxRetries; - } - @Override public long getMeaningfulProgressSize() { return meaningfulProgressSize; } - }, randomFiniteRetryingPurpose()) { }; - final var out = new ByteArrayOutputStream(); - final var ioException = assertThrows(IOException.class, () -> copy(stream, out)); + final var ioException = assertThrows( + IOException.class, + () -> copyToBytes(new RetryingInputStream<>(services, randomFiniteRetryingPurpose()) { + }) + ); assertEquals("This is retry-able", ioException.getMessage()); - assertEquals(maxRetries + meaningfulProgressAttempts, attemptCounter.get()); - assertEquals(Stream.generate(() -> "read").limit(maxRetries + meaningfulProgressAttempts).toList(), retryStarted); + assertEquals(maxRetries + meaningfulProgressAttempts, services.getAttempts()); + assertEquals(Stream.generate(() -> "read").limit(maxRetries + meaningfulProgressAttempts).toList(), services.getRetryStarted()); } public void testNoSuchFileExceptionAndRangeNotSatisfiedTerminatesWithoutRetry() { final var notRetriableException = randomBoolean() ? new NoSuchFileException("This is not retry-able") : new RequestedRangeNotSatisfiedException("This is not retry-able", randomLong(), randomLong()); - final var attemptCounter = new AtomicInteger(); final var retryableFailures = randomIntBetween(1, 5); final var failureCounter = new AtomicInteger(retryableFailures); - final var retryStarted = new ArrayList(); - final var retrySucceeded = new ArrayList(); - - final IOException ioException = assertThrows(IOException.class, () -> { - final var stream = new RetryingInputStream<>(new BlobStoreServicesAdapter(retryStarted, retrySucceeded) { - @Override - public RetryingInputStream.InputStreamAtVersion getInputStreamAtVersion(String version, long start, long end) - throws IOException { - attemptCounter.incrementAndGet(); - if (failureCounter.getAndDecrement() > 0) { - if (randomBoolean()) { - throw new RuntimeException("This is retry-able"); - } else { - throw new IOException("This is retry-able"); - } - } - throw notRetriableException; - } - @Override - public int getMaxRetries() { - return retryableFailures * 2; + final var services = new BlobStoreServicesAdapter(retryableFailures * 2) { + @Override + public RetryingInputStream.InputStreamAtVersion doGetInputStream(String version, long start, long end) + throws IOException { + if (failureCounter.getAndDecrement() > 0) { + if (randomBoolean()) { + throw new RuntimeException("This is retry-able"); + } else { + throw new IOException("This is retry-able"); + } } - }, randomRetryingPurpose()) { - }; - copy(stream, OutputStream.nullOutputStream()); - }); + throw notRetriableException; + } + }; + final IOException ioException = assertThrows( + IOException.class, + () -> copyToBytes(new RetryingInputStream<>(services, randomRetryingPurpose()) { + }) + ); assertSame(notRetriableException, ioException); - assertEquals(retryableFailures + 1, attemptCounter.get()); - assertEquals(List.of("open"), retryStarted); - assertThat(retrySucceeded, empty()); + assertEquals(retryableFailures + 1, services.getAttempts()); + assertEquals(List.of("open"), services.getRetryStarted()); + assertThat(services.getRetrySucceeded(), empty()); } - private static void copy(InputStream inputStream, OutputStream outputStream) throws IOException { + private static byte[] copyToBytes(InputStream inputStream) throws IOException { + final var outputStream = new ByteArrayOutputStream(); if (randomBoolean()) { Streams.copy(inputStream, outputStream); } else { @@ -231,22 +203,30 @@ private static void copy(InputStream inputStream, OutputStream outputStream) thr outputStream.write(read); } } + return outputStream.toByteArray(); } private abstract static class BlobStoreServicesAdapter implements RetryingInputStream.BlobStoreServices { - private final List retryStarted; - private final List retrySucceeded; + private final AtomicInteger attemptCounter = new AtomicInteger(); + private final List retryStarted = new ArrayList<>(); + private final List retrySucceeded = new ArrayList<>(); + private final int maxRetries; - BlobStoreServicesAdapter(List retryStarted, List retrySucceeded) { - this.retryStarted = retryStarted; - this.retrySucceeded = retrySucceeded; + private BlobStoreServicesAdapter(int maxRetries) { + this.maxRetries = maxRetries; } - BlobStoreServicesAdapter() { - this(new ArrayList<>(), new ArrayList<>()); + @Override + public final RetryingInputStream.InputStreamAtVersion getInputStreamAtVersion(String version, long start, long end) + throws IOException { + attemptCounter.incrementAndGet(); + return doGetInputStream(version, start, end); } + protected abstract RetryingInputStream.InputStreamAtVersion doGetInputStream(String version, long start, long end) + throws IOException; + @Override public void onRetryStarted(String action) { retryStarted.add(action); @@ -263,8 +243,8 @@ public long getMeaningfulProgressSize() { } @Override - public int getMaxRetries() { - return 0; + public final int getMaxRetries() { + return maxRetries; } @Override @@ -273,6 +253,18 @@ public String getBlobDescription() { } record Success(String action, long numberOfRetries) {}; + + public int getAttempts() { + return attemptCounter.get(); + } + + public List getRetryStarted() { + return retryStarted; + } + + public List getRetrySucceeded() { + return retrySucceeded; + } } private static class FailureAtIndexInputStream extends InputStream { From 14cff382d8af779f94c317d8cd4e878f7c9a4e4c Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 20 Nov 2025 10:49:46 +1100 Subject: [PATCH 22/31] Minimise change --- .../azure/AzureBlobStoreRepositoryTests.java | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java index 48f286737b4da..d7b6874aa79ed 100644 --- a/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java +++ b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java @@ -8,14 +8,14 @@ */ package org.elasticsearch.repositories.azure; -import fixture.azure.AzureHttpHandler; -import fixture.azure.MockAzureBlobStore; - import com.azure.storage.common.policy.RequestRetryOptions; import com.azure.storage.common.policy.RetryPolicyType; import com.sun.net.httpserver.HttpExchange; import com.sun.net.httpserver.HttpHandler; +import fixture.azure.AzureHttpHandler; +import fixture.azure.MockAzureBlobStore; + import org.elasticsearch.action.support.broadcast.BroadcastResponse; import org.elasticsearch.cluster.project.ProjectResolver; import org.elasticsearch.cluster.service.ClusterService; @@ -348,10 +348,7 @@ public void testDeleteBlobsIgnoringIfNotExists() throws Exception { public void testNotFoundErrorMessageContainsFullKey() throws Exception { try (BlobStore store = newBlobStore()) { BlobContainer container = store.blobContainer(BlobPath.EMPTY.add("nested").add("dir")); - NoSuchFileException exception = expectThrows( - NoSuchFileException.class, - () -> container.readBlob(randomRetryingPurpose(), "blob") - ); + NoSuchFileException exception = expectThrows(NoSuchFileException.class, () -> container.readBlob(randomPurpose(), "blob")); assertThat(exception.getMessage(), containsString("nested/dir/blob] not found")); } } From 167b29c552e184344745d6b44751ea3b5069b240 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 19 Nov 2025 23:58:34 +0000 Subject: [PATCH 23/31] [CI] Auto commit changes from spotless --- .../repositories/azure/AzureBlobStoreRepositoryTests.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java index d7b6874aa79ed..4792de1427ce1 100644 --- a/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java +++ b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java @@ -8,14 +8,14 @@ */ package org.elasticsearch.repositories.azure; +import fixture.azure.AzureHttpHandler; +import fixture.azure.MockAzureBlobStore; + import com.azure.storage.common.policy.RequestRetryOptions; import com.azure.storage.common.policy.RetryPolicyType; import com.sun.net.httpserver.HttpExchange; import com.sun.net.httpserver.HttpHandler; -import fixture.azure.AzureHttpHandler; -import fixture.azure.MockAzureBlobStore; - import org.elasticsearch.action.support.broadcast.BroadcastResponse; import org.elasticsearch.cluster.project.ProjectResolver; import org.elasticsearch.cluster.service.ClusterService; From ce4bd1f299d2c24a6f7a876b366d4c7e7c13d286 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 20 Nov 2025 10:55:23 +1100 Subject: [PATCH 24/31] Test that version is requested for retries --- .../blobstore/RetryingInputStreamTests.java | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/server/src/test/java/org/elasticsearch/common/blobstore/RetryingInputStreamTests.java b/server/src/test/java/org/elasticsearch/common/blobstore/RetryingInputStreamTests.java index 82ed1bbb5059e..8e059e23c0f23 100644 --- a/server/src/test/java/org/elasticsearch/common/blobstore/RetryingInputStreamTests.java +++ b/server/src/test/java/org/elasticsearch/common/blobstore/RetryingInputStreamTests.java @@ -190,6 +190,30 @@ public RetryingInputStream.InputStreamAtVersion doGetInputStream(String assertThat(services.getRetrySucceeded(), empty()); } + public void testBlobVersionIsRequestedForSecondAndSubsequentAttempts() throws IOException { + final var resourceBytes = randomBytesReference((int) ByteSizeValue.ofKb(randomIntBetween(5, 200)).getBytes()); + final int numberOfFailures = randomIntBetween(1, 10); + final AtomicInteger failureCounter = new AtomicInteger(numberOfFailures); + final var eTag = randomUUID(); + + final var services = new BlobStoreServicesAdapter(0) { + @Override + public RetryingInputStream.InputStreamAtVersion doGetInputStream(String version, long start, long end) + throws IOException { + if (getAttempts() > 1) { + assertEquals(eTag, version); + } else { + assertNull(version); + } + final var inputStream = new FailureAtIndexInputStream(resourceBytes, (int) start, failureCounter.getAndDecrement() > 0); + return new RetryingInputStream.InputStreamAtVersion<>(inputStream, eTag); + } + }; + + copyToBytes(new RetryingInputStream<>(services, OperationPurpose.INDICES) { + }); + } + private static byte[] copyToBytes(InputStream inputStream) throws IOException { final var outputStream = new ByteArrayOutputStream(); if (randomBoolean()) { From 0e3ab7960ed1b4dcde0bc7153c6123ee0ddd4bc6 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 20 Nov 2025 11:43:54 +1100 Subject: [PATCH 25/31] Put some values in for "meaningful progress" --- .../repositories/azure/AzureRetryingInputStream.java | 3 +-- .../gcs/GoogleCloudStorageRetryingInputStream.java | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java index dd40e479a15ba..03a165f2a8695 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRetryingInputStream.java @@ -73,8 +73,7 @@ public void onRetrySucceeded(String action, long numberOfRetries) { @Override public long getMeaningfulProgressSize() { - // Any progress is meaningful for Azure - return 1; + return Math.max(1L, blobStore.getReadChunkSize() / 100L); } @Override diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java index 2518d1294669d..e3b4cf55c1594 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRetryingInputStream.java @@ -134,8 +134,7 @@ public void onRetrySucceeded(String action, long numberOfRetries) { @Override public long getMeaningfulProgressSize() { - // Any progress is meaningful for GCS - return 1; + return Math.max(1L, GoogleCloudStorageBlobStore.SDK_DEFAULT_CHUNK_SIZE / 100L); } @Override From a49cbedc9e3aedba0b17845581f7d02ddcdc6613 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 20 Nov 2025 11:50:58 +1100 Subject: [PATCH 26/31] Add comment indicating that getInputStream won't retry on failures --- .../org/elasticsearch/repositories/azure/AzureBlobStore.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java index fc13c65a25e92..07d3762c7fdb1 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java @@ -348,6 +348,10 @@ int getMaxReadRetries() { return service.getMaxReadRetries(projectId, clientName); } + /** + * Get an {@link InputStream} for reading the specified blob. The returned input stream will not retry on a read failure, + * to get an input stream that implements retries use {@link AzureBlobContainer#readBlob(OperationPurpose, String, long, long)} + */ AzureInputStream getInputStream( OperationPurpose purpose, String blob, From 47f1834d40b93370ec9abe42ecd360a9e573b1ae Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 20 Nov 2025 11:53:42 +1100 Subject: [PATCH 27/31] Minimise change --- .../repositories/azure/AzureBlobContainerRetriesTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerRetriesTests.java b/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerRetriesTests.java index c25565d09b1df..6481c8250c5b3 100644 --- a/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerRetriesTests.java +++ b/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerRetriesTests.java @@ -65,11 +65,11 @@ public void testReadNonexistentBlobThrowsNoSuchFileException() { final BlobContainer blobContainer = createBlobContainer(between(1, 5)); final Exception exception = expectThrows(NoSuchFileException.class, () -> { if (randomBoolean()) { - blobContainer.readBlob(randomRetryingPurpose(), "read_nonexistent_blob"); + blobContainer.readBlob(randomPurpose(), "read_nonexistent_blob"); } else { final long position = randomLongBetween(0, MAX_RANGE_VAL - 1L); final long length = randomLongBetween(1, MAX_RANGE_VAL - position); - blobContainer.readBlob(randomRetryingPurpose(), "read_nonexistent_blob", position, length); + blobContainer.readBlob(randomPurpose(), "read_nonexistent_blob", position, length); } }); assertThat(exception.toString(), exception.getMessage().toLowerCase(Locale.ROOT), containsString("not found")); From 5b23aa94daeda7a17fb88da975ec272f3a4e42f8 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 20 Nov 2025 12:04:06 +1100 Subject: [PATCH 28/31] Use default RetrySettings from the GCS client code --- .../repositories/gcs/GoogleCloudStorageService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageService.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageService.java index 40baeac8c249e..b7f8423f53c2a 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageService.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageService.java @@ -99,13 +99,13 @@ public enum RetryBehaviour { ClientConfigured { @Override public RetrySettings createRetrySettings(GoogleCloudStorageClientSettings settings) { - return RetrySettings.newBuilder().setMaxAttempts(settings.getMaxRetries() + 1).build(); + return ServiceOptions.getDefaultRetrySettings().toBuilder().setMaxAttempts(settings.getMaxRetries() + 1).build(); } }, None { @Override public RetrySettings createRetrySettings(GoogleCloudStorageClientSettings settings) { - return RetrySettings.newBuilder().setMaxAttempts(1).build(); + return ServiceOptions.getNoRetrySettings(); } }; From af8bc551068ae3af301bbe014f9e83cff5f5c606 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 20 Nov 2025 12:18:41 +1100 Subject: [PATCH 29/31] Make GoogleCloudStorageClientsManagerTests retry-behaviour aware --- ...GoogleCloudStorageClientsManagerTests.java | 69 +++++++++++++------ 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java index 8f9415132083b..db8dfe299c24e 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java @@ -43,7 +43,6 @@ import java.util.stream.Stream; import static org.elasticsearch.repositories.gcs.GoogleCloudStorageClientSettings.CREDENTIALS_FILE_SETTING; -import static org.elasticsearch.repositories.gcs.GoogleCloudStorageService.RetryBehaviour.ClientConfigured; import static org.hamcrest.Matchers.anEmptyMap; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.containsInAnyOrder; @@ -148,32 +147,42 @@ public void testClientsLifeCycleForSingleProject() throws Exception { final ProjectId projectId = randomUniqueProjectId(); final String clientName = randomFrom(clientNames); final String anotherClientName = randomValueOtherThan(clientName, () -> randomFrom(clientNames)); + final GoogleCloudStorageService.RetryBehaviour retryBehaviour = randomFrom(GoogleCloudStorageService.RetryBehaviour.values()); + final GoogleCloudStorageService.RetryBehaviour otherRetryBehaviour = randomValueOtherThan( + retryBehaviour, + () -> randomFrom(GoogleCloudStorageService.RetryBehaviour.values()) + ); // Configure project secrets for one client - assertClientNotFound(projectId, clientName); + assertClientNotFound(projectId, clientName, retryBehaviour); updateProjectInClusterState(projectId, newProjectClientsSecrets(projectId, clientName)); { assertProjectClientSettings(projectId, clientName); // Retrieve client for the 1st time - final var initialClient = getClientFromManager(projectId, clientName); + final var initialClient = getClientFromManager(projectId, clientName, retryBehaviour); assertClientCredentials(projectId, clientName, initialClient); // Client is cached when retrieved again - assertThat(initialClient, sameInstance(getClientFromManager(projectId, clientName))); + assertThat(initialClient, sameInstance(getClientFromManager(projectId, clientName, retryBehaviour))); // Client not configured cannot be accessed, - assertClientNotFound(projectId, anotherClientName); + assertClientNotFound(projectId, anotherClientName, retryBehaviour); // Update client secrets should release and recreate the client updateProjectInClusterState(projectId, newProjectClientsSecrets(projectId, clientName, anotherClientName)); assertProjectClientSettings(projectId, clientName, anotherClientName); - final var clientUpdated = getClientFromManager(projectId, clientName); + final var clientUpdated = getClientFromManager(projectId, clientName, retryBehaviour); assertThat(clientUpdated, not(sameInstance(initialClient))); // A different client for a different client name - final var anotherClient = getClientFromManager(projectId, anotherClientName); + final var anotherClient = getClientFromManager(projectId, anotherClientName, retryBehaviour); assertClientCredentials(projectId, anotherClientName, anotherClient); assertThat(anotherClient, not(sameInstance(clientUpdated))); + + // A different client for a different retry behaviour + final var anotherRetryBehaviour = getClientFromManager(projectId, clientName, otherRetryBehaviour); + assertClientCredentials(projectId, clientName, anotherRetryBehaviour); + assertThat(anotherRetryBehaviour, not(sameInstance(clientUpdated))); } // Remove project secrets or the entire project @@ -182,16 +191,17 @@ public void testClientsLifeCycleForSingleProject() throws Exception { } else { removeProjectFromClusterState(projectId); } - assertClientNotFound(projectId, clientName); + assertClientNotFound(projectId, clientName, retryBehaviour); assertThat(gcsClientsManager.getPerProjectClientsHolders(), not(hasKey(projectId))); } public void testClientsWithNoCredentialsAreFilteredOut() throws IOException { final ProjectId projectId = randomUniqueProjectId(); + final GoogleCloudStorageService.RetryBehaviour retryBehaviour = randomFrom(GoogleCloudStorageService.RetryBehaviour.values()); updateProjectInClusterState(projectId, newProjectClientsSecrets(projectId, clientNames.toArray(String[]::new))); for (var clientName : clientNames) { - assertNotNull(getClientFromManager(projectId, clientName)); + assertNotNull(getClientFromManager(projectId, clientName, retryBehaviour)); } final List clientsWithIncorrectSecretsConfig = randomNonEmptySubsetOf(clientNames); @@ -206,15 +216,16 @@ public void testClientsWithNoCredentialsAreFilteredOut() throws IOException { for (var clientName : clientNames) { if (clientsWithIncorrectSecretsConfig.contains(clientName)) { - assertClientNotFound(projectId, clientName); + assertClientNotFound(projectId, clientName, retryBehaviour); } else { - assertNotNull(getClientFromManager(projectId, clientName)); + assertNotNull(getClientFromManager(projectId, clientName, retryBehaviour)); } } } public void testClientsForMultipleProjects() throws InterruptedException { final List projectIds = randomList(2, 8, ESTestCase::randomUniqueProjectId); + final GoogleCloudStorageService.RetryBehaviour retryBehaviour = randomFrom(GoogleCloudStorageService.RetryBehaviour.values()); final List threads = projectIds.stream().map(projectId -> new Thread(() -> { final int iterations = between(1, 3); @@ -225,7 +236,7 @@ public void testClientsForMultipleProjects() throws InterruptedException { assertProjectClientSettings(projectId, clientNames.toArray(String[]::new)); for (var clientName : shuffledList(clientNames)) { try { - final var meteredStorage = getClientFromManager(projectId, clientName); + final var meteredStorage = getClientFromManager(projectId, clientName, retryBehaviour); assertClientCredentials(projectId, clientName, meteredStorage); } catch (IOException e) { fail(e); @@ -238,7 +249,7 @@ public void testClientsForMultipleProjects() throws InterruptedException { removeProjectFromClusterState(projectId); } assertThat(gcsClientsManager.getPerProjectClientsHolders(), not(hasKey(projectId))); - clientNames.forEach(clientName -> assertClientNotFound(projectId, clientName)); + clientNames.forEach(clientName -> assertClientNotFound(projectId, clientName, retryBehaviour)); } })).toList(); @@ -252,16 +263,17 @@ public void testClusterAndProjectClients() throws IOException { final ProjectId projectId = randomUniqueProjectId(); final String clientName = randomFrom(clientNames); final boolean configureProjectClientsFirst = randomBoolean(); + final GoogleCloudStorageService.RetryBehaviour retryBehaviour = randomFrom(GoogleCloudStorageService.RetryBehaviour.values()); if (configureProjectClientsFirst) { updateProjectInClusterState(projectId, newProjectClientsSecrets(projectId, clientName)); } - final var clusterClient = getClientFromService(projectIdForClusterClient(), clientName); + final var clusterClient = getClientFromService(projectIdForClusterClient(), clientName, retryBehaviour); if (configureProjectClientsFirst == false) { assertThat(gcsClientsManager.getPerProjectClientsHolders(), anEmptyMap()); updateProjectInClusterState(projectId, newProjectClientsSecrets(projectId, clientName)); } - final var projectClient = getClientFromService(projectId, clientName); + final var projectClient = getClientFromService(projectId, clientName, retryBehaviour); assertThat(projectClient, not(sameInstance(clusterClient))); // Release the cluster client @@ -291,15 +303,25 @@ public void testProjectClientsDisabled() throws IOException { // Cluster client still works final String clientName = randomFrom(clientNames); - assertNotNull(getClientFromService(projectIdForClusterClient(), clientName)); + assertNotNull( + getClientFromService(projectIdForClusterClient(), clientName, randomFrom(GoogleCloudStorageService.RetryBehaviour.values())) + ); } - private MeteredStorage getClientFromManager(ProjectId projectId, String clientName) throws IOException { - return gcsClientsManager.client(projectId, clientName, repoNameForClient(clientName), statsCollector, ClientConfigured); + private MeteredStorage getClientFromManager( + ProjectId projectId, + String clientName, + GoogleCloudStorageService.RetryBehaviour retryBehaviour + ) throws IOException { + return gcsClientsManager.client(projectId, clientName, repoNameForClient(clientName), statsCollector, retryBehaviour); } - private MeteredStorage getClientFromService(ProjectId projectId, String clientName) throws IOException { - return googleCloudStorageService.client(projectId, clientName, repoNameForClient(clientName), statsCollector, ClientConfigured); + private MeteredStorage getClientFromService( + ProjectId projectId, + String clientName, + GoogleCloudStorageService.RetryBehaviour retryBehaviour + ) throws IOException { + return googleCloudStorageService.client(projectId, clientName, repoNameForClient(clientName), statsCollector, retryBehaviour); } private ProjectId projectIdForClusterClient() { @@ -333,8 +355,11 @@ private void assertClientCredentials(ProjectId projectId, String clientName, Met assertThat(credentials.getPrivateKeyId(), equalTo(projectClientPrivateKeyId(projectId, clientName))); } - private void assertClientNotFound(ProjectId projectId, String clientName) { - final IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> getClientFromManager(projectId, clientName)); + private void assertClientNotFound(ProjectId projectId, String clientName, GoogleCloudStorageService.RetryBehaviour retryBehaviour) { + final IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> getClientFromManager(projectId, clientName, retryBehaviour) + ); assertThat( e.getMessage(), anyOf( From 86aa89f6c315db52dd69f9a5e75ea82457e66ceb Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 20 Nov 2025 12:19:16 +1100 Subject: [PATCH 30/31] Tidy --- .../GoogleCloudStorageClientsManagerTests.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java index db8dfe299c24e..eeb236856ff47 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java @@ -147,7 +147,7 @@ public void testClientsLifeCycleForSingleProject() throws Exception { final ProjectId projectId = randomUniqueProjectId(); final String clientName = randomFrom(clientNames); final String anotherClientName = randomValueOtherThan(clientName, () -> randomFrom(clientNames)); - final GoogleCloudStorageService.RetryBehaviour retryBehaviour = randomFrom(GoogleCloudStorageService.RetryBehaviour.values()); + final GoogleCloudStorageService.RetryBehaviour retryBehaviour = randomRetryBehaviour(); final GoogleCloudStorageService.RetryBehaviour otherRetryBehaviour = randomValueOtherThan( retryBehaviour, () -> randomFrom(GoogleCloudStorageService.RetryBehaviour.values()) @@ -198,7 +198,7 @@ public void testClientsLifeCycleForSingleProject() throws Exception { public void testClientsWithNoCredentialsAreFilteredOut() throws IOException { final ProjectId projectId = randomUniqueProjectId(); - final GoogleCloudStorageService.RetryBehaviour retryBehaviour = randomFrom(GoogleCloudStorageService.RetryBehaviour.values()); + final GoogleCloudStorageService.RetryBehaviour retryBehaviour = randomRetryBehaviour(); updateProjectInClusterState(projectId, newProjectClientsSecrets(projectId, clientNames.toArray(String[]::new))); for (var clientName : clientNames) { assertNotNull(getClientFromManager(projectId, clientName, retryBehaviour)); @@ -225,7 +225,7 @@ public void testClientsWithNoCredentialsAreFilteredOut() throws IOException { public void testClientsForMultipleProjects() throws InterruptedException { final List projectIds = randomList(2, 8, ESTestCase::randomUniqueProjectId); - final GoogleCloudStorageService.RetryBehaviour retryBehaviour = randomFrom(GoogleCloudStorageService.RetryBehaviour.values()); + final GoogleCloudStorageService.RetryBehaviour retryBehaviour = randomRetryBehaviour(); final List threads = projectIds.stream().map(projectId -> new Thread(() -> { final int iterations = between(1, 3); @@ -263,7 +263,7 @@ public void testClusterAndProjectClients() throws IOException { final ProjectId projectId = randomUniqueProjectId(); final String clientName = randomFrom(clientNames); final boolean configureProjectClientsFirst = randomBoolean(); - final GoogleCloudStorageService.RetryBehaviour retryBehaviour = randomFrom(GoogleCloudStorageService.RetryBehaviour.values()); + final GoogleCloudStorageService.RetryBehaviour retryBehaviour = randomRetryBehaviour(); if (configureProjectClientsFirst) { updateProjectInClusterState(projectId, newProjectClientsSecrets(projectId, clientName)); } @@ -303,9 +303,7 @@ public void testProjectClientsDisabled() throws IOException { // Cluster client still works final String clientName = randomFrom(clientNames); - assertNotNull( - getClientFromService(projectIdForClusterClient(), clientName, randomFrom(GoogleCloudStorageService.RetryBehaviour.values())) - ); + assertNotNull(getClientFromService(projectIdForClusterClient(), clientName, randomRetryBehaviour())); } private MeteredStorage getClientFromManager( @@ -316,6 +314,10 @@ private MeteredStorage getClientFromManager( return gcsClientsManager.client(projectId, clientName, repoNameForClient(clientName), statsCollector, retryBehaviour); } + private static GoogleCloudStorageService.RetryBehaviour randomRetryBehaviour() { + return randomFrom(GoogleCloudStorageService.RetryBehaviour.values()); + } + private MeteredStorage getClientFromService( ProjectId projectId, String clientName, From e3d54da5dd076a82f6675ab7d2b6390623a0e788 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 20 Nov 2025 12:20:32 +1100 Subject: [PATCH 31/31] Tidy --- .../repositories/gcs/GoogleCloudStorageClientsManagerTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java index eeb236856ff47..b1c9e85834f3e 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageClientsManagerTests.java @@ -150,7 +150,7 @@ public void testClientsLifeCycleForSingleProject() throws Exception { final GoogleCloudStorageService.RetryBehaviour retryBehaviour = randomRetryBehaviour(); final GoogleCloudStorageService.RetryBehaviour otherRetryBehaviour = randomValueOtherThan( retryBehaviour, - () -> randomFrom(GoogleCloudStorageService.RetryBehaviour.values()) + GoogleCloudStorageClientsManagerTests::randomRetryBehaviour ); // Configure project secrets for one client