Skip to content

Commit eed647c

Browse files
committed
HADOOP-19654. Upgrade AWS SDK to 2.35.4 (#7882)
AWS SDK upgraded to 2.35.4. This SDK has changed checksum/checksum headers handling significantly, causing problems with third party stores, and, in some combinations, AWS S3 itself. The S3A connector has retained old behavior; options to change these settings are now available. The default settings are chosen for maximum compatibility and performance. fs.s3a.request.md5.header: true fs.s3a.checksum.generation: false fs.s3a.create.checksum.algorithm: "" Consult the documentation for more details. Contributed by Steve Loughran
1 parent f305840 commit eed647c

File tree

65 files changed

+1316
-209
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+1316
-209
lines changed

LICENSE-binary

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ io.reactivex:rxnetty:0.4.20
295295
io.swagger:swagger-annotations:1.5.4
296296
javax.inject:javax.inject:1
297297
net.java.dev.jna:jna:5.2.0
298-
net.minidev:accessors-smart:1.2
298+
net.minidev:accessors-smart:1.21
299299
org.apache.avro:avro:1.11.4
300300
org.apache.commons:commons-compress:1.26.1
301301
org.apache.commons:commons-configuration2:2.10.1
@@ -360,7 +360,8 @@ org.objenesis:objenesis:2.6
360360
org.xerial.snappy:snappy-java:1.1.10.4
361361
org.yaml:snakeyaml:2.0
362362
org.wildfly.openssl:wildfly-openssl:2.2.5.Final
363-
software.amazon.awssdk:bundle:2.29.52
363+
software.amazon.awssdk:bundle:2.35.4
364+
software.amazon.s3.analyticsaccelerator:analyticsaccelerator-s3:1.3.0
364365

365366
--------------------------------------------------------------------------------
366367
This product bundles various third-party components under other open source

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractUnbufferTest.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.apache.hadoop.fs.contract;
2020

21+
import org.assertj.core.api.Assertions;
2122
import org.junit.Test;
2223

2324
import java.io.IOException;
@@ -28,6 +29,7 @@
2829

2930
import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile;
3031
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
32+
import static org.apache.hadoop.fs.contract.ContractTestUtils.readNBytes;
3133

3234
/**
3335
* Contract tests for {@link org.apache.hadoop.fs.CanUnbuffer#unbuffer}.
@@ -136,10 +138,12 @@ protected void validateFileContents(FSDataInputStream stream, int length,
136138
int startIndex)
137139
throws IOException {
138140
byte[] streamData = new byte[length];
139-
assertEquals("failed to read expected number of bytes from "
140-
+ "stream. This may be transient",
141-
length, stream.read(streamData));
141+
final int read = readNBytes(stream, streamData, 0, length);
142+
Assertions.assertThat(read)
143+
.describedAs("failed to read expected number of bytes from stream. %s", stream)
144+
.isEqualTo(length);
142145
byte[] validateFileBytes;
146+
143147
if (startIndex == 0 && length == fileBytes.length) {
144148
validateFileBytes = fileBytes;
145149
} else {

hadoop-project/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@
202202
<make-maven-plugin.version>1.0-beta-1</make-maven-plugin.version>
203203
<surefire.fork.timeout>900</surefire.fork.timeout>
204204
<aws-java-sdk.version>1.12.720</aws-java-sdk.version>
205-
<aws-java-sdk-v2.version>2.29.52</aws-java-sdk-v2.version>
205+
<aws-java-sdk-v2.version>2.35.4</aws-java-sdk-v2.version>
206206
<amazon-s3-encryption-client-java.version>3.1.1</amazon-s3-encryption-client-java.version>
207207
<amazon-s3-analyticsaccelerator-s3.version>1.2.1</amazon-s3-analyticsaccelerator-s3.version>
208208
<aws.eventstream.version>1.0.1</aws.eventstream.version>

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientIOException.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,8 @@ public String getMessage() {
5454
public boolean retryable() {
5555
return getCause().retryable();
5656
}
57+
58+
public String getOperation() {
59+
return operation;
60+
}
5761
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSNoResponseException.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@
2424
* Status code 443, no response from server. This is considered idempotent.
2525
*/
2626
public class AWSNoResponseException extends AWSServiceIOException {
27+
28+
/**
29+
* Constructor.
30+
* @param operation operation in progress.
31+
* @param cause inner cause
32+
*/
2733
public AWSNoResponseException(String operation,
2834
AwsServiceException cause) {
2935
super(operation, cause);

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.hadoop.classification.InterfaceAudience;
2222
import org.apache.hadoop.classification.InterfaceStability;
2323
import org.apache.hadoop.fs.Options;
24+
import org.apache.hadoop.fs.s3a.impl.ChecksumSupport;
2425
import org.apache.hadoop.fs.s3a.impl.streams.StreamIntegration;
2526
import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
2627

@@ -1777,15 +1778,53 @@ private Constants() {
17771778
*/
17781779
public static final boolean CHECKSUM_VALIDATION_DEFAULT = false;
17791780

1781+
/**
1782+
* Should checksums always be generated?
1783+
* Not all third-party stores support this being enabled for every request.
1784+
* Value: {@value}.
1785+
*/
1786+
public static final String CHECKSUM_GENERATION =
1787+
"fs.s3a.checksum.generation";
1788+
1789+
/**
1790+
* Default value of {@link #CHECKSUM_GENERATION}.
1791+
* Value: {@value}.
1792+
*/
1793+
public static final boolean DEFAULT_CHECKSUM_GENERATION = false;
1794+
17801795
/**
17811796
* Indicates the algorithm used to create the checksum for the object
17821797
* to be uploaded to S3. Unset by default. It supports the following values:
1783-
* 'CRC32', 'CRC32C', 'SHA1', and 'SHA256'
1798+
* 'CRC32', 'CRC32C', 'SHA1', 'SHA256', 'CRC64_NVME', 'NONE', ''.
1799+
* When checksum calculation is enabled this MUST be set to a valid algorithm.
17841800
* value:{@value}
17851801
*/
17861802
public static final String CHECKSUM_ALGORITHM =
17871803
"fs.s3a.create.checksum.algorithm";
17881804

1805+
/**
1806+
* Default checksum algorithm: {@code "NONE"}.
1807+
*/
1808+
public static final String DEFAULT_CHECKSUM_ALGORITHM =
1809+
ChecksumSupport.NONE;
1810+
1811+
/**
1812+
* Send a {@code Content-MD5} header with every request.
1813+
* This is required when performing some operations with third party stores
1814+
* (for example: bulk delete).
1815+
* It is supported by AWS S3, though has unexpected behavior with AWS S3 Express storage.
1816+
* See https://github.com/aws/aws-sdk-java-v2/issues/6459 for details.
1817+
*/
1818+
public static final String REQUEST_MD5_HEADER =
1819+
"fs.s3a.request.md5.header";
1820+
1821+
/**
1822+
* Default value of {@link #REQUEST_MD5_HEADER}.
1823+
* Value: {@value}.
1824+
*/
1825+
public static final boolean DEFAULT_REQUEST_MD5_HEADER = true;
1826+
1827+
17891828
/**
17901829
* Are extensions classes, such as {@code fs.s3a.aws.credentials.provider},
17911830
* going to be loaded from the same classloader that loaded

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
import org.slf4j.LoggerFactory;
3131

3232
import software.amazon.awssdk.awscore.util.AwsHostNameUtils;
33+
import software.amazon.awssdk.core.checksums.RequestChecksumCalculation;
34+
import software.amazon.awssdk.core.checksums.ResponseChecksumValidation;
3335
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
3436
import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption;
3537
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
@@ -40,6 +42,7 @@
4042
import software.amazon.awssdk.identity.spi.AwsCredentialsIdentity;
4143
import software.amazon.awssdk.metrics.LoggingMetricPublisher;
4244
import software.amazon.awssdk.regions.Region;
45+
import software.amazon.awssdk.services.s3.LegacyMd5Plugin;
4346
import software.amazon.awssdk.services.s3.S3AsyncClient;
4447
import software.amazon.awssdk.services.s3.S3AsyncClientBuilder;
4548
import software.amazon.awssdk.services.s3.S3BaseClientBuilder;
@@ -194,9 +197,32 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> Build
194197

195198
configureEndpointAndRegion(builder, parameters, conf);
196199

200+
// add a plugin to add a Content-MD5 header.
201+
// this is required when performing some operations with third party stores
202+
// (for example: bulk delete), and is somewhat harmless when working with AWS S3.
203+
if (parameters.isMd5HeaderEnabled()) {
204+
LOG.debug("MD5 header enabled");
205+
builder.addPlugin(LegacyMd5Plugin.create());
206+
}
207+
208+
//when to calculate request checksums.
209+
final RequestChecksumCalculation checksumCalculation =
210+
parameters.isChecksumCalculationEnabled()
211+
? RequestChecksumCalculation.WHEN_SUPPORTED
212+
: RequestChecksumCalculation.WHEN_REQUIRED;
213+
LOG.debug("Using checksum calculation policy: {}", checksumCalculation);
214+
builder.requestChecksumCalculation(checksumCalculation);
215+
216+
// response checksum validation. Slow, even with CRC32 checksums.
217+
final ResponseChecksumValidation checksumValidation;
218+
checksumValidation = parameters.isChecksumValidationEnabled()
219+
? ResponseChecksumValidation.WHEN_SUPPORTED
220+
: ResponseChecksumValidation.WHEN_REQUIRED;
221+
LOG.debug("Using checksum validation policy: {}", checksumValidation);
222+
builder.responseChecksumValidation(checksumValidation);
223+
197224
S3Configuration serviceConfiguration = S3Configuration.builder()
198225
.pathStyleAccessEnabled(parameters.isPathStyleAccess())
199-
.checksumValidationEnabled(parameters.isChecksumValidationEnabled())
200226
.build();
201227

202228
final ClientOverrideConfiguration.Builder override =

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,10 +1182,15 @@ private ClientManager createClientManager(URI fsURI, boolean dtEnabled) throws I
11821182
.withTransferManagerExecutor(unboundedThreadPool)
11831183
.withRegion(configuredRegion)
11841184
.withFipsEnabled(fipsEnabled)
1185+
.withS3ExpressStore(s3ExpressStore)
11851186
.withExpressCreateSession(
11861187
conf.getBoolean(S3EXPRESS_CREATE_SESSION, S3EXPRESS_CREATE_SESSION_DEFAULT))
11871188
.withChecksumValidationEnabled(
11881189
conf.getBoolean(CHECKSUM_VALIDATION, CHECKSUM_VALIDATION_DEFAULT))
1190+
.withChecksumCalculationEnabled(
1191+
conf.getBoolean(CHECKSUM_GENERATION, DEFAULT_CHECKSUM_GENERATION))
1192+
.withMd5HeaderEnabled(conf.getBoolean(REQUEST_MD5_HEADER,
1193+
DEFAULT_REQUEST_MD5_HEADER))
11891194
.withClientSideEncryptionEnabled(isCSEEnabled)
11901195
.withClientSideEncryptionMaterials(cseMaterials)
11911196
.withAnalyticsAcceleratorEnabled(isAnalyticsAcceleratorEnabled)

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.apache.hadoop.fs.s3a;
2020

21+
import software.amazon.awssdk.awscore.exception.AwsErrorDetails;
2122
import software.amazon.awssdk.awscore.exception.AwsServiceException;
2223
import software.amazon.awssdk.core.exception.AbortedException;
2324
import software.amazon.awssdk.core.exception.ApiCallAttemptTimeoutException;
@@ -239,8 +240,13 @@ public static IOException translateException(@Nullable String operation,
239240
? (S3Exception) ase
240241
: null;
241242
int status = ase.statusCode();
242-
if (ase.awsErrorDetails() != null) {
243-
message = message + ":" + ase.awsErrorDetails().errorCode();
243+
// error details, may be null
244+
final AwsErrorDetails errorDetails = ase.awsErrorDetails();
245+
// error code; remains the empty string if errorDetails is null
246+
String errorCode = "";
247+
if (errorDetails != null) {
248+
errorCode = errorDetails.errorCode();
249+
message = message + ":" + errorCode;
244250
}
245251

246252
// big switch on the HTTP status code.
@@ -307,6 +313,8 @@ public static IOException translateException(@Nullable String operation,
307313
// precondition failure: the object is there, but the precondition
308314
// (e.g. etag) didn't match. Assume remote file change during
309315
// rename or status passed in to openfile had an etag which didn't match.
316+
// See the SC_200 handler for the treatment of the S3 Express failure
317+
// variant.
310318
case SC_412_PRECONDITION_FAILED:
311319
ioe = new RemoteFileChangedException(path, message, "", ase);
312320
break;
@@ -351,6 +359,16 @@ public static IOException translateException(@Nullable String operation,
351359
return ((MultiObjectDeleteException) exception)
352360
.translateException(message);
353361
}
362+
if (PRECONDITION_FAILED.equals(errorCode)) {
363+
// S3 Express stores report conflict in conditional writes
364+
// as a 200 + an error code of "PreconditionFailed".
365+
// This is mapped to RemoteFileChangedException for consistency
366+
// with SC_412_PRECONDITION_FAILED handling.
367+
return new RemoteFileChangedException(path,
368+
operation,
369+
exception.getMessage(),
370+
exception);
371+
}
354372
// other 200: FALL THROUGH
355373

356374
default:

0 commit comments

Comments
 (0)