From 2d4fd39c60e3ce50a5b8f08a7d1a29779d7ea8d2 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 4 Mar 2015 16:50:25 +0100 Subject: [PATCH] Check MD5 while doing snapshot There is a feature available in S3 that clients can use to ensure data integrity on upload. Whenever an object is PUT to an S3 bucket, the client is able to get back the base64-encoded `MD5` and check that it is the same as the `MD5` computed locally. For reference, please see the [S3 PutObject API](http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html). Closes #186. --- .../aws/blobstore/DefaultS3OutputStream.java | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/cloud/aws/blobstore/DefaultS3OutputStream.java b/src/main/java/org/elasticsearch/cloud/aws/blobstore/DefaultS3OutputStream.java index 04d30f2b..c26e8888 100644 --- a/src/main/java/org/elasticsearch/cloud/aws/blobstore/DefaultS3OutputStream.java +++ b/src/main/java/org/elasticsearch/cloud/aws/blobstore/DefaultS3OutputStream.java @@ -21,12 +21,18 @@ import com.amazonaws.AmazonClientException; import com.amazonaws.services.s3.model.*; +import com.amazonaws.util.Base64; +import org.elasticsearch.common.logging.ESLogger; +import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.security.DigestInputStream; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.List; @@ -49,7 +55,7 @@ public class DefaultS3OutputStream extends S3OutputStream { private static final ByteSizeValue MULTIPART_MAX_SIZE = new ByteSizeValue(5, ByteSizeUnit.GB); - + private static final ESLogger logger = Loggers.getLogger("cloud.aws"); /** * Multipart Upload API data */ @@ -120,7 +126,28 @@ protected void 
doUpload(S3BlobStore blobStore, String bucketName, String blobNam md.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); } md.setContentLength(length); - blobStore.client().putObject(bucketName, blobName, is, md); + + InputStream inputStream = is; + + // We try to compute a MD5 while reading it + MessageDigest messageDigest; + try { + messageDigest = MessageDigest.getInstance("MD5"); + inputStream = new DigestInputStream(is, messageDigest); + } catch (NoSuchAlgorithmException impossible) { + // Every implementation of the Java platform is required to support MD5 (see MessageDigest) + throw new RuntimeException(impossible); + } + PutObjectResult putObjectResult = blobStore.client().putObject(bucketName, blobName, inputStream, md); + + String localMd5 = Base64.encodeAsString(messageDigest.digest()); + String remoteMd5 = putObjectResult.getContentMd5(); + if (!localMd5.equals(remoteMd5)) { + logger.debug("MD5 local [{}], remote [{}] are not equal...", localMd5, remoteMd5); + throw new AmazonS3Exception("MD5 local [" + localMd5 + + "], remote [" + remoteMd5 + + "] are not equal..."); + } } private void initializeMultipart() {