Skip to content

Commit

Permalink
Check MD5 while doing snapshot
Browse files Browse the repository at this point in the history
S3 provides a feature that clients can use to ensure data integrity on upload. Whenever an object is PUT to an S3 bucket, the client can get back the base64-encoded `MD5` digest of the object and check that it matches the `MD5` digest computed locally.

 For reference, please see the [S3 PutObject API](http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html).

 Closes #186.
  • Loading branch information
dadoonet committed May 20, 2015
1 parent 9206b0d commit 2d4fd39
Showing 1 changed file with 29 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,18 @@

import com.amazonaws.AmazonClientException;
import com.amazonaws.services.s3.model.*;
import com.amazonaws.util.Base64;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;

Expand All @@ -49,7 +55,7 @@
public class DefaultS3OutputStream extends S3OutputStream {

private static final ByteSizeValue MULTIPART_MAX_SIZE = new ByteSizeValue(5, ByteSizeUnit.GB);

private static final ESLogger logger = Loggers.getLogger("cloud.aws");
/**
* Multipart Upload API data
*/
Expand Down Expand Up @@ -120,7 +126,28 @@ protected void doUpload(S3BlobStore blobStore, String bucketName, String blobNam
md.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION);
}
md.setContentLength(length);
blobStore.client().putObject(bucketName, blobName, is, md);

InputStream inputStream = is;

// Compute an MD5 digest of the stream content while it is being read for the upload
MessageDigest messageDigest;
try {
messageDigest = MessageDigest.getInstance("MD5");
inputStream = new DigestInputStream(is, messageDigest);
} catch (NoSuchAlgorithmException impossible) {
// Every implementation of the Java platform is required to support MD5 (see MessageDigest)
throw new RuntimeException(impossible);
}
PutObjectResult putObjectResult = blobStore.client().putObject(bucketName, blobName, inputStream, md);

String localMd5 = Base64.encodeAsString(messageDigest.digest());
String remoteMd5 = putObjectResult.getContentMd5();
if (!localMd5.equals(remoteMd5)) {
logger.debug("MD5 local [{}], remote [{}] are not equal...", localMd5, remoteMd5);
throw new AmazonS3Exception("MD5 local [" + localMd5 +
"], remote [" + remoteMd5 +
"] are not equal...");
}
}

private void initializeMultipart() {
Expand Down

0 comments on commit 2d4fd39

Please sign in to comment.