Skip to content

Commit

Permalink
Validate checksum fix (#224)
Browse files Browse the repository at this point in the history
- Support list of checksum algorithms to validate
- Fixed the following previous behaviors:
   - When the checksums of all parts of the object had been validated, we still told the user that we did not validate the checksum.
   - When checksum validation failed for some part of the object, we told the user that checksum validation failed, but also that we did not validate the checksum.


TODO: 
- Tests for the case where checksum validation fails. <- We don't have a mock server, so we cannot control that...
  • Loading branch information
TingDaoK committed Oct 26, 2022
1 parent b1a033d commit a41255e
Show file tree
Hide file tree
Showing 14 changed files with 256 additions and 45 deletions.
2 changes: 2 additions & 0 deletions include/aws/s3/private/s3_auto_ranged_get.h
Expand Up @@ -17,6 +17,7 @@ enum aws_s3_auto_ranged_get_request_type {
struct aws_s3_auto_ranged_get {
struct aws_s3_meta_request base;

enum aws_s3_checksum_algorithm validation_algorithm;
/* Members to only be used when the mutex in the base type is locked. */
struct {
/* The starting byte of the data that will be retrieved from the object. */
Expand All @@ -33,6 +34,7 @@ struct aws_s3_auto_ranged_get {
uint32_t num_parts_completed;
uint32_t num_parts_successful;
uint32_t num_parts_failed;
uint32_t num_parts_checksum_validated;

uint32_t object_range_known : 1;
uint32_t head_object_sent : 1;
Expand Down
20 changes: 19 additions & 1 deletion include/aws/s3/private/s3_checksums.h
Expand Up @@ -26,6 +26,18 @@ struct aws_s3_checksum {
bool good;
};

/*
 * Internal, self-contained copy of the user-facing `struct aws_s3_checksum_config`.
 * It is filled in by checksum_config_init(), which turns the caller-owned list of
 * validation algorithms into plain boolean flags.
 */
struct checksum_config {
/* Copied from the user's config `location` field. */
enum aws_s3_checksum_location location;
/* Copied from the user's config `checksum_algorithm` field. */
enum aws_s3_checksum_algorithm checksum_algorithm;
/* Copied from the user's config `validate_response_checksum` field. */
bool validate_response_checksum;
/* One flag per algorithm that may be used to validate a response checksum. */
struct {
bool crc32c;
bool crc32;
bool sha1;
bool sha256;
} response_checksum_algorithms;
};

/**
* a stream that takes in a stream, computes a running checksum as it is read, and outputs the checksum when the stream
* is destroyed.
Expand All @@ -48,6 +60,8 @@ struct aws_input_stream *aws_checksum_stream_new(
struct aws_byte_buf *checksum_output);

/**
* TODO: properly support chunked encoding.
*
* A stream that takes in a stream, encodes it to aws_chunked. Computes a running checksum as it is read and add the
* checksum as trailer at the end of the stream. All of the added bytes will be counted to the length of the stream.
* Note: seek this stream will immediately fail, as it would prevent an accurate calculation of the
Expand Down Expand Up @@ -128,4 +142,8 @@ int aws_checksum_update(struct aws_s3_checksum *checksum, const struct aws_byte_
*/
AWS_S3_API
int aws_checksum_finalize(struct aws_s3_checksum *checksum, struct aws_byte_buf *output, size_t truncate_to);
#endif

/**
 * Initialize `internal_config` from the user-facing `config`.
 *
 * `internal_config` is zeroed first; when `config` is NULL it is left zeroed.
 * Otherwise the location, checksum algorithm and validate_response_checksum
 * values are copied, and the optional `validate_checksum_algorithms` list is
 * translated into the per-algorithm flags of `response_checksum_algorithms`.
 * When no list is given and response validation is requested, all four
 * algorithms (CRC32C, CRC32, SHA1, SHA256) are enabled.
 */
AWS_S3_API
void checksum_config_init(struct checksum_config *internal_config, const struct aws_s3_checksum_config *config);

#endif /* AWS_S3_CHECKSUMS_H */
9 changes: 8 additions & 1 deletion include/aws/s3/private/s3_meta_request_impl.h
Expand Up @@ -14,6 +14,7 @@
#include <aws/common/task_scheduler.h>
#include <aws/http/request_response.h>

#include "aws/s3/private/s3_checksums.h"
#include "aws/s3/private/s3_client_impl.h"
#include "aws/s3/private/s3_request.h"

Expand Down Expand Up @@ -190,7 +191,8 @@ struct aws_s3_meta_request {

const bool should_compute_content_md5;

struct aws_s3_checksum_config checksum_config;
/* deep copy of the checksum config. */
struct checksum_config checksum_config;

/* checksum found in either a default get request, or in the initial head request of a multipart get */
struct aws_byte_buf meta_request_level_response_header_checksum;
Expand Down Expand Up @@ -334,6 +336,11 @@ void aws_s3_meta_request_result_clean_up(
struct aws_s3_meta_request *meta_request,
struct aws_s3_meta_request_result *result);

/*
 * NOTE(review): the definition is not visible from this header. Presumably returns
 * true when `algorithm` is enabled in
 * `meta_request->checksum_config.response_checksum_algorithms` - confirm against
 * the implementation.
 */
AWS_S3_API
bool aws_s3_meta_request_checksum_config_has_algorithm(
struct aws_s3_meta_request *meta_request,
enum aws_s3_checksum_algorithm algorithm);

AWS_EXTERN_C_END

#endif /* AWS_S3_META_REQUEST_IMPL_H */
4 changes: 4 additions & 0 deletions include/aws/s3/private/s3_request.h
Expand Up @@ -11,6 +11,8 @@
#include <aws/common/ref_count.h>
#include <aws/s3/s3.h>

#include <aws/s3/private/s3_checksums.h>

struct aws_http_message;
struct aws_signable;
struct aws_s3_meta_request;
Expand Down Expand Up @@ -63,6 +65,8 @@ struct aws_s3_request {

/* running checksum of the response to an individual get part http request */
struct aws_s3_checksum *request_level_running_response_sum;
/* The algorithm used to validate the checksum */
enum aws_s3_checksum_algorithm validation_algorithm;

/* Get request only, was there a checksum to validate */
bool did_validate;
Expand Down
5 changes: 3 additions & 2 deletions include/aws/s3/private/s3_request_messages.h
Expand Up @@ -17,6 +17,7 @@ struct aws_byte_buf;
struct aws_byte_cursor;
struct aws_string;
struct aws_array_list;
struct checksum_config;

AWS_EXTERN_C_BEGIN

Expand Down Expand Up @@ -51,7 +52,7 @@ struct aws_input_stream *aws_s3_message_util_assign_body(
struct aws_allocator *allocator,
struct aws_byte_buf *byte_buf,
struct aws_http_message *out_message,
const struct aws_s3_checksum_config *checksum_config,
const struct checksum_config *checksum_config,
struct aws_byte_buf *out_checksum);

/* Return true if checksum headers has been set. */
Expand Down Expand Up @@ -92,7 +93,7 @@ struct aws_http_message *aws_s3_upload_part_message_new(
uint32_t part_number,
const struct aws_string *upload_id,
bool should_compute_content_md5,
const struct aws_s3_checksum_config *checksum_config,
const struct checksum_config *checksum_config,
struct aws_byte_buf *encoded_checksum_output);

/* Create an HTTP request for an S3 UploadPartCopy request, using the original request as a basis.
Expand Down
15 changes: 15 additions & 0 deletions include/aws/s3/s3_client.h
Expand Up @@ -318,6 +318,21 @@ struct aws_s3_checksum_config {
* checksum, and checksum found in the response header do not match.
*/
bool validate_response_checksum;

/**
* Optional array of `enum aws_s3_checksum_algorithm`.
*
* Ignored when validate_response_checksum is not set.
* If not set all the algorithms will be selected as default behavior.
* Owned by the caller.
*
* The list of algorithms the client may use when validating the response checksum. The client picks an algorithm
* from this list based on the algorithm sent by the server and on performance priority. The priority based on
* performance is [CRC32C, CRC32, SHA1, SHA256].
*
* If the response checksum was validated by client, the result will indicate which algorithm was picked.
*/
struct aws_array_list *validate_checksum_algorithms;
};

/* Options for a new meta request, ie, file transfer that will be handled by the high performance client. */
Expand Down
24 changes: 23 additions & 1 deletion source/s3_auto_ranged_get.c
Expand Up @@ -159,7 +159,6 @@ static bool s_s3_auto_ranged_get_update(
/* If there exists a range header or we require validation of the response checksum, we currently always
* do a head request first.
* S3 returns the checksum of the entire object from the HEAD response
* TODO: remove the head for checksum. Revamp the validation
*
* While the range header value could be parsed client-side, doing so presents a number of
* complications. For example, the given range could be an unsatisfiable range, and might not even
Expand Down Expand Up @@ -319,6 +318,13 @@ static bool s_s3_auto_ranged_get_update(

if (!work_remaining) {
aws_s3_meta_request_set_success_synced(meta_request, s_s3_auto_ranged_get_success_status(meta_request));
if (auto_ranged_get->synced_data.num_parts_checksum_validated ==
auto_ranged_get->synced_data.num_parts_requested) {
/* If we have validated the checksum for every part, we set the meta request level checksum validation
* result. */
meta_request->synced_data.finish_result.did_validate = true;
meta_request->synced_data.finish_result.validation_algorithm = auto_ranged_get->validation_algorithm;
}
}

aws_s3_meta_request_unlock_synced_data(meta_request);
Expand Down Expand Up @@ -632,6 +638,16 @@ static void s_s3_auto_ranged_get_request_finished(
++auto_ranged_get->synced_data.num_parts_completed;

if (!request_failed) {

/* Record the number of parts that checksum has been validated */
if (request->did_validate) {
if (auto_ranged_get->validation_algorithm == AWS_SCA_NONE) {
auto_ranged_get->validation_algorithm = request->validation_algorithm;
}
/* They should be the same. */
AWS_ASSERT(auto_ranged_get->validation_algorithm == request->validation_algorithm);
++auto_ranged_get->synced_data.num_parts_checksum_validated;
}
++auto_ranged_get->synced_data.num_parts_successful;

aws_s3_meta_request_stream_response_body_synced(meta_request, request);
Expand All @@ -656,6 +672,12 @@ static void s_s3_auto_ranged_get_request_finished(

if (error_code != AWS_ERROR_SUCCESS) {
aws_s3_meta_request_set_fail_synced(meta_request, request, error_code);
if (error_code == AWS_ERROR_S3_RESPONSE_CHECKSUM_MISMATCH) {
/* It's a mismatch of checksum, tell user that we validated the checksum and the algorithm we validated
*/
meta_request->synced_data.finish_result.did_validate = true;
meta_request->synced_data.finish_result.validation_algorithm = request->validation_algorithm;
}
}

aws_s3_meta_request_unlock_synced_data(meta_request);
Expand Down
40 changes: 40 additions & 0 deletions source/s3_checksums.c
Expand Up @@ -268,3 +268,43 @@ int aws_checksum_compute(
return AWS_OP_ERR;
}
}

/**
 * Initialize `internal_config` from the user-facing `config`.
 *
 * `internal_config` is always zeroed first, so a NULL `config` yields a config
 * with no checksum algorithm and response validation disabled.
 *
 * The caller-owned `validate_checksum_algorithms` list is translated into plain
 * boolean flags, so `internal_config` does not reference the list after this
 * function returns:
 *  - validate_response_checksum not set: the list is ignored (as documented on
 *    `struct aws_s3_checksum_config`) and no response algorithm is enabled.
 *  - validate_response_checksum set, no list: all four algorithms are enabled.
 *  - validate_response_checksum set, list given: only the listed algorithms are
 *    enabled; unrecognized entries are skipped.
 */
void checksum_config_init(struct checksum_config *internal_config, const struct aws_s3_checksum_config *config) {
    AWS_ZERO_STRUCT(*internal_config);
    if (!config) {
        return;
    }
    internal_config->checksum_algorithm = config->checksum_algorithm;
    internal_config->location = config->location;
    internal_config->validate_response_checksum = config->validate_response_checksum;

    if (!config->validate_response_checksum) {
        /* The algorithm list only applies to response validation; leave every flag cleared. */
        return;
    }

    if (!config->validate_checksum_algorithms) {
        /* No explicit list: default to accepting every supported algorithm. */
        internal_config->response_checksum_algorithms.crc32c = true;
        internal_config->response_checksum_algorithms.crc32 = true;
        internal_config->response_checksum_algorithms.sha1 = true;
        internal_config->response_checksum_algorithms.sha256 = true;
        return;
    }

    const size_t count = aws_array_list_length(config->validate_checksum_algorithms);
    for (size_t i = 0; i < count; ++i) {
        enum aws_s3_checksum_algorithm algorithm;
        /* A non-zero return means nothing was written to `algorithm`; never read it in that case. */
        if (aws_array_list_get_at(config->validate_checksum_algorithms, &algorithm, i)) {
            continue;
        }
        switch (algorithm) {
            case AWS_SCA_CRC32C:
                internal_config->response_checksum_algorithms.crc32c = true;
                break;
            case AWS_SCA_CRC32:
                internal_config->response_checksum_algorithms.crc32 = true;
                break;
            case AWS_SCA_SHA1:
                internal_config->response_checksum_algorithms.sha1 = true;
                break;
            case AWS_SCA_SHA256:
                internal_config->response_checksum_algorithms.sha256 = true;
                break;
            default:
                /* Unknown or "none" entries select nothing. */
                break;
        }
    }
}

0 comments on commit a41255e

Please sign in to comment.