From 30c64abaf668147e20e6299cd80251f16ab0e56a Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Fri, 4 Jan 2013 12:22:34 -0800 Subject: [PATCH] Add validate_checksum arg to get_output Fixes #1000 --- boto/glacier/job.py | 27 +++++++++++---- boto/glacier/utils.py | 13 ++++++++ tests/unit/glacier/test_job.py | 60 ++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 6 deletions(-) create mode 100644 tests/unit/glacier/test_job.py diff --git a/boto/glacier/job.py b/boto/glacier/job.py index 781bf38f48..c740174117 100644 --- a/boto/glacier/job.py +++ b/boto/glacier/job.py @@ -25,7 +25,7 @@ import socket from .exceptions import TreeHashDoesNotMatchError, DownloadArchiveError -from .utils import bytes_to_hex, chunk_hashes, tree_hash +from .utils import tree_hash_from_str class Job(object): @@ -59,7 +59,7 @@ def __init__(self, vault, response_data=None): def __repr__(self): return 'Job(%s)' % self.arn - def get_output(self, byte_range=None): + def get_output(self, byte_range=None, validate_checksum=False): """ This operation downloads the output of the job. Depending on the job type you specified when you initiated the job, the @@ -77,10 +77,25 @@ def get_output(self, byte_range=None): :type byte_range: tuple :param range: A tuple of integer specifying the slice (in bytes) of the archive you want to receive + + :type validate_checksum: bool + :param validate_checksum: Specify whether or not to validate + the associated tree hash. If the response does not contain + a TreeHash, then no checksum will be verified. 
+ """ - return self.vault.layer1.get_job_output(self.vault.name, - self.id, - byte_range) + response = self.vault.layer1.get_job_output(self.vault.name, + self.id, + byte_range) + if validate_checksum and 'TreeHash' in response: + data = response.read() + actual_tree_hash = tree_hash_from_str(data) + if response['TreeHash'] != actual_tree_hash: + raise TreeHashDoesNotMatchError( + "The calculated tree hash %s does not match the " + "expected tree hash %s for the byte range %s" % ( + actual_tree_hash, response['TreeHash'], byte_range)) + return response def download_to_file(self, filename, chunk_size=DefaultPartSize, verify_hashes=True, retry_exceptions=(socket.error,)): @@ -111,7 +126,7 @@ def _download_to_fileob(self, fileobj, num_chunks, chunk_size, verify_hashes, data, expected_tree_hash = self._download_byte_range( byte_range, retry_exceptions) if verify_hashes: - actual_tree_hash = bytes_to_hex(tree_hash(chunk_hashes(data))) + actual_tree_hash = tree_hash_from_str(data) if expected_tree_hash != actual_tree_hash: raise TreeHashDoesNotMatchError( "The calculated tree hash %s does not match the " diff --git a/boto/glacier/utils.py b/boto/glacier/utils.py index 5f26f6f38f..be7bc1da30 100644 --- a/boto/glacier/utils.py +++ b/boto/glacier/utils.py @@ -114,6 +114,19 @@ def bytes_to_hex(str_as_bytes): return ''.join(["%02x" % ord(x) for x in str_as_bytes]).strip() +def tree_hash_from_str(str_as_bytes): + """ + + :type str_as_bytes: str + :param str_as_bytes: The string for which to compute the tree hash. + + :rtype: str + :return: The computed tree hash, returned as hex. + + """ + return bytes_to_hex(tree_hash(chunk_hashes(str_as_bytes))) + + class ResettingFileSender(object): def __init__(self, archive): self._archive = archive diff --git a/tests/unit/glacier/test_job.py b/tests/unit/glacier/test_job.py new file mode 100644 index 0000000000..277fb85387 --- /dev/null +++ b/tests/unit/glacier/test_job.py @@ -0,0 +1,60 @@ +# Copyright (c) 2013 Amazon.com, Inc. 
or its affiliates. All Rights Reserved +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+# +from tests.unit import unittest +import mock + +from boto.glacier.job import Job +from boto.glacier.layer1 import Layer1 +from boto.glacier.response import GlacierResponse +from boto.glacier.exceptions import TreeHashDoesNotMatchError + + +class TestJob(unittest.TestCase): + def setUp(self): + self.api = mock.Mock(spec=Layer1) + self.vault = mock.Mock() + self.vault.layer1 = self.api + self.job = Job(self.vault) + + def test_get_job_validate_checksum_success(self): + response = GlacierResponse(mock.Mock(), None) + response['TreeHash'] = 'tree_hash' + self.api.get_job_output.return_value = response + with mock.patch('boto.glacier.job.tree_hash_from_str') as t: + t.return_value = 'tree_hash' + self.job.get_output(byte_range=(1, 1024), validate_checksum=True) + + def test_get_job_validation_fails(self): + response = GlacierResponse(mock.Mock(), None) + response['TreeHash'] = 'tree_hash' + self.api.get_job_output.return_value = response + with mock.patch('boto.glacier.job.tree_hash_from_str') as t: + t.return_value = 'BAD_TREE_HASH_VALUE' + with self.assertRaises(TreeHashDoesNotMatchError): + # With validate_checksum set to True, this call fails. + self.job.get_output(byte_range=(1, 1024), validate_checksum=True) + # With validate_checksum set to False, this call succeeds. + self.job.get_output(byte_range=(1, 1024), validate_checksum=False) + + +if __name__ == '__main__': + unittest.main()