Skip to content
This repository has been archived by the owner on May 10, 2024. It is now read-only.

Commit

Permalink
Merge branch 'glacier-tree-hash' into develop
Browse files Browse the repository at this point in the history
Fixes #1083.

* glacier-tree-hash:
  Account for empty archives in hash calculations
  • Loading branch information
jamesls committed Jan 7, 2013
2 parents 0ec310c + 30485a3 commit 52f6720
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 9 deletions.
4 changes: 4 additions & 0 deletions boto/glacier/utils.py
Expand Up @@ -56,6 +56,8 @@ def chunk_hashes(bytestring, chunk_size=_MEGABYTE):
start = i * chunk_size
end = (i + 1) * chunk_size
hashes.append(hashlib.sha256(bytestring[start:end]).digest())
if not hashes:
return [hashlib.sha256('').digest()]
return hashes


Expand Down Expand Up @@ -107,6 +109,8 @@ def compute_hashes_from_fileobj(fileobj, chunk_size=1024 * 1024):
linear_hash.update(chunk)
chunks.append(hashlib.sha256(chunk).digest())
chunk = fileobj.read(chunk_size)
if not chunks:
chunks = [hashlib.sha256('').digest()]
return linear_hash.hexdigest(), bytes_to_hex(tree_hash(chunks))


Expand Down
58 changes: 49 additions & 9 deletions tests/unit/glacier/test_utils.py
Expand Up @@ -19,53 +19,93 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import time
import logging
from hashlib import sha256
from tests.unit import unittest

from boto.glacier import utils
from boto.glacier.utils import minimum_part_size, chunk_hashes, tree_hash, \
bytes_to_hex


class TestPartSizeCalculations(unittest.TestCase):
def test_small_values_still_use_default_part_size(self):
self.assertEqual(utils.minimum_part_size(1), 4 * 1024 * 1024)
self.assertEqual(minimum_part_size(1), 4 * 1024 * 1024)

def test_under_the_maximum_value(self):
# If we're under the maximum, we can use 4MB part sizes.
self.assertEqual(utils.minimum_part_size(8 * 1024 * 1024),
self.assertEqual(minimum_part_size(8 * 1024 * 1024),
4 * 1024 * 1024)

def test_gigabyte_size(self):
# If we're over the maximum default part size, we go up to the next
# power of two until we find a part size that keeps us under 10,000
# parts.
self.assertEqual(utils.minimum_part_size(8 * 1024 * 1024 * 10000),
self.assertEqual(minimum_part_size(8 * 1024 * 1024 * 10000),
8 * 1024 * 1024)

def test_terabyte_size(self):
# For a 4 TB file we need at least a 512 MB part size.
self.assertEqual(utils.minimum_part_size(4 * 1024 * 1024 * 1024 * 1024),
self.assertEqual(minimum_part_size(4 * 1024 * 1024 * 1024 * 1024),
512 * 1024 * 1024)

def test_file_size_too_large(self):
with self.assertRaises(ValueError):
utils.minimum_part_size((40000 * 1024 * 1024 * 1024) + 1)
minimum_part_size((40000 * 1024 * 1024 * 1024) + 1)


class TestChunking(unittest.TestCase):
def test_chunk_hashes_exact(self):
chunks = utils.chunk_hashes('a' * (2 * 1024 * 1024))
chunks = chunk_hashes('a' * (2 * 1024 * 1024))
self.assertEqual(len(chunks), 2)
self.assertEqual(chunks[0], sha256('a' * 1024 * 1024).digest())

def test_chunks_with_leftovers(self):
bytestring = 'a' * (2 * 1024 * 1024 + 20)
chunks = utils.chunk_hashes(bytestring)
chunks = chunk_hashes(bytestring)
self.assertEqual(len(chunks), 3)
self.assertEqual(chunks[0], sha256('a' * 1024 * 1024).digest())
self.assertEqual(chunks[1], sha256('a' * 1024 * 1024).digest())
self.assertEqual(chunks[2], sha256('a' * 20).digest())

def test_less_than_one_chunk(self):
chunks = utils.chunk_hashes('aaaa')
chunks = chunk_hashes('aaaa')
self.assertEqual(len(chunks), 1)
self.assertEqual(chunks[0], sha256('aaaa').digest())


class TestTreeHash(unittest.TestCase):
    """Regression tests that pin tree hashes to precomputed values.

    The expected digests below are reference values computed ahead of
    time, so these tests will at least catch any regression in the
    tree hash calculation.
    """

    def calculate_tree_hash(self, bytestring):
        """Return the hex-encoded tree hash of ``bytestring``.

        The time taken by the calculation is reported through
        ``logging.debug``.
        """
        started = time.time()
        digest = tree_hash(chunk_hashes(bytestring))
        hex_digest = bytes_to_hex(digest)
        elapsed = time.time() - started
        logging.debug("Tree hash calc time for length %s: %s",
                      len(bytestring), elapsed)
        return hex_digest

    def test_tree_hash_calculations(self):
        """Check inputs of 1 MB, 2 MB, 4 MB and 4 MB plus 20 bytes."""
        megabyte = 1024 * 1024
        four_megs = 'a' * (4 * megabyte)
        # (input, expected hex tree hash) pairs, checked in order.
        reference_cases = [
            ('a' * (1 * megabyte),
             '9bc1b2a288b26af7257a36277ae3816a7d4f16e89c1e7e77d0a5c48bad62b360'),
            ('a' * (2 * megabyte),
             '560c2c9333c719cb00cfdffee3ba293db17f58743cdd1f7e4055373ae6300afa'),
            (four_megs,
             '9491cb2ed1d4e7cd53215f4017c23ec4ad21d7050a1e6bb636c4f67e8cddb844'),
            (four_megs + 'a' * 20,
             '12f3cbd6101b981cde074039f6f728071da8879d6f632de8afc7cdf00661b08f'),
        ]
        for payload, expected in reference_cases:
            self.assertEqual(self.calculate_tree_hash(payload), expected)

    def test_empty_tree_hash(self):
        """An empty input must still produce a tree hash."""
        # The expected value is the SHA-256 digest of empty input.
        expected = ('e3b0c44298fc1c149afbf4c8996fb924'
                    '27ae41e4649b934ca495991b7852b855')
        self.assertEqual(self.calculate_tree_hash(''), expected)

0 comments on commit 52f6720

Please sign in to comment.