Use Vault.DefaultPartSize if possible

As mentioned in #1264, self.DefaultPartSize is not
considered when calculating the minimum part size.
This is an issue whenever
vault.DefaultPartSize != utils.DEFAULT_PART_SIZE.

Also added unit tests to verify that DefaultPartSize is
obeyed (provided that it can accommodate the file size).
commit 468c7530bf0d63b489fa9b17b2962f2a25f36ec1 (parent 03fe126), committed by jamesls on Jan 23, 2013
Showing with 72 additions and 17 deletions.
  1. +22 −3 boto/glacier/utils.py
  2. +2 −3 boto/glacier/vault.py
  3. +5 −0 tests/unit/glacier/test_utils.py
  4. +43 −11 tests/unit/glacier/test_vault.py
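In short, the behavior change (a doctest-style sketch, assuming the patched boto.glacier.utils is importable; the 2 MB value mirrors the new unit test):

>>> from boto.glacier.utils import minimum_part_size, DEFAULT_PART_SIZE
>>> two_mb = 2 * 1024 * 1024
>>> # Old call signature: only the module-level 4 MB default applies.
>>> minimum_part_size(8 * 1024 * 1024) == DEFAULT_PART_SIZE
True
>>> # New optional argument: a caller-supplied default is obeyed when it fits.
>>> minimum_part_size(8 * 1024 * 1024, two_mb) == two_mb
True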
boto/glacier/utils.py
@@ -28,14 +28,33 @@
MAXIMUM_NUMBER_OF_PARTS = 10000
-def minimum_part_size(size_in_bytes):
+def minimum_part_size(size_in_bytes, default_part_size=DEFAULT_PART_SIZE):
+ """Calculate the minimum part size needed for a multipart upload.
+
+ Glacier allows a maximum of 10,000 parts per upload. It also
+ states that the maximum archive size is 10,000 * 4 GB, which means
+ the part size can range from 1 MB to 4 GB (provided it is 1 MB
+ multiplied by a power of 2).
+
+ This function will compute what the minimum part size must be in
+ order to upload a file of size ``size_in_bytes``.
+
+ It will first check if ``default_part_size`` is sufficient for
+ a part size given the ``size_in_bytes``. If this is not the case,
+ then the smallest part size that can accommodate a file of size
+ ``size_in_bytes`` will be returned.
+
+ If the file size is greater than the maximum allowed archive
+ size of 10,000 * 4GB, a ``ValueError`` will be raised.
+
+ """
# The default part size (4 MB) will be too small for a very large
# archive, as there is a limit of 10,000 parts in a multipart upload.
# This puts the maximum allowed archive size with the default part size
# at 40,000 MB. We need to do a sanity check on the part size, and find
# one that works if the default is too small.
part_size = _MEGABYTE
- if (DEFAULT_PART_SIZE * MAXIMUM_NUMBER_OF_PARTS) < size_in_bytes:
+ if (default_part_size * MAXIMUM_NUMBER_OF_PARTS) < size_in_bytes:
if size_in_bytes > (4096 * _MEGABYTE * 10000):
raise ValueError("File size too large: %s" % size_in_bytes)
min_part_size = size_in_bytes / 10000
@@ -45,7 +64,7 @@ def minimum_part_size(size_in_bytes):
power += 1
part_size = int(part_size)
else:
- part_size = DEFAULT_PART_SIZE
+ part_size = default_part_size
return part_size
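For the branch where the default is too small, a worked example (illustrative numbers, assuming the patched boto.glacier.utils is importable): a 50,000 MB archive split across the 10,000-part maximum needs at least 5 MB per part, which rounds up to the next power-of-two multiple of 1 MB, i.e. 8 MB:

>>> from boto.glacier.utils import minimum_part_size, _MEGABYTE
>>> # 4 MB (default) * 10,000 parts is only 40,000 MB, so it is too small
>>> minimum_part_size(50000 * _MEGABYTE) == 8 * _MEGABYTE
True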
boto/glacier/vault.py
@@ -161,9 +161,8 @@ def create_archive_from_file(self, filename=None, file_obj=None,
if not file_obj:
file_size = os.path.getsize(filename)
try:
- min_part_size = minimum_part_size(file_size)
- if (min_part_size>part_size):
- part_size=min_part_size
+ part_size = minimum_part_size(file_size,
+ self.DefaultPartSize)
except ValueError:
raise UploadArchiveError("File size of %s bytes exceeds "
"40,000 GB archive limit of Glacier.")
tests/unit/glacier/test_utils.py
@@ -53,6 +53,11 @@ def test_file_size_too_large(self):
with self.assertRaises(ValueError):
minimum_part_size((40000 * 1024 * 1024 * 1024) + 1)
+ def test_default_part_size_can_be_specified(self):
+ default_part_size = 2 * 1024 * 1024
+ self.assertEqual(minimum_part_size(8 * 1024 * 1024, default_part_size),
+ default_part_size)
+
class TestChunking(unittest.TestCase):
def test_chunk_hashes_exact(self):
tests/unit/glacier/test_vault.py
@@ -33,25 +33,57 @@ class TestVault(unittest.TestCase):
def setUp(self):
self.size_patch = mock.patch('os.path.getsize')
self.getsize = self.size_patch.start()
+ self.api = mock.Mock()
+ self.vault = vault.Vault(self.api, None)
+ self.vault.name = 'myvault'
+ self.mock_open = mock.mock_open()
+ stringio = StringIO('content')
+ self.mock_open.return_value.read = stringio.read
def tearDown(self):
self.size_patch.stop()
def test_upload_archive_small_file(self):
- api = mock.Mock()
- v = vault.Vault(api, None)
- v.name = 'myvault'
self.getsize.return_value = 1
- stringio = StringIO('content')
- m = mock.mock_open()
- m.return_value.read = stringio.read
- api.upload_archive.return_value = {'ArchiveId': 'archive_id'}
- with mock.patch('boto.glacier.vault.open', m, create=True):
- archive_id = v.upload_archive('filename', 'my description')
+ self.api.upload_archive.return_value = {'ArchiveId': 'archive_id'}
+ with mock.patch('boto.glacier.vault.open', self.mock_open,
+ create=True):
+ archive_id = self.vault.upload_archive(
+ 'filename', 'my description')
self.assertEqual(archive_id, 'archive_id')
- api.upload_archive.assert_called_with('myvault', m.return_value, ANY,
- ANY, 'my description')
+ self.api.upload_archive.assert_called_with(
+ 'myvault', self.mock_open.return_value,
+ mock.ANY, mock.ANY, 'my description')
+
+ def test_small_part_size_is_obeyed(self):
+ self.vault.DefaultPartSize = 2 * 1024 * 1024
+ self.vault.create_archive_writer = mock.Mock()
+
+ self.getsize.return_value = 1
+
+ with mock.patch('boto.glacier.vault.open', self.mock_open,
+ create=True):
+ self.vault.create_archive_from_file('myfile')
+ # The writer should be created with the default part size of the
+ # instance (2 MB).
+ self.vault.create_archive_writer.assert_called_with(
+ description=mock.ANY, part_size=self.vault.DefaultPartSize)
+
+ def test_large_part_size_is_obeyed(self):
+ self.vault.DefaultPartSize = 8 * 1024 * 1024
+ self.vault.create_archive_writer = mock.Mock()
+ self.getsize.return_value = 1
+ with mock.patch('boto.glacier.vault.open', self.mock_open,
+ create=True):
+ self.vault.create_archive_from_file('myfile')
+ # The writer should be created with the default part size of the
+ # instance (8 MB).
+ self.vault.create_archive_writer.assert_called_with(
+ description=mock.ANY, part_size=self.vault.DefaultPartSize)
+
+
+class TestConcurrentUploads(unittest.TestCase):
def test_concurrent_upload_file(self):
v = vault.Vault(None, None)
