Merge branch 'basak-glacier-upload-resume' into develop

commit 2af2774041c26dbcc652512420c77399391cd5f9 (2 parents: fe2359f + ef4fa93)
authored by @jamesls
boto/glacier/vault.py (92 lines changed)
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
+# Copyright (c) 2012 Robie Basak <robie@justgohome.co.uk>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
@@ -23,7 +24,7 @@
from __future__ import with_statement
from .exceptions import UploadArchiveError
from .job import Job
-from .writer import Writer, compute_hashes_from_fileobj
+from .writer import compute_hashes_from_fileobj, resume_file_upload, Writer
from .concurrent import ConcurrentUploader
from .utils import minimum_part_size, DEFAULT_PART_SIZE
import os.path
@@ -126,7 +127,7 @@ def create_archive_writer(self, part_size=DefaultPartSize,
return Writer(self, response['UploadId'], part_size=part_size)
def create_archive_from_file(self, filename=None, file_obj=None,
- description=None):
+ description=None, upload_id_callback=None):
"""
Create a new archive and upload the data from the given file
or file-like object.
@@ -140,6 +141,11 @@ def create_archive_from_file(self, filename=None, file_obj=None,
:type description: str
:param description: An optional description for the archive.
+ :type upload_id_callback: function
+    :param upload_id_callback: if set, called with the upload_id as its
+        only argument as soon as it is known, so that a later call to
+        resume_archive_from_file can resume the upload if needed.
+
:rtype: str
:return: The archive id of the newly created archive
"""
@@ -152,8 +158,11 @@ def create_archive_from_file(self, filename=None, file_obj=None,
raise UploadArchiveError("File size of %s bytes exceeds "
"40,000 GB archive limit of Glacier.")
file_obj = open(filename, "rb")
- writer = self.create_archive_writer(description=description,
+ writer = self.create_archive_writer(
+ description=description,
part_size=part_size)
+ if upload_id_callback:
+ upload_id_callback(writer.upload_id)
while True:
data = file_obj.read(part_size)
if not data:
@@ -162,6 +171,63 @@ def create_archive_from_file(self, filename=None, file_obj=None,
writer.close()
return writer.get_archive_id()
+ @staticmethod
+ def _range_string_to_part_index(range_string, part_size):
+ start, inside_end = [int(value) for value in range_string.split('-')]
+ end = inside_end + 1
+ length = end - start
+ if length == part_size + 1:
+ # Off-by-one bug in Amazon's Glacier implementation,
+ # see: https://forums.aws.amazon.com/thread.jspa?threadID=106866
+ # Workaround: since part_size is too big by one byte, adjust it
+ end -= 1
+ inside_end -= 1
+ length -= 1
+ assert not (start % part_size), (
+ "upload part start byte is not on a part boundary")
+ assert (length <= part_size), "upload part is bigger than part size"
+ return start // part_size
+
+ def resume_archive_from_file(self, upload_id, filename=None,
+ file_obj=None):
+ """Resume upload of a file already part-uploaded to Glacier.
+
+ The resumption of an upload where the part-uploaded section is empty
+ is a valid degenerate case that this function can handle.
+
+ One and only one of filename or file_obj must be specified.
+
+ :type upload_id: str
+ :param upload_id: existing Glacier upload id of upload being resumed.
+
+ :type filename: str
+ :param filename: file to open for resume
+
+    :type file_obj: file
+    :param file_obj: file-like object containing local data to resume. This
+        must read from the start of the entire upload, not just from the
+        point being resumed. Use file_obj.seek(0) to achieve this if necessary.
+
+ :rtype: str
+ :return: The archive id of the newly created archive
+
+ """
+ part_list_response = self.list_all_parts(upload_id)
+ part_size = part_list_response['PartSizeInBytes']
+
+ part_hash_map = {}
+ for part_desc in part_list_response['Parts']:
+ part_index = self._range_string_to_part_index(
+ part_desc['RangeInBytes'], part_size)
+ part_tree_hash = part_desc['SHA256TreeHash'].decode('hex')
+ part_hash_map[part_index] = part_tree_hash
+
+ if not file_obj:
+ file_obj = open(filename, "rb")
+
+ return resume_file_upload(
+ self, upload_id, part_size, file_obj, part_hash_map)
+
def concurrent_create_archive_from_file(self, filename):
"""
Create a new archive from a file and upload the given
@@ -290,3 +356,23 @@ def list_jobs(self, completed=None, status_code=None):
response_data = self.layer1.list_jobs(self.name, completed,
status_code)
return [Job(self, jd) for jd in response_data['JobList']]
+
+ def list_all_parts(self, upload_id):
+ """Automatically make and combine multiple calls to list_parts.
+
+ Call list_parts as necessary, combining the results in case multiple
+ calls were required to get data on all available parts.
+
+ """
+ result = self.layer1.list_parts(self.name, upload_id)
+ marker = result['Marker']
+ while marker:
+ additional_result = self.layer1.list_parts(
+ self.name, upload_id, marker=marker)
+ result['Parts'].extend(additional_result['Parts'])
+ marker = additional_result['Marker']
+ # The marker makes no sense in an unpaginated result, and clearing it
+ # makes testing easier. This also has the nice property that the result
+ # is a normal (but expanded) response.
+ result['Marker'] = None
+ return result
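
Hedged usage sketch (not part of the diff): how the new upload_id_callback and resume_archive_from_file fit together at the Vault level. The vault name, file path, and the way the upload_id is persisted are assumptions made only for illustration.

    import boto.glacier.layer2

    layer2 = boto.glacier.layer2.Layer2()      # credentials from env/config
    vault = layer2.get_vault('examplevault')   # assumed vault name

    saved = {}                                 # stand-in for durable storage
    try:
        vault.create_archive_from_file(
            'backup.tar',                      # assumed local file
            upload_id_callback=lambda uid: saved.update(upload_id=uid))
    except Exception:
        # On a later run, resume from the recorded upload_id instead of
        # re-uploading everything; already-uploaded parts are skipped.
        vault.resume_archive_from_file(saved['upload_id'],
                                       filename='backup.tar')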
boto/glacier/writer.py (217 lines changed)
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
+# Copyright (c) 2012 Robie Basak <robie@justgohome.co.uk>
# Tree hash implementation from Aaron Brady bradya@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a
@@ -90,81 +91,213 @@ def compute_hashes_from_fileobj(fileobj, chunk_size=1024 * 1024):
return linear_hash.hexdigest(), bytes_to_hex(tree_hash(chunks))
-def bytes_to_hex(str):
- return ''.join(["%02x" % ord(x) for x in str]).strip()
+def bytes_to_hex(str_as_bytes):
+ return ''.join(["%02x" % ord(x) for x in str_as_bytes]).strip()
-class Writer(object):
- """
- Presents a file-like object for writing to a Amazon Glacier
- Archive. The data is written using the multi-part upload API.
+class _Partitioner(object):
+ """Convert variable-size writes into part-sized writes
+
+ Call write(data) with variable sized data as needed to write all data. Call
+ flush() after all data is written.
+
+ This instance will call send_fn(part_data) as needed in part_size pieces,
+ except for the final part which may be shorter than part_size. Make sure to
+ call flush() to ensure that a short final part results in a final send_fn
+ call.
+
"""
- def __init__(self, vault, upload_id, part_size):
- self.vault = vault
- self.upload_id = upload_id
+ def __init__(self, part_size, send_fn):
self.part_size = part_size
-
- self._buffer_size = 0
- self._uploaded_size = 0
+ self.send_fn = send_fn
self._buffer = []
- self._tree_hashes = []
+ self._buffer_size = 0
- self.archive_location = None
- self.closed = False
+ def write(self, data):
+ if data == '':
+ return
+ self._buffer.append(data)
+ self._buffer_size += len(data)
+ while self._buffer_size > self.part_size:
+ self._send_part()
- def send_part(self):
- buf = "".join(self._buffer)
+ def _send_part(self):
+ data = ''.join(self._buffer)
# Put back any data remaining over the part size into the
# buffer
- if len(buf) > self.part_size:
- self._buffer = [buf[self.part_size:]]
+ if len(data) > self.part_size:
+ self._buffer = [data[self.part_size:]]
self._buffer_size = len(self._buffer[0])
else:
self._buffer = []
self._buffer_size = 0
# The part we will send
- part = buf[:self.part_size]
+ part = data[:self.part_size]
+ self.send_fn(part)
+
+ def flush(self):
+ if self._buffer_size > 0:
+ self._send_part()
+
+
+class _Uploader(object):
+ """Upload to a Glacier upload_id.
+
+ Call upload_part for each part (in any order) and then close to complete
+ the upload.
+
+ """
+ def __init__(self, vault, upload_id, part_size, chunk_size=_ONE_MEGABYTE):
+ self.vault = vault
+ self.upload_id = upload_id
+ self.part_size = part_size
+ self.chunk_size = chunk_size
+ self.archive_id = None
+
+ self._uploaded_size = 0
+ self._tree_hashes = []
+
+ self.closed = False
+
+ def _insert_tree_hash(self, index, raw_tree_hash):
+ list_length = len(self._tree_hashes)
+ if index >= list_length:
+ self._tree_hashes.extend([None] * (index - list_length + 1))
+ self._tree_hashes[index] = raw_tree_hash
+
+ def upload_part(self, part_index, part_data):
+ """Upload a part to Glacier.
+
+ :param part_index: part number where 0 is the first part
+ :param part_data: data to upload corresponding to this part
+
+ """
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
# Create a request and sign it
- part_tree_hash = tree_hash(chunk_hashes(part))
- self._tree_hashes.append(part_tree_hash)
+ part_tree_hash = tree_hash(chunk_hashes(part_data, self.chunk_size))
+ self._insert_tree_hash(part_index, part_tree_hash)
hex_tree_hash = bytes_to_hex(part_tree_hash)
- linear_hash = hashlib.sha256(part).hexdigest()
- content_range = (self._uploaded_size,
- (self._uploaded_size + len(part)) - 1)
+ linear_hash = hashlib.sha256(part_data).hexdigest()
+ start = self.part_size * part_index
+ content_range = (start,
+ (start + len(part_data)) - 1)
response = self.vault.layer1.upload_part(self.vault.name,
self.upload_id,
linear_hash,
hex_tree_hash,
- content_range, part)
-
+ content_range, part_data)
response.read()
- self._uploaded_size += len(part)
+ self._uploaded_size += len(part_data)
+
+ def skip_part(self, part_index, part_tree_hash, part_length):
+ """Skip uploading of a part.
+
+ The final close call needs to calculate the tree hash and total size
+ of all uploaded data, so this is the mechanism for resume
+ functionality to provide it without actually uploading the data again.
+
+ :param part_index: part number where 0 is the first part
+ :param part_tree_hash: binary tree_hash of part being skipped
+ :param part_length: length of part being skipped
- def write(self, str):
+ """
if self.closed:
raise ValueError("I/O operation on closed file")
- if str == "":
- return
- self._buffer.append(str)
- self._buffer_size += len(str)
- while self._buffer_size > self.part_size:
- self.send_part()
+ self._insert_tree_hash(part_index, part_tree_hash)
+ self._uploaded_size += part_length
def close(self):
if self.closed:
return
- if self._buffer_size > 0:
- self.send_part()
+ if None in self._tree_hashes:
+ raise RuntimeError("Some parts were not uploaded.")
# Complete the multipart glacier upload
hex_tree_hash = bytes_to_hex(tree_hash(self._tree_hashes))
- response = self.vault.layer1.complete_multipart_upload(self.vault.name,
- self.upload_id,
- hex_tree_hash,
- self._uploaded_size)
+ response = self.vault.layer1.complete_multipart_upload(
+ self.vault.name, self.upload_id, hex_tree_hash,
+ self._uploaded_size)
self.archive_id = response['ArchiveId']
self.closed = True
+
+def generate_parts_from_fobj(fobj, part_size):
+ data = fobj.read(part_size)
+ while data:
+ yield data
+ data = fobj.read(part_size)
+
+
+def resume_file_upload(vault, upload_id, part_size, fobj, part_hash_map,
+ chunk_size=_ONE_MEGABYTE):
+ """Resume upload of a file already part-uploaded to Glacier.
+
+ The resumption of an upload where the part-uploaded section is empty is a
+ valid degenerate case that this function can handle. In this case,
+ part_hash_map should be an empty dict.
+
+ :param vault: boto.glacier.vault.Vault object.
+ :param upload_id: existing Glacier upload id of upload being resumed.
+ :param part_size: part size of existing upload.
+ :param fobj: file object containing local data to resume. This must read
+ from the start of the entire upload, not just from the point being
+ resumed. Use fobj.seek(0) to achieve this if necessary.
+ :param part_hash_map: {part_index: part_tree_hash, ...} of data already
+ uploaded. Each supplied part_tree_hash will be verified and the part
+ re-uploaded if there is a mismatch.
+ :param chunk_size: chunk size of tree hash calculation. This must be
+ 1 MiB for Amazon.
+
+ """
+ uploader = _Uploader(vault, upload_id, part_size, chunk_size)
+ for part_index, part_data in enumerate(
+ generate_parts_from_fobj(fobj, part_size)):
+ part_tree_hash = tree_hash(chunk_hashes(part_data, chunk_size))
+ if (part_index not in part_hash_map or
+ part_hash_map[part_index] != part_tree_hash):
+ uploader.upload_part(part_index, part_data)
+ else:
+ uploader.skip_part(part_index, part_tree_hash, len(part_data))
+ uploader.close()
+ return uploader.archive_id
+
+
+class Writer(object):
+ """
+ Presents a file-like object for writing to an Amazon Glacier
+ Archive. The data is written using the multi-part upload API.
+ """
+ def __init__(self, vault, upload_id, part_size, chunk_size=_ONE_MEGABYTE):
+ self.uploader = _Uploader(vault, upload_id, part_size, chunk_size)
+ self.partitioner = _Partitioner(part_size, self._upload_part)
+ self.closed = False
+ self.next_part_index = 0
+
+ def write(self, data):
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+ self.partitioner.write(data)
+
+ def _upload_part(self, part_data):
+ self.uploader.upload_part(self.next_part_index, part_data)
+ self.next_part_index += 1
+
+ def close(self):
+ if self.closed:
+ return
+ self.partitioner.flush()
+ self.uploader.close()
+ self.closed = True
+
def get_archive_id(self):
self.close()
- return self.archive_id
+ return self.uploader.archive_id
+
+ @property
+ def upload_id(self):
+ return self.uploader.upload_id
+
+ @property
+ def vault(self):
+ return self.uploader.vault
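
Illustrative sketch only: the refactor splits the old Writer into a _Partitioner (buffering and re-slicing writes) and an _Uploader (hashing and sending parts). The snippet below exercises the internal _Partitioner to show how variable-sized writes are regrouped into part_size pieces plus a short final part; it is an internal helper, not a supported public API.

    from boto.glacier.writer import _Partitioner

    parts = []
    partitioner = _Partitioner(part_size=4, send_fn=parts.append)
    for chunk in ['12', '345', '6789', '0']:
        partitioner.write(chunk)
    partitioner.flush()   # flush emits the short final part
    print parts           # ['1234', '5678', '90']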
tests/unit/glacier/test_layer2.py (121 lines changed)
@@ -23,13 +23,16 @@
from tests.unit import unittest
-from mock import Mock
+from mock import call, Mock, patch, sentinel
from boto.glacier.layer1 import Layer1
from boto.glacier.layer2 import Layer2
+import boto.glacier.vault
from boto.glacier.vault import Vault
from boto.glacier.vault import Job
+from StringIO import StringIO
+
# Some fixture data from the Glacier docs
FIXTURE_VAULT = {
"CreationDate" : "2012-02-20T17:01:45.198Z",
@@ -61,6 +64,53 @@
"VaultARN": "arn:aws:glacier:us-east-1:012345678901:vaults/examplevault"
}
+EXAMPLE_PART_LIST_RESULT_PAGE_1 = {
+ "ArchiveDescription": "archive description 1",
+ "CreationDate": "2012-03-20T17:03:43.221Z",
+ "Marker": "MfgsKHVjbQ6EldVl72bn3_n5h2TaGZQUO-Qb3B9j3TITf7WajQ",
+ "MultipartUploadId": "OW2fM5iVylEpFEMM9_HpKowRapC3vn5sSL39_396UW9zLFUWVrnRHaPjUJddQ5OxSHVXjYtrN47NBZ-khxOjyEXAMPLE",
+ "PartSizeInBytes": 4194304,
+ "Parts":
+ [ {
+ "RangeInBytes": "4194304-8388607",
+ "SHA256TreeHash": "01d34dabf7be316472c93b1ef80721f5d4"
+ }],
+ "VaultARN": "arn:aws:glacier:us-east-1:012345678901:vaults/demo1-vault"
+}
+
+# The documentation doesn't say whether the non-Parts fields are defined in
+# future pages, so assume they are not.
+EXAMPLE_PART_LIST_RESULT_PAGE_2 = {
+ "ArchiveDescription": None,
+ "CreationDate": None,
+ "Marker": None,
+ "MultipartUploadId": None,
+ "PartSizeInBytes": None,
+ "Parts":
+ [ {
+ "RangeInBytes": "0-4194303",
+ "SHA256TreeHash": "01d34dabf7be316472c93b1ef80721f5d4"
+ }],
+ "VaultARN": None
+}
+
+EXAMPLE_PART_LIST_COMPLETE = {
+ "ArchiveDescription": "archive description 1",
+ "CreationDate": "2012-03-20T17:03:43.221Z",
+ "Marker": None,
+ "MultipartUploadId": "OW2fM5iVylEpFEMM9_HpKowRapC3vn5sSL39_396UW9zLFUWVrnRHaPjUJddQ5OxSHVXjYtrN47NBZ-khxOjyEXAMPLE",
+ "PartSizeInBytes": 4194304,
+ "Parts":
+ [ {
+ "RangeInBytes": "4194304-8388607",
+ "SHA256TreeHash": "01d34dabf7be316472c93b1ef80721f5d4"
+ }, {
+ "RangeInBytes": "0-4194303",
+ "SHA256TreeHash": "01d34dabf7be316472c93b1ef80721f5d4"
+ }],
+ "VaultARN": "arn:aws:glacier:us-east-1:012345678901:vaults/demo1-vault"
+}
+
class GlacierLayer2Base(unittest.TestCase):
def setUp(self):
@@ -131,6 +181,49 @@ def test_list_jobs(self):
"8i1_AUyUsuhPAdTqLHy8pTl5nfCFJmDl2yEZONi5L26Omw12vcs0"
"1MNGntHEQL8MBfGlqrEXAMPLEArchiveId")
+ def test_list_all_parts_one_page(self):
+ self.mock_layer1.list_parts.return_value = (
+ dict(EXAMPLE_PART_LIST_COMPLETE)) # take a copy
+ parts_result = self.vault.list_all_parts(sentinel.upload_id)
+ expected = [call('examplevault', sentinel.upload_id)]
+ self.assertEquals(expected, self.mock_layer1.list_parts.call_args_list)
+ self.assertEquals(EXAMPLE_PART_LIST_COMPLETE, parts_result)
+
+ def test_list_all_parts_two_pages(self):
+ self.mock_layer1.list_parts.side_effect = [
+ # take copies
+ dict(EXAMPLE_PART_LIST_RESULT_PAGE_1),
+ dict(EXAMPLE_PART_LIST_RESULT_PAGE_2)
+ ]
+ parts_result = self.vault.list_all_parts(sentinel.upload_id)
+ expected = [call('examplevault', sentinel.upload_id),
+ call('examplevault', sentinel.upload_id,
+ marker=EXAMPLE_PART_LIST_RESULT_PAGE_1['Marker'])]
+ self.assertEquals(expected, self.mock_layer1.list_parts.call_args_list)
+ self.assertEquals(EXAMPLE_PART_LIST_COMPLETE, parts_result)
+
+ @patch('boto.glacier.vault.resume_file_upload')
+ def test_resume_archive_from_file(self, mock_resume_file_upload):
+ part_size = 4
+ mock_list_parts = Mock()
+ mock_list_parts.return_value = {
+ 'PartSizeInBytes': part_size,
+ 'Parts': [{
+ 'RangeInBytes': '0-3',
+ 'SHA256TreeHash': '12',
+ }, {
+ 'RangeInBytes': '4-6',
+ 'SHA256TreeHash': '34',
+ },
+ ]}
+
+ self.vault.list_all_parts = mock_list_parts
+ self.vault.resume_archive_from_file(
+ sentinel.upload_id, file_obj=sentinel.file_obj)
+ mock_resume_file_upload.assert_called_once_with(
+ self.vault, sentinel.upload_id, part_size, sentinel.file_obj,
+ {0: '12'.decode('hex'), 1: '34'.decode('hex')})
+
class TestJob(GlacierLayer2Base):
def setUp(self):
@@ -145,3 +238,29 @@ def test_get_job_output(self):
"examplevault",
"HkF9p6o7yjhFx-K3CGl6fuSm6VzW9T7esGQfco8nUXVYwS0jlb5gq1JZ55yHgt5vP"
"54ZShjoQzQVVh7vEXAMPLEjobID", (0,100))
+
+class TestRangeStringParsing(unittest.TestCase):
+ def test_simple_range(self):
+ self.assertEquals(
+ Vault._range_string_to_part_index('0-3', 4), 0)
+
+ def test_range_one_too_big(self):
+ # Off-by-one bug in Amazon's Glacier implementation
+ # See: https://forums.aws.amazon.com/thread.jspa?threadID=106866&tstart=0
+ # Workaround is to assume that if a (start, end] range appears to be
+ # returned then that is what it is.
+ self.assertEquals(
+ Vault._range_string_to_part_index('0-4', 4), 0)
+
+ def test_range_too_big(self):
+ self.assertRaises(
+ AssertionError, Vault._range_string_to_part_index, '0-5', 4)
+
+ def test_range_start_mismatch(self):
+ self.assertRaises(
+ AssertionError, Vault._range_string_to_part_index, '1-3', 4)
+
+ def test_range_end_mismatch(self):
+ # End mismatch is OK, since the last part might be short
+ self.assertEquals(
+ Vault._range_string_to_part_index('0-2', 4), 0)
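
A worked example of the range parsing these tests cover (illustrative only, using the 4 MiB part size from the fixtures): _range_string_to_part_index converts Glacier's inclusive RangeInBytes strings to zero-based part indices, tolerating the off-by-one range Amazon is known to report.

    from boto.glacier.vault import Vault

    part_size = 4194304  # 4 MiB, as in EXAMPLE_PART_LIST_COMPLETE
    print Vault._range_string_to_part_index('0-4194303', part_size)        # 0
    print Vault._range_string_to_part_index('4194304-8388607', part_size)  # 1
    # One byte too long, as in the AWS forum thread referenced above:
    print Vault._range_string_to_part_index('0-4194304', part_size)        # 0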
tests/unit/glacier/test_writer.py (167 lines changed)
@@ -1,9 +1,25 @@
from hashlib import sha256
+import itertools
+from StringIO import StringIO
from tests.unit import unittest
-import mock
+from mock import (
+ call,
+ Mock,
+ patch,
+ sentinel,
+)
+from nose.tools import assert_equal
-from boto.glacier.writer import Writer, chunk_hashes
+from boto.glacier.layer1 import Layer1
+from boto.glacier.vault import Vault
+from boto.glacier.writer import (
+ bytes_to_hex,
+ chunk_hashes,
+ resume_file_upload,
+ tree_hash,
+ Writer,
+)
class TestChunking(unittest.TestCase):
@@ -24,3 +40,150 @@ def test_less_than_one_chunk(self):
chunks = chunk_hashes('aaaa')
self.assertEqual(len(chunks), 1)
self.assertEqual(chunks[0], sha256('aaaa').digest())
+
+
+def create_mock_vault():
+ vault = Mock(spec=Vault)
+ vault.layer1 = Mock(spec=Layer1)
+ vault.layer1.complete_multipart_upload.return_value = dict(
+ ArchiveId=sentinel.archive_id)
+ vault.name = sentinel.vault_name
+ return vault
+
+
+def partify(data, part_size):
+ for i in itertools.count(0):
+ start = i * part_size
+ part = data[start:start+part_size]
+ if part:
+ yield part
+ else:
+ return
+
+
+def calculate_mock_vault_calls(data, part_size, chunk_size):
+ upload_part_calls = []
+ data_tree_hashes = []
+ for i, data_part in enumerate(partify(data, part_size)):
+ start = i * part_size
+ end = start + len(data_part)
+ data_part_tree_hash_blob = tree_hash(
+ chunk_hashes(data_part, chunk_size))
+ data_part_tree_hash = bytes_to_hex(data_part_tree_hash_blob)
+ data_part_linear_hash = sha256(data_part).hexdigest()
+ upload_part_calls.append(
+ call.layer1.upload_part(
+ sentinel.vault_name, sentinel.upload_id,
+ data_part_linear_hash, data_part_tree_hash,
+ (start, end - 1), data_part))
+ data_tree_hashes.append(data_part_tree_hash_blob)
+
+ return upload_part_calls, data_tree_hashes
+
+
+def check_mock_vault_calls(vault, upload_part_calls, data_tree_hashes,
+ data_len):
+ vault.layer1.upload_part.assert_has_calls(
+ upload_part_calls, any_order=True)
+ assert_equal(
+ len(upload_part_calls), vault.layer1.upload_part.call_count)
+
+ data_tree_hash = bytes_to_hex(tree_hash(data_tree_hashes))
+ vault.layer1.complete_multipart_upload.assert_called_once_with(
+ sentinel.vault_name, sentinel.upload_id, data_tree_hash, data_len)
+
+
+class TestWriter(unittest.TestCase):
+ def setUp(self):
+ super(TestWriter, self).setUp()
+ self.vault = create_mock_vault()
+ self.chunk_size = 2 # power of 2
+ self.part_size = 4 # power of 2
+ upload_id = sentinel.upload_id
+ self.writer = Writer(
+ self.vault, upload_id, self.part_size, self.chunk_size)
+
+ def check_write(self, write_list):
+ for write_data in write_list:
+ self.writer.write(write_data)
+ self.writer.close()
+
+ data = ''.join(write_list)
+ upload_part_calls, data_tree_hashes = calculate_mock_vault_calls(
+ data, self.part_size, self.chunk_size)
+ check_mock_vault_calls(
+ self.vault, upload_part_calls, data_tree_hashes, len(data))
+
+ def test_single_byte_write(self):
+ self.check_write(['1'])
+
+ def test_one_part_write(self):
+ self.check_write(['1234'])
+
+ def test_split_write_1(self):
+ self.check_write(['1', '234'])
+
+ def test_split_write_2(self):
+ self.check_write(['12', '34'])
+
+ def test_split_write_3(self):
+ self.check_write(['123', '4'])
+
+ def test_one_part_plus_one_write(self):
+ self.check_write(['12345'])
+
+ def test_returns_archive_id(self):
+ self.writer.write('1')
+ self.writer.close()
+ self.assertEquals(sentinel.archive_id, self.writer.get_archive_id())
+
+ def test_upload_id(self):
+ self.assertEquals(sentinel.upload_id, self.writer.upload_id)
+
+
+class TestResume(unittest.TestCase):
+ def setUp(self):
+ super(TestResume, self).setUp()
+ self.vault = create_mock_vault()
+ self.chunk_size = 2 # power of 2
+ self.part_size = 4 # power of 2
+
+ def check_no_resume(self, data, resume_set=set()):
+ fobj = StringIO(data)
+ part_hash_map = {}
+ for part_index in resume_set:
+ start = self.part_size * part_index
+ end = start + self.part_size
+ part_data = data[start:end]
+ part_hash_map[part_index] = tree_hash(
+ chunk_hashes(part_data, self.chunk_size))
+
+ resume_file_upload(
+ self.vault, sentinel.upload_id, self.part_size, fobj,
+ part_hash_map, self.chunk_size)
+
+ upload_part_calls, data_tree_hashes = calculate_mock_vault_calls(
+ data, self.part_size, self.chunk_size)
+ resume_upload_part_calls = [
+ call for part_index, call in enumerate(upload_part_calls)
+ if part_index not in resume_set]
+ check_mock_vault_calls(
+ self.vault, resume_upload_part_calls, data_tree_hashes, len(data))
+
+ def test_one_part_no_resume(self):
+ self.check_no_resume('1234')
+
+ def test_two_parts_no_resume(self):
+ self.check_no_resume('12345678')
+
+ def test_one_part_resume(self):
+ self.check_no_resume('1234', resume_set=set([0]))
+
+ def test_two_parts_one_resume(self):
+ self.check_no_resume('12345678', resume_set=set([1]))
+
+ def test_returns_archive_id(self):
+ archive_id = resume_file_upload(
+ self.vault, sentinel.upload_id, self.part_size, StringIO('1'), {},
+ self.chunk_size)
+ self.assertEquals(sentinel.archive_id, archive_id)
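
For completeness, a hedged sketch of calling the writer-level helper directly, mirroring TestResume above. Here `vault` and `upload_id` are assumed to come from an existing Layer2 session and a previously started multipart upload, and the tiny part size is purely illustrative (real Glacier uploads require much larger parts).

    from StringIO import StringIO
    from boto.glacier.writer import resume_file_upload

    fobj = StringIO('12345678')   # must cover the whole upload from byte 0
    part_hash_map = {}            # empty map: no parts uploaded yet
    archive_id = resume_file_upload(
        vault, upload_id, 4, fobj, part_hash_map)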