Permalink
Browse files

Add glacier resume_file_upload

  • Loading branch information...
Robie Basak authored and jamesls committed Oct 11, 2012
1 parent 9ecaf83 commit 87685d9623080a727f3fac36df1aa3dd1a2a979d
Showing with 58 additions and 0 deletions.
  1. +58 −0 boto/glacier/writer.py
View
@@ -190,6 +190,23 @@ def upload_part(self, part_index, part_data):
response.read()
self._uploaded_size += len(part_data)
def skip_part(self, part_index, part_tree_hash, part_length):
    """Record a part as already uploaded without sending its data.

    The final close call must compute the tree hash and total size of
    everything uploaded, so resume logic uses this to account for parts
    whose bytes are already stored on the server.

    :param part_index: part number where 0 is the first part
    :param part_tree_hash: binary tree_hash of part being skipped
    :param part_length: length of part being skipped

    """
    if self.closed:
        raise ValueError("I/O operation on closed file")
    # The two bookkeeping updates below are independent; together they
    # make close() behave as if the part had been uploaded by us.
    self._uploaded_size += part_length
    self._insert_tree_hash(part_index, part_tree_hash)
def close(self):
if self.closed:
return
@@ -205,6 +222,47 @@ def close(self):
self.closed = True
def generate_parts_from_fobj(fobj, part_size):
    """Yield successive chunks of at most part_size bytes read from fobj.

    Iteration stops at the first empty read (end of file); the final
    chunk may be shorter than part_size.
    """
    while True:
        chunk = fobj.read(part_size)
        if not chunk:
            return
        yield chunk
+
def resume_file_upload(vault, upload_id, part_size, fobj, part_hash_map,
                       chunk_size=_ONE_MEGABYTE):
    """Resume upload of a file already part-uploaded to Glacier.

    An upload whose part-uploaded section is empty is a valid degenerate
    case that this function can handle; pass an empty dict as
    part_hash_map in that case.

    :param vault: boto.glacier.vault.Vault object.
    :param upload_id: existing Glacier upload id of upload being resumed.
    :param part_size: part size of existing upload.
    :param fobj: file object containing local data to resume. This must
        read from the start of the entire upload, not just from the point
        being resumed. Use fobj.seek(0) to achieve this if necessary.
    :param part_hash_map: {part_index: part_tree_hash, ...} of data already
        uploaded. Each supplied part_tree_hash will be verified and the
        part re-uploaded if there is a mismatch.
    :param chunk_size: chunk size of tree hash calculation. This must be
        1 MiB for Amazon.

    """
    uploader = _Uploader(vault, upload_id, part_size, chunk_size)
    parts = generate_parts_from_fobj(fobj, part_size)
    for index, part_data in enumerate(parts):
        computed_hash = tree_hash(chunk_hashes(part_data, chunk_size))
        if part_hash_map.get(index) == computed_hash:
            # The server already holds a verified copy of this part, so
            # only the bookkeeping for close() needs updating.
            uploader.skip_part(index, computed_hash, len(part_data))
        else:
            # Part was never uploaded, or its recorded hash disagrees
            # with the local data -- (re)send it.
            uploader.upload_part(index, part_data)
    uploader.close()
    return uploader.archive_id
+
+
class Writer(object):
"""
Presents a file-like object for writing to a Amazon Glacier

0 comments on commit 87685d9

Please sign in to comment.