Factor _Uploader and _Partitioner out of Writer

commit 5769d23adbe13c8b9ed5362b4cbf59e4acf556c1 (1 parent: d9dbc57)
Authored by Robie Basak, committed by jamesls
Showing with 88 additions and 33 deletions.
  1. +85 −33 boto/glacier/writer.py
  2. +3 −0  tests/unit/glacier/test_writer.py
boto/glacier/writer.py (118 lines changed)
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
+# Copyright (c) 2012 Robie Basak <robie@justgohome.co.uk>
 # Tree hash implementation from Aaron Brady bradya@gmail.com
 #
 # Permission is hereby granted, free of charge, to any person obtaining a
@@ -94,37 +95,70 @@ def bytes_to_hex(str):
     return ''.join(["%02x" % ord(x) for x in str]).strip()
 
 
-class Writer(object):
-    """
-    Presents a file-like object for writing to a Amazon Glacier
-    Archive. The data is written using the multi-part upload API.
+class _Partitioner(object):
+    """Convert variable-size writes into part-sized writes
+
+    Call write(data) with variable sized data as needed to write all data. Call
+    flush() after all data is written.
+
+    This instance will call send_fn(part_data) as needed in part_size pieces,
+    except for the final part which may be shorter than part_size. Make sure to
+    call flush() to ensure that a short final part results in a final send_fn
+    call.
+
     """
-    def __init__(self, vault, upload_id, part_size, chunk_size=_ONE_MEGABYTE):
-        self.vault = vault
-        self.upload_id = upload_id
+    def __init__(self, part_size, send_fn):
         self.part_size = part_size
-        self.chunk_size = chunk_size
-
-        self._buffer_size = 0
-        self._uploaded_size = 0
+        self.send_fn = send_fn
         self._buffer = []
-        self._tree_hashes = []
+        self._buffer_size = 0
 
-        self.archive_location = None
-        self.closed = False
+    def write(self, data):
+        if data == '':
+            return
+        self._buffer.append(data)
+        self._buffer_size += len(data)
+        while self._buffer_size > self.part_size:
+            self._send_part()
 
-    def send_part(self):
-        buf = "".join(self._buffer)
+    def _send_part(self):
+        data = ''.join(self._buffer)
         # Put back any data remaining over the part size into the
         # buffer
-        if len(buf) > self.part_size:
-            self._buffer = [buf[self.part_size:]]
+        if len(data) > self.part_size:
+            self._buffer = [data[self.part_size:]]
             self._buffer_size = len(self._buffer[0])
         else:
             self._buffer = []
             self._buffer_size = 0
 
         # The part we will send
-        part = buf[:self.part_size]
+        part = data[:self.part_size]
+        self.send_fn(part)
+
+    def flush(self):
+        if self._buffer_size > 0:
+            self._send_part()
+
+
+class _Uploader(object):
+    """Upload to a Glacier upload_id.
+
+    Call upload_part for each part and then close to complete the upload.
+
+    """
+    def __init__(self, vault, upload_id, chunk_size=_ONE_MEGABYTE):
+        self.vault = vault
+        self.upload_id = upload_id
+        self.chunk_size = chunk_size
+
+        self._uploaded_size = 0
+        self._tree_hashes = []
+
+        self.closed = False
+
+    def upload_part(self, part):
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
         # Create a request and sign it
         part_tree_hash = tree_hash(chunk_hashes(part, self.chunk_size))
         self._tree_hashes.append(part_tree_hash)
@@ -138,25 +172,12 @@ def send_part(self):
                                                            linear_hash,
                                                            hex_tree_hash,
                                                            content_range, part)
-
         response.read()
         self._uploaded_size += len(part)
 
-    def write(self, str):
-        if self.closed:
-            raise ValueError("I/O operation on closed file")
-        if str == "":
-            return
-        self._buffer.append(str)
-        self._buffer_size += len(str)
-        while self._buffer_size > self.part_size:
-            self.send_part()
-
     def close(self):
         if self.closed:
             return
-        if self._buffer_size > 0:
-            self.send_part()
         # Complete the multipart glacier upload
         hex_tree_hash = bytes_to_hex(tree_hash(self._tree_hashes))
         response = self.vault.layer1.complete_multipart_upload(self.vault.name,
@@ -166,6 +187,37 @@ def close(self):
         self.archive_id = response['ArchiveId']
         self.closed = True
+
+class Writer(object):
+    """
+    Presents a file-like object for writing to an Amazon Glacier
+    Archive. The data is written using the multi-part upload API.
+    """
+    def __init__(self, vault, upload_id, part_size, chunk_size=_ONE_MEGABYTE):
+        self.uploader = _Uploader(vault, upload_id, chunk_size)
+        self.partitioner = _Partitioner(part_size, self.uploader.upload_part)
+        self.closed = False
+
+    def write(self, data):
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+        self.partitioner.write(data)
+
+    def close(self):
+        if self.closed:
+            return
+        self.partitioner.flush()
+        self.uploader.close()
+        self.closed = True
+
     def get_archive_id(self):
         self.close()
-        return self.archive_id
+        return self.uploader.archive_id
+
+    @property
+    def upload_id(self):
+        return self.uploader.upload_id
+
+    @property
+    def vault(self):
+        return self.uploader.vault
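
After this change, Writer is a thin composition: _Partitioner re-slices incoming writes into part_size pieces and hands each one to _Uploader.upload_part, and close() flushes the partitioner before completing the multipart upload. The sketch below shows intended usage; it is not part of the commit, and the Layer2 helper names (connect_to_region, get_vault, create_archive_writer) are assumptions about the surrounding boto API rather than anything this diff touches.

# Sketch only: assumes boto's Glacier Layer2 helpers exist as named and
# that create_archive_writer starts a multipart upload and returns this
# Writer wired to a fresh upload_id.
import boto.glacier

layer2 = boto.glacier.connect_to_region('us-east-1')
vault = layer2.get_vault('myvault')

writer = vault.create_archive_writer(description='nightly backup')
with open('backup.tar', 'rb') as f:
    # Feed arbitrary-sized chunks; _Partitioner re-slices them into
    # part_size parts and _Uploader sends each part to Glacier.
    for chunk in iter(lambda: f.read(1024 * 1024), ''):
        writer.write(chunk)
writer.close()  # flushes any short final part, then completes the upload
print writer.get_archive_id()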
tests/unit/glacier/test_writer.py (3 lines changed)
@@ -115,3 +115,6 @@ def test_returns_archive_id(self):
         self.writer.write('1')
         self.writer.close()
         self.assertEquals(sentinel.archive_id, self.writer.get_archive_id())
+
+    def test_upload_id(self):
+        self.assertEquals(sentinel.upload_id, self.writer.upload_id)
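
Because _Partitioner no longer knows anything about vaults or upload ids, its buffering behaviour can now be exercised in isolation. A hypothetical follow-on test, not part of this commit, could drive it with a mock standing in for _Uploader.upload_part:

# Hypothetical test (not in this commit): a tiny part_size makes the
# slicing visible; real Glacier part sizes are megabyte multiples.
from mock import Mock, call

from boto.glacier.writer import _Partitioner


def test_partitioner_sends_parts_and_flushes_remainder():
    send_fn = Mock()
    partitioner = _Partitioner(part_size=4, send_fn=send_fn)
    partitioner.write('abcdef')  # 6 bytes buffered: sends 'abcd', keeps 'ef'
    partitioner.flush()          # sends the short final part 'ef'
    send_fn.assert_has_calls([call('abcd'), call('ef')])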