Fixed s3 to s3 sync URL-encoding pagination bug.
Keys were not being URL-decoded properly with pagination.
If one bucket finished ListObjects pagination before the
other bucket, its scope exit unregistered the handler that
decodes the keys, so keys in the other bucket's remaining
pages were never URL-decoded.
kyleknap committed Sep 4, 2014
1 parent 2bdb58b commit 54d6430
Showing 5 changed files with 40 additions and 16 deletions.
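The mechanics behind the fix, in rough outline: both the source and destination listers register the same key-decoding handler under the unique id 'BucketListerDecodeKeys', and the new True argument threaded through the diff below maps to botocore's unique_id_uses_count flag, which makes registrations sharing a unique id reference-counted. The sketch that follows is illustrative only; ToyEmitter and decode_key are invented for this example and are not botocore's real event system, they just model why the count matters.

class ToyEmitter(object):
    """Toy model of an event emitter with count-based unique ids."""

    def __init__(self):
        self._handlers = {}   # event_name -> handler
        self._counts = {}     # unique_id -> number of live registrations

    def register(self, event_name, handler, unique_id=None,
                 unique_id_uses_count=False):
        if unique_id is not None and unique_id_uses_count:
            self._counts[unique_id] = self._counts.get(unique_id, 0) + 1
            if self._counts[unique_id] > 1:
                return  # already registered; just bump the count
        self._handlers[event_name] = handler

    def unregister(self, event_name, handler, unique_id=None,
                   unique_id_uses_count=False):
        if unique_id is not None and unique_id_uses_count:
            self._counts[unique_id] -= 1
            if self._counts[unique_id] > 0:
                return  # another scope still needs this handler
        self._handlers.pop(event_name, None)

    def emit(self, event_name, **kwargs):
        handler = self._handlers.get(event_name)
        if handler is not None:
            handler(**kwargs)


emitter = ToyEmitter()
decoded = []

def decode_key(key):
    # Stand-in for the real _decode_keys handler.
    decoded.append(key)

# Source and destination listers both register under the same unique id.
emitter.register('after-call.s3.ListObjects', decode_key,
                 'BucketListerDecodeKeys', unique_id_uses_count=True)
emitter.register('after-call.s3.ListObjects', decode_key,
                 'BucketListerDecodeKeys', unique_id_uses_count=True)

# The bucket with fewer pages finishes first and leaves its scope ...
emitter.unregister('after-call.s3.ListObjects', decode_key,
                   'BucketListerDecodeKeys', unique_id_uses_count=True)

# ... but the handler is still live for the other bucket's later pages.
# Without the count (the old behavior), the unregister above would have
# removed it and this key would have gone undecoded.
emitter.emit('after-call.s3.ListObjects', key='foo+1')
assert decoded == ['foo+1']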
4 changes: 4 additions & 0 deletions CHANGELOG.rst
@@ -23,6 +23,10 @@ Next Release (TBD)
 * bugfix:Endpoint URL: Provide a better error message when
   an invalid ``--endpoint-url`` is provided
   (`issue 899 <https://github.com/aws/aws-cli/issues/899>`__)
+* bugfix:``aws s3``: Fix issue when keys do not get properly
+  url decoded when syncing from a bucket that requires pagination
+  to a bucket that requires less pagination
+  (`issue 909 <https://github.com/aws/aws-cli/pull/909>`__)


1.4.2
13 changes: 9 additions & 4 deletions awscli/customizations/s3/utils.py
@@ -350,7 +350,8 @@ def list_objects(self, bucket, prefix=None, page_size=None):
         with ScopedEventHandler(self._operation.session,
                                 'after-call.s3.ListObjects',
                                 self._decode_keys,
-                                'BucketListerDecodeKeys'):
+                                'BucketListerDecodeKeys',
+                                True):
             pages = self._operation.paginate(self._endpoint, **kwargs)
             for response, page in pages:
                 contents = page['Contents']
@@ -368,18 +369,22 @@ def _decode_keys(self, parsed, **kwargs):
 class ScopedEventHandler(object):
     """Register an event callback for the duration of a scope."""

-    def __init__(self, session, event_name, handler, unique_id=None):
+    def __init__(self, session, event_name, handler, unique_id=None,
+                 unique_id_uses_count=False):
         self._session = session
         self._event_name = event_name
         self._handler = handler
         self._unique_id = unique_id
+        self._unique_id_uses_count = unique_id_uses_count

     def __enter__(self):
-        self._session.register(self._event_name, self._handler, self._unique_id)
+        self._session.register(self._event_name, self._handler, self._unique_id,
+                               self._unique_id_uses_count)

     def __exit__(self, exc_type, exc_value, traceback):
         self._session.unregister(self._event_name, self._handler,
-                                 self._unique_id)
+                                 self._unique_id,
+                                 self._unique_id_uses_count)


 class PrintTask(namedtuple('PrintTask',
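For completeness, here is a small usage sketch of the updated ScopedEventHandler. It assumes the post-change import path shown in this diff and uses a mock session, so it only demonstrates the call shape; the decode_keys function and the nested-scope arrangement are invented for illustration, and the comment about counting describes what a real botocore session would do with unique_id_uses_count.

from unittest import mock

from awscli.customizations.s3.utils import ScopedEventHandler

session = mock.Mock()

def decode_keys(parsed, **kwargs):
    pass  # stand-in for BucketLister._decode_keys

# Two overlapping scopes (source and destination listers) share one unique id.
source_scope = ScopedEventHandler(session, 'after-call.s3.ListObjects',
                                  decode_keys, 'BucketListerDecodeKeys', True)
dest_scope = ScopedEventHandler(session, 'after-call.s3.ListObjects',
                                decode_keys, 'BucketListerDecodeKeys', True)

with source_scope:
    with dest_scope:
        pass
    # With a real botocore session, this inner exit only decrements the
    # registration count, so the handler stays live for the outer scope.

# The mock records that the new flag is passed through on both calls.
session.register.assert_called_with('after-call.s3.ListObjects', decode_keys,
                                    'BucketListerDecodeKeys', True)
session.unregister.assert_called_with('after-call.s3.ListObjects', decode_keys,
                                      'BucketListerDecodeKeys', True)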
21 changes: 15 additions & 6 deletions tests/integration/customizations/s3/test_plugin.py
@@ -490,20 +490,29 @@ def test_sync_with_plus_chars_paginate(self):
         self.assertNotIn('upload:', p2.stdout)
         self.assertEqual('', p2.stdout)

-    def test_s3_to_s3_sync_with_plus_char(self):
-        self.files.create_file('foo+.txt', contents="foo")
+    def test_s3_to_s3_sync_with_plus_char_paginate(self):
+        keynames = []
+        for i in range(4):
+            keyname = 'foo+%d' % i
+            keynames.append(keyname)
+            self.files.create_file(keyname, contents='')

         bucket_name = self.create_bucket()
         bucket_name_2 = self.create_bucket()

         p = aws('s3 sync %s s3://%s' % (self.files.rootdir, bucket_name))
         self.assert_no_errors(p)
-        self.assertTrue(self.key_exists(bucket_name, 'foo+.txt'))
+        for key in keynames:
+            self.assertTrue(self.key_exists(bucket_name, key))

-        p = aws('s3 sync s3://%s/ s3://%s/' % (bucket_name, bucket_name_2))
+        p = aws('s3 sync s3://%s/ s3://%s/ --page-size 2' %
+                (bucket_name, bucket_name_2))
         self.assert_no_errors(p)
-        self.assertTrue(self.key_exists(bucket_name_2, 'foo+.txt'))
+        for key in keynames:
+            self.assertTrue(self.key_exists(bucket_name_2, key))

-        p2 = aws('s3 sync s3://%s/ s3://%s/' % (bucket_name, bucket_name_2))
+        p2 = aws('s3 sync s3://%s/ s3://%s/ --page-size 2' %
+                 (bucket_name, bucket_name_2))
         self.assertNotIn('copy:', p2.stdout)
         self.assertEqual('', p2.stdout)

6 changes: 4 additions & 2 deletions tests/unit/customizations/s3/fake_session.py
@@ -70,10 +70,12 @@ def emit(self, *args, **kwargs):
     def emit_first_non_none_response(self, *args, **kwargs):
         pass

-    def register(self, name, handler, unique_id=None):
+    def register(self, name, handler, unique_id=None,
+                 unique_id_uses_call=False):
         pass

-    def unregister(self, name, handler, unique_id=None):
+    def unregister(self, name, handler, unique_id=None,
+                   unique_id_uses_call=False):
         pass


12 changes: 8 additions & 4 deletions tests/unit/customizations/s3/test_utils.py
@@ -297,15 +297,19 @@ def test_scoped_session_handler(self):
         session = mock.Mock()
         scoped = ScopedEventHandler(session, 'eventname', 'handler')
         with scoped:
-            session.register.assert_called_with('eventname', 'handler', None)
-        session.unregister.assert_called_with('eventname', 'handler', None)
+            session.register.assert_called_with('eventname', 'handler', None,
+                                                False)
+        session.unregister.assert_called_with('eventname', 'handler', None,
+                                              False)

     def test_scoped_session_unique(self):
         session = mock.Mock()
         scoped = ScopedEventHandler(session, 'eventname', 'handler', 'unique')
         with scoped:
-            session.register.assert_called_with('eventname', 'handler', 'unique')
-        session.unregister.assert_called_with('eventname', 'handler', 'unique')
+            session.register.assert_called_with('eventname', 'handler',
+                                                'unique', False)
+        session.unregister.assert_called_with('eventname', 'handler', 'unique',
+                                              False)


class TestGetFileStat(unittest.TestCase):
