Skip to content
This repository has been archived by the owner on Jan 16, 2023. It is now read-only.

Commit

Permalink
etag: only use normalized_path for remote storage
Browse files Browse the repository at this point in the history
  • Loading branch information
antonagestam committed Jan 9, 2017
1 parent be5f580 commit 2001f54
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 23 deletions.
28 changes: 14 additions & 14 deletions collectfast/etag.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,29 +31,33 @@ def get_cache_key(path):
return settings.cache_key_prefix + path_hash


def get_remote_etag(storage, path):
def get_remote_etag(storage, prefixed_path):
"""
Get etag of path from S3 using boto or boto3.
"""
normalized_path = storage._normalize_name(
prefixed_path).replace('\\', '/')
try:
return storage.bucket.get_key(path).etag
return storage.bucket.get_key(normalized_path).etag
except AttributeError:
pass
try:
return storage.bucket.Object(path).e_tag
return storage.bucket.Object(normalized_path).e_tag
except:
pass
return None


def get_etag(storage, path):
def get_etag(storage, path, prefixed_path):
"""
Get etag of path from cache or S3 - in that order.
"""

cache_key = get_cache_key(path)
etag = cache.get(cache_key, False)
if etag is False:
etag = get_remote_etag(storage, path)

etag = get_remote_etag(storage, prefixed_path)
cache.set(cache_key, etag)
return etag

Expand All @@ -74,11 +78,11 @@ def get_file_hash(storage, path):
return file_hash


def has_matching_etag(remote_storage, source_storage, path):
def has_matching_etag(remote_storage, source_storage, path, prefixed_path):
"""
Compare etag of path in source storage with remote.
"""
storage_etag = get_etag(remote_storage, path)
storage_etag = get_etag(remote_storage, path, prefixed_path)
local_etag = get_file_hash(source_storage, path)
return storage_etag == local_etag

Expand All @@ -87,16 +91,12 @@ def should_copy_file(remote_storage, path, prefixed_path, source_storage):
"""
Returns True if the file should be copied, otherwise False.
"""
normalized_path = remote_storage._normalize_name(
prefixed_path).replace('\\', '/')

# Compare hashes and skip copying if matching
if has_matching_etag(
remote_storage, source_storage, normalized_path):
remote_storage, source_storage, path, prefixed_path):
logger.info("%s: Skipping based on matching file hashes" % path)
return False

# Invalidate cached versions of lookup if copy is to be done
destroy_etag(normalized_path)
# Invalidate cached versions of lookup before copy
destroy_etag(path)
logger.info("%s: Hashes did not match" % path)
return True
2 changes: 1 addition & 1 deletion collectfast/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
debug = getattr(
settings, "COLLECTFAST_DEBUG", getattr(settings, "DEBUG", False))
cache_key_prefix = getattr(
settings, "COLLECTFAST_CACHE_KEY_PREFIX", "collectfast03_asset_")
settings, "COLLECTFAST_CACHE_KEY_PREFIX", "collectfast05_asset_")
cache = getattr(settings, "COLLECTFAST_CACHE", "default")
threads = getattr(settings, "COLLECTFAST_THREADS", False)
enabled = getattr(settings, "COLLECTFAST_ENABLED", True)
Expand Down
18 changes: 10 additions & 8 deletions collectfast/tests/test_etag.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,22 @@ def test_get_destroy_etag(case, mocked):
mocked.return_value = expected_hash = 'hash'

# empty cache
h = etag.get_etag('storage', 'path')
h = etag.get_etag('storage', 'path', 'prefixed_path')
case.assertEqual(h, expected_hash)
mocked.assert_called_once_with('storage', 'path')
mocked.assert_called_once_with('storage', 'prefixed_path')

# populated cache
mocked.reset_mock()
h = etag.get_etag('storage', 'path')
h = etag.get_etag('storage', 'path', 'prefixed_path')
case.assertEqual(h, expected_hash)
mocked.assert_not_called()

# test destroy_etag
mocked.reset_mock()
etag.destroy_etag('path')
h = etag.get_etag('storage', 'path')
h = etag.get_etag('storage', 'path', 'prefixed_path')
case.assertEqual(h, expected_hash)
mocked.assert_called_once_with('storage', 'path')
mocked.assert_called_once_with('storage', 'prefixed_path')


@test
Expand All @@ -69,9 +69,11 @@ def test_get_file_hash(case):
@patch('collectfast.etag.get_file_hash')
def test_has_matching_etag(case, mocked_get_etag, mocked_get_file_hash):
mocked_get_etag.return_value = mocked_get_file_hash.return_value = 'hash'
case.assertTrue(etag.has_matching_etag('rs', 'ss', 'path'))
case.assertTrue(
etag.has_matching_etag('rs', 'ss', 'path', 'prefixed_path'))
mocked_get_etag.return_value = 'not same'
case.assertFalse(etag.has_matching_etag('rs', 'ss', 'path'))
case.assertFalse(
etag.has_matching_etag('rs', 'ss', 'path', 'prefixed_path'))


@test
Expand All @@ -88,4 +90,4 @@ def test_should_copy_file(case, mocked_destroy_etag, mocked_has_matching_etag):
mocked_has_matching_etag.return_value = False
case.assertTrue(etag.should_copy_file(
remote_storage, 'path', 'prefixed_path', 'source_storage'))
mocked_destroy_etag.assert_called_once_with('prefixed_path')
mocked_destroy_etag.assert_called_once_with('path')

0 comments on commit 2001f54

Please sign in to comment.