This repository has been archived by the owner on Jan 16, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 69
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
318 additions
and
336 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,3 +22,5 @@ addons: | |
- deadsnakes | ||
packages: | ||
- python3.5 | ||
notifications: | ||
email: false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
test: | ||
python runtests.py | ||
. aws-credentials && ./runtests.py | ||
|
||
distribute: | ||
python setup.py sdist bdist_wheel upload |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import hashlib | ||
|
||
from django.core.cache import caches | ||
|
||
from collectfast import settings | ||
from .log import log | ||
|
||
try:
    from functools import lru_cache
except ImportError:
    # Python 2.7 has no functools.lru_cache; provide a pass-through
    # decorator so `@lru_cache()` is valid but performs no memoization.
    def lru_cache(maxsize=128, typed=False):
        def decorator(func):
            return func
        return decorator

# Django cache backend used for memoizing remote etags, selected by the
# COLLECTFAST_CACHE setting (see collectfast.settings).
cache = caches[settings.cache]
|
||
|
||
@lru_cache()
def get_cache_key(path):
    """
    Create a cache key by concatenating the prefix with a hash of the path.
    """
    try:
        digest = hashlib.md5(path).hexdigest()
    except TypeError:
        # Python 3 str paths must be encoded to bytes before hashing.
        digest = hashlib.md5(path.encode('utf-8')).hexdigest()
    return settings.cache_key_prefix + digest
|
||
|
||
def get_remote_etag(storage, path):
    """
    Get etag of path from S3 using boto or boto3.

    Tries the boto API (``bucket.get_key``) first, then the boto3 API
    (``bucket.Object``). Returns ``None`` when the key cannot be looked up.
    """
    try:
        # boto backend
        return storage.bucket.get_key(path).etag
    except AttributeError:
        pass
    try:
        # boto3 backend
        return storage.bucket.Object(path).e_tag
    except Exception:
        # Narrowed from a bare `except:`, which also trapped SystemExit
        # and KeyboardInterrupt.
        pass
    return None
|
||
|
||
def get_etag(storage, path):
    """
    Get etag of path from cache or S3 - in that order.
    """
    key = get_cache_key(path)
    # `False` is the cache-miss sentinel: a genuinely missing remote key
    # is cached as None, so None must be a valid cached value.
    etag = cache.get(key, False)
    if etag is not False:
        return etag
    etag = get_remote_etag(storage, path)
    cache.set(key, etag)
    return etag
|
||
|
||
def destroy_etag(path):
    """
    Clear etag of path from cache.
    """
    key = get_cache_key(path)
    cache.delete(key)
|
||
|
||
def get_file_hash(storage, path):
    """
    Create md5 hash from file contents.

    The digest is wrapped in double quotes to match the format of S3 etags.
    """
    file_ = storage.open(path)
    try:
        contents = file_.read()
    finally:
        # The original never closed the handle returned by storage.open();
        # release it explicitly.
        file_.close()
    return '"%s"' % hashlib.md5(contents).hexdigest()
|
||
|
||
def has_matching_etag(remote_storage, source_storage, path):
    """
    Compare etag of path in source storage with remote.
    """
    remote = get_etag(remote_storage, path)
    local = get_file_hash(source_storage, path)
    return remote == local
|
||
|
||
def should_copy_file(remote_storage, path, prefixed_path, source_storage):
    """
    Returns True if the file should be copied, otherwise False.
    """
    # Normalize to the remote key format (forward slashes only).
    normalized_path = remote_storage._normalize_name(
        prefixed_path).replace('\\', '/')

    if has_matching_etag(remote_storage, source_storage, normalized_path):
        # Identical hashes: the upload can be skipped entirely.
        log("Skipping '%s' based on matching file hashes" % path, level=2)
        return False

    # A copy is about to happen, so any cached etag for this path is stale.
    destroy_etag(normalized_path)
    log("Hashes did not match", level=2)
    return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
def log(message, level=1):
    """
    Print *message*.

    *level* is accepted for call-site compatibility but does not
    currently affect the output.
    """
    print(message)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,174 +1,109 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
from __future__ import with_statement, unicode_literals | ||
import hashlib | ||
import datetime | ||
from multiprocessing.dummy import Pool | ||
import warnings | ||
|
||
from django.conf import settings | ||
from django.contrib.staticfiles.management.commands import collectstatic | ||
from django.core.cache import caches | ||
from django.utils.encoding import smart_str | ||
|
||
|
||
try: | ||
from django.utils.six.moves import input as _input | ||
except ImportError: | ||
_input = raw_input # noqa | ||
|
||
collectfast_cache = getattr(settings, "COLLECTFAST_CACHE", "default") | ||
cache = caches[collectfast_cache] | ||
debug = getattr( | ||
settings, "COLLECTFAST_DEBUG", getattr(settings, "DEBUG", False)) | ||
threads = getattr(settings, "COLLECTFAST_THREADS", False) | ||
from collectfast.etag import should_copy_file | ||
from collectfast import settings | ||
|
||
|
||
class Command(collectstatic.Command): | ||
|
||
cache_key_prefix = 'collectfast03_asset_' | ||
|
||
def add_arguments(self, parser): | ||
super(Command, self).add_arguments(parser) | ||
parser.add_argument( | ||
'--ignore-etag', | ||
action='store_true', | ||
dest='ignore_etag', | ||
default=False, | ||
help="Deprecated since 0.5.0, use --disable-collectfast instead.") | ||
parser.add_argument( | ||
'--disable-collectfast', | ||
action='store_true', | ||
dest='disable_collectfast', | ||
default=False, | ||
help="Disable Collectfast.") | ||
|
||
def __init__(self, *args, **kwargs): | ||
super(Command, self).__init__(*args, **kwargs) | ||
self.tasks = [] | ||
self.etags = {} | ||
self.storage.preload_metadata = True | ||
if getattr(settings, 'AWS_PRELOAD_METADATA', False) is not True: | ||
self._pre_setup_log( | ||
"----> WARNING!\nCollectfast does not work properly without " | ||
"`AWS_PRELOAD_METADATA` set to `True`.\nOverriding " | ||
self.collectfast_enabled = settings.enabled | ||
if not settings.preload_metadata_enabled: | ||
warnings.warn( | ||
"Collectfast does not work properly without " | ||
"`AWS_PRELOAD_METADATA` set to `True`. Overriding " | ||
"`storage.preload_metadata` and continuing.") | ||
|
||
def set_options(self, **options): | ||
self.ignore_etag = options.pop('ignore_etag', False) | ||
if self.ignore_etag: | ||
""" | ||
Set options and handle deprecation. | ||
""" | ||
ignore_etag = options.pop('ignore_etag', False) | ||
disable = options.pop('disable_collectfast', False) | ||
if ignore_etag: | ||
warnings.warn( | ||
"--ignore-etag is deprecated since 0.5.0, use " | ||
"--disable-collectfast instead.") | ||
if ignore_etag or disable: | ||
self.collectfast_enabled = False | ||
else: | ||
self.collectfast_enabled = getattr( | ||
settings, "COLLECTFAST_ENABLED", True) | ||
super(Command, self).set_options(**options) | ||
|
||
def _pre_setup_log(self, message): | ||
print(message) | ||
|
||
def collect(self): | ||
"""Override collect method to track time""" | ||
|
||
self.num_skipped_files = 0 | ||
start = datetime.datetime.now() | ||
""" | ||
Override collect to copy files concurrently. The tasks are populated by | ||
Command.copy_file() which is called by super().collect(). | ||
""" | ||
ret = super(Command, self).collect() | ||
# Copy files asynchronously | ||
if threads: | ||
Pool(threads).map(self.do_copy_file, self.tasks) | ||
self.collect_time = str(datetime.datetime.now() - start) | ||
if settings.threads: | ||
Pool(settings.threads).map(self.do_copy_file, self.tasks) | ||
return ret | ||
|
||
def get_cache_key(self, path): | ||
# Python 2/3 support for path hashing | ||
try: | ||
path_hash = hashlib.md5(path).hexdigest() | ||
except TypeError: | ||
path_hash = hashlib.md5(path.encode('utf-8')).hexdigest() | ||
return self.cache_key_prefix + path_hash | ||
|
||
def get_boto3_etag(self, path): | ||
try: | ||
return self.storage.bucket.Object(path).e_tag | ||
except: | ||
return None | ||
|
||
def get_remote_etag(self, path): | ||
try: | ||
return self.storage.bucket.get_key(path).etag | ||
except AttributeError: | ||
return self.get_boto3_etag(path) | ||
|
||
def get_etag(self, path): | ||
"""Get etag from local dict, cache or S3 — in that order""" | ||
|
||
if path not in self.etags: | ||
cache_key = self.get_cache_key(path) | ||
cached = cache.get(cache_key, False) | ||
|
||
if cached is False: | ||
self.etags[path] = self.get_remote_etag(path) | ||
cache.set(cache_key, self.etags[path]) | ||
else: | ||
self.etags[path] = cached | ||
|
||
return self.etags[path] | ||
|
||
def destroy_etag(self, path): | ||
if self.etags is not None and path in self.etags: | ||
del self.etags[path] | ||
cache.delete(self.get_cache_key(path)) | ||
|
||
def get_file_hash(self, storage, path): | ||
contents = storage.open(path).read() | ||
file_hash = '"%s"' % hashlib.md5(contents).hexdigest() | ||
return file_hash | ||
|
||
def do_copy_file(self, args): | ||
""" | ||
Attempt to generate an md5 hash of the local file and compare it with | ||
the S3 version's hash before copying the file. | ||
Determine if file should be copied or not and handle exceptions. | ||
""" | ||
path, prefixed_path, source_storage = args | ||
|
||
if self.collectfast_enabled and not self.dry_run: | ||
normalized_path = self.storage._normalize_name( | ||
prefixed_path).replace('\\', '/') | ||
try: | ||
storage_etag = self.get_etag(normalized_path) | ||
local_etag = self.get_file_hash(source_storage, path) | ||
|
||
# Compare hashes and skip copying if matching | ||
if storage_etag == local_etag: | ||
self.log( | ||
"Skipping '%s' based on matching file hashes" % path, | ||
level=2) | ||
self.num_skipped_files += 1 | ||
if not should_copy_file( | ||
self.storage, path, prefixed_path, source_storage): | ||
return False | ||
else: | ||
self.log("Hashes did not match", level=2) | ||
except Exception as e: | ||
if debug: | ||
if settings.debug: | ||
raise | ||
# Ignore errors and let super Command handle it | ||
# Ignore errors and let default collectstatic handle copy | ||
self.stdout.write(smart_str( | ||
"Ignored error in Collectfast:\n%s\n--> Continuing using " | ||
"default collectstatic." % e)) | ||
|
||
# Invalidate cached versions of lookup if copy is done | ||
self.destroy_etag(normalized_path) | ||
|
||
return super(Command, self).copy_file( | ||
path, prefixed_path, source_storage) | ||
|
||
def copy_file(self, path, prefixed_path, source_storage): | ||
""" | ||
Appends path to task queue if threads are enabled, otherwise copies | ||
the file with a blocking call. | ||
""" | ||
args = (path, prefixed_path, source_storage) | ||
if threads: | ||
if settings.threads: | ||
self.tasks.append(args) | ||
else: | ||
self.do_copy_file(args) | ||
|
||
def delete_file(self, path, prefixed_path, source_storage): | ||
"""Override delete_file to skip modified time and exists lookups""" | ||
""" | ||
Override delete_file to skip modified time and exists lookups. | ||
""" | ||
if not self.collectfast_enabled: | ||
return super(Command, self).delete_file( | ||
path, prefixed_path, source_storage) | ||
if self.dry_run: | ||
self.log("Pretending to delete '%s'" % path) | ||
else: | ||
if not self.dry_run: | ||
self.log("Deleting '%s'" % path) | ||
self.storage.delete(prefixed_path) | ||
else: | ||
self.log("Pretending to delete '%s'" % path) | ||
return True |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from django.conf import settings

# Collectfast configuration, read once at import time from Django settings.

# Raise instead of swallowing errors when truthy.
debug = getattr(
    settings, "COLLECTFAST_DEBUG", getattr(settings, "DEBUG", False))
cache_key_prefix = getattr(
    settings, "COLLECTFAST_CACHE_KEY_PREFIX", "collectfast03_asset_")
# Name of the Django cache backend used for etag memoization.
cache = getattr(settings, "COLLECTFAST_CACHE", "default")
# Thread-pool size, or False to copy files sequentially.
threads = getattr(settings, "COLLECTFAST_THREADS", False)
enabled = getattr(settings, "COLLECTFAST_ENABLED", True)
# `is True` deliberately requires the setting to be the boolean True, not
# merely truthy (reordered from the Yoda-style `True is getattr(...)`).
preload_metadata_enabled = getattr(
    settings, 'AWS_PRELOAD_METADATA', False) is True
Oops, something went wrong.