Skip to content
This repository has been archived by the owner on Jan 16, 2023. It is now read-only.

Commit

Permalink
Merge 37a7eef into 3b194ab
Browse files Browse the repository at this point in the history
  • Loading branch information
antonagestam committed Jan 4, 2017
2 parents 3b194ab + 37a7eef commit 85dbe3c
Show file tree
Hide file tree
Showing 16 changed files with 337 additions and 341 deletions.
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,5 @@ addons:
- deadsnakes
packages:
- python3.5
notifications:
email: false
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2013-2016 Anton Agestam
Copyright (c) 2013-2017 Anton Agestam

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
test:
python runtests.py
. aws-credentials && ./runtests.py

distribute:
python setup.py sdist bdist_wheel upload
22 changes: 18 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ Usage

Collectfast overrides Django's builtin ``collectstatic`` command so just
run ``python manage.py collectstatic`` as normal. You can disable
collectfast by using the ``--ignore-etag`` option.
Collectfast by using the ``--disable-collectfast`` option.

You can also disable Collectfast by setting
``COLLECTFAST_ENABLED = False`` in your settings file. This is useful
Expand Down Expand Up @@ -131,9 +131,23 @@ Discussion is open and welcome.

**Testing**

To run tests, setup a virtualenv and install tox with ``pip install tox`` then
run ``tox`` in the project directory. To only run tests for a certain
environment run e.g. ``tox -e py35-django110``.
To run integration tests you need to set up an S3 bucket with the name
``collectfast`` and set your AWS credentials as environment variables. You can
do this by adding them to a file ``aws-credentials`` like this:

.. code:: bash

    export AWS_ACCESS_KEY_ID="XXXX"
    export AWS_SECRET_ACCESS_KEY="XXXX"

Then run the tests with ``. aws-credentials && python runtests.py``.

If you don't feel like setting up an S3 bucket, just skip setting the
environment variables. The integration tests will still run but fail.

To run tests with tox, set up a virtualenv and install tox with
``pip install tox``, then run ``tox`` in the project directory. To run only
the tests for a certain environment, run e.g. ``tox -e py35-django110``.


License
Expand Down
4 changes: 2 additions & 2 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ install:
# about it being out of date.
- "pip install --disable-pip-version-check --user --upgrade pip"

# Install Mock
- "%CMD_IN_ENV% pip install mock==1.3.0"
# Install test dependencies
- "%CMD_IN_ENV% pip install mock==1.3.0 boto boto3 django-storages"

# Install the build dependencies of the project. If some dependencies contain
# compiled extensions and are not provided as pre-built wheel packages,
Expand Down
101 changes: 101 additions & 0 deletions collectfast/etag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import hashlib

from django.core.cache import caches

from collectfast import settings
from .log import log

# Memoize cache-key computation where possible; on Python 2.7 fall back to a
# no-op decorator with the same signature so call sites stay unchanged.
try:
    from functools import lru_cache
except ImportError:
    # make lru_cache do nothing in python 2.7
    def lru_cache(maxsize=128, typed=False):
        def decorator(func):
            return func
        return decorator

# Django cache backend used for etag lookups, selected by Collectfast settings.
cache = caches[settings.cache]


@lru_cache()
def get_cache_key(path):
    """
    Return the cache key for *path*: the configured prefix followed by an
    md5 hex digest of the path itself.
    """
    try:
        digest = hashlib.md5(path).hexdigest()
    except TypeError:
        # Python 3 str (and py2 unicode) must be encoded before hashing.
        digest = hashlib.md5(path.encode('utf-8')).hexdigest()
    return settings.cache_key_prefix + digest


def get_remote_etag(storage, path):
    """
    Get the etag of *path* from S3, supporting both boto (bucket.get_key)
    and boto3 (bucket.Object) storage backends.

    Returns None when the key does not exist or the lookup fails.
    """
    try:
        return storage.bucket.get_key(path).etag
    except AttributeError:
        # Not a boto bucket (or the key is missing) — try the boto3 API.
        pass
    try:
        return storage.bucket.Object(path).e_tag
    except Exception:
        # Fix: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit. Any backend error is treated as "no remote etag".
        pass
    return None


def get_etag(storage, path):
    """
    Return the etag for *path*, consulting the cache first and falling
    back to a remote S3 lookup, whose result is then cached.
    """
    key = get_cache_key(path)
    # False is the sentinel: None is a legitimate cached value (missing key).
    cached = cache.get(key, False)
    if cached is not False:
        return cached
    etag = get_remote_etag(storage, path)
    cache.set(key, etag)
    return etag


def destroy_etag(path):
    """Remove any cached etag for *path*."""
    key = get_cache_key(path)
    cache.delete(key)


def get_file_hash(storage, path):
    """
    Return the md5 hex digest of the file's contents, wrapped in double
    quotes to match the S3 etag format.
    """
    file_ = storage.open(path)
    try:
        contents = file_.read()
    finally:
        # Fix: the handle returned by storage.open() was never closed,
        # leaking a file descriptor per hashed file.
        file_.close()
    return '"%s"' % hashlib.md5(contents).hexdigest()


def has_matching_etag(remote_storage, source_storage, path):
    """
    Return True when the remote etag for *path* equals the md5 hash of the
    local file's contents in *source_storage*.
    """
    remote = get_etag(remote_storage, path)
    local = get_file_hash(source_storage, path)
    return remote == local


def should_copy_file(remote_storage, path, prefixed_path, source_storage):
    """
    Decide whether *path* must be copied to the remote storage.

    Returns False when the remote etag matches the local file hash (the
    upload can be skipped); otherwise invalidates the cached etag and
    returns True.
    """
    normalized_path = remote_storage._normalize_name(
        prefixed_path).replace('\\', '/')

    if has_matching_etag(remote_storage, source_storage, normalized_path):
        # Identical content is already on the remote — skip the upload.
        log("Skipping '%s' based on matching file hashes" % path, level=2)
        return False

    log("Hashes did not match", level=2)
    # The file will be (re)copied, so the cached etag is now stale.
    destroy_etag(normalized_path)
    return True
2 changes: 2 additions & 0 deletions collectfast/log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def log(message, level=1):
    """
    Print *message* to stdout.

    *level* is accepted for call-site compatibility but is currently
    ignored — every message is printed regardless of verbosity.
    """
    print(message)
157 changes: 46 additions & 111 deletions collectfast/management/commands/collectstatic.py
Original file line number Diff line number Diff line change
@@ -1,174 +1,109 @@
# -*- coding: utf-8 -*-

from __future__ import with_statement, unicode_literals
import hashlib
import datetime
from multiprocessing.dummy import Pool
import warnings

from django.conf import settings
from django.contrib.staticfiles.management.commands import collectstatic
from django.core.cache import caches
from django.utils.encoding import smart_str


try:
from django.utils.six.moves import input as _input
except ImportError:
_input = raw_input # noqa

collectfast_cache = getattr(settings, "COLLECTFAST_CACHE", "default")
cache = caches[collectfast_cache]
debug = getattr(
settings, "COLLECTFAST_DEBUG", getattr(settings, "DEBUG", False))
threads = getattr(settings, "COLLECTFAST_THREADS", False)
from collectfast.etag import should_copy_file
from collectfast import settings


class Command(collectstatic.Command):

cache_key_prefix = 'collectfast03_asset_'

def add_arguments(self, parser):
    """
    Register Collectfast's command-line options on top of the built-in
    collectstatic options.
    """
    super(Command, self).add_arguments(parser)
    # Deprecated alias kept so existing invocations do not break.
    parser.add_argument(
        '--ignore-etag',
        action='store_true',
        dest='ignore_etag',
        default=False,
        help="Deprecated since 0.5.0, use --disable-collectfast instead.")
    parser.add_argument(
        '--disable-collectfast',
        action='store_true',
        dest='disable_collectfast',
        default=False,
        help="Disable Collectfast.")

def __init__(self, *args, **kwargs):
super(Command, self).__init__(*args, **kwargs)
self.tasks = []
self.etags = {}
self.storage.preload_metadata = True
if getattr(settings, 'AWS_PRELOAD_METADATA', False) is not True:
self._pre_setup_log(
"----> WARNING!\nCollectfast does not work properly without "
"`AWS_PRELOAD_METADATA` set to `True`.\nOverriding "
self.collectfast_enabled = settings.enabled
if not settings.preload_metadata_enabled:
warnings.warn(
"Collectfast does not work properly without "
"`AWS_PRELOAD_METADATA` set to `True`. Overriding "
"`storage.preload_metadata` and continuing.")

def set_options(self, **options):
self.ignore_etag = options.pop('ignore_etag', False)
if self.ignore_etag:
"""
Set options and handle deprecation.
"""
ignore_etag = options.pop('ignore_etag', False)
disable = options.pop('disable_collectfast', False)
if ignore_etag:
warnings.warn(
"--ignore-etag is deprecated since 0.5.0, use "
"--disable-collectfast instead.")
if ignore_etag or disable:
self.collectfast_enabled = False
else:
self.collectfast_enabled = getattr(
settings, "COLLECTFAST_ENABLED", True)
super(Command, self).set_options(**options)

def _pre_setup_log(self, message):
print(message)

def collect(self):
"""Override collect method to track time"""

self.num_skipped_files = 0
start = datetime.datetime.now()
"""
Override collect to copy files concurrently. The tasks are populated by
Command.copy_file() which is called by super().collect().
"""
ret = super(Command, self).collect()
# Copy files asynchronously
if threads:
Pool(threads).map(self.do_copy_file, self.tasks)
self.collect_time = str(datetime.datetime.now() - start)
if settings.threads:
Pool(settings.threads).map(self.do_copy_file, self.tasks)
return ret

def get_cache_key(self, path):
# Python 2/3 support for path hashing
try:
path_hash = hashlib.md5(path).hexdigest()
except TypeError:
path_hash = hashlib.md5(path.encode('utf-8')).hexdigest()
return self.cache_key_prefix + path_hash

def get_boto3_etag(self, path):
try:
return self.storage.bucket.Object(path).e_tag
except:
return None

def get_remote_etag(self, path):
try:
return self.storage.bucket.get_key(path).etag
except AttributeError:
return self.get_boto3_etag(path)

def get_etag(self, path):
"""Get etag from local dict, cache or S3 — in that order"""

if path not in self.etags:
cache_key = self.get_cache_key(path)
cached = cache.get(cache_key, False)

if cached is False:
self.etags[path] = self.get_remote_etag(path)
cache.set(cache_key, self.etags[path])
else:
self.etags[path] = cached

return self.etags[path]

def destroy_etag(self, path):
if self.etags is not None and path in self.etags:
del self.etags[path]
cache.delete(self.get_cache_key(path))

def get_file_hash(self, storage, path):
contents = storage.open(path).read()
file_hash = '"%s"' % hashlib.md5(contents).hexdigest()
return file_hash

def do_copy_file(self, args):
"""
Attempt to generate an md5 hash of the local file and compare it with
the S3 version's hash before copying the file.
Determine if file should be copied or not and handle exceptions.
"""
path, prefixed_path, source_storage = args

if self.collectfast_enabled and not self.dry_run:
normalized_path = self.storage._normalize_name(
prefixed_path).replace('\\', '/')
try:
storage_etag = self.get_etag(normalized_path)
local_etag = self.get_file_hash(source_storage, path)

# Compare hashes and skip copying if matching
if storage_etag == local_etag:
self.log(
"Skipping '%s' based on matching file hashes" % path,
level=2)
self.num_skipped_files += 1
if not should_copy_file(
self.storage, path, prefixed_path, source_storage):
return False
else:
self.log("Hashes did not match", level=2)
except Exception as e:
if debug:
if settings.debug:
raise
# Ignore errors and let super Command handle it
# Ignore errors and let default collectstatic handle copy
self.stdout.write(smart_str(
"Ignored error in Collectfast:\n%s\n--> Continuing using "
"default collectstatic." % e))

# Invalidate cached versions of lookup if copy is done
self.destroy_etag(normalized_path)

return super(Command, self).copy_file(
path, prefixed_path, source_storage)

def copy_file(self, path, prefixed_path, source_storage):
"""
Appends path to task queue if threads are enabled, otherwise copies
the file with a blocking call.
"""
args = (path, prefixed_path, source_storage)
if threads:
if settings.threads:
self.tasks.append(args)
else:
self.do_copy_file(args)

def delete_file(self, path, prefixed_path, source_storage):
"""Override delete_file to skip modified time and exists lookups"""
"""
Override delete_file to skip modified time and exists lookups.
"""
if not self.collectfast_enabled:
return super(Command, self).delete_file(
path, prefixed_path, source_storage)
if self.dry_run:
self.log("Pretending to delete '%s'" % path)
else:
if not self.dry_run:
self.log("Deleting '%s'" % path)
self.storage.delete(prefixed_path)
else:
self.log("Pretending to delete '%s'" % path)
return True
Empty file removed collectfast/models.py
Empty file.
Loading

0 comments on commit 85dbe3c

Please sign in to comment.