Skip to content
This repository has been archived by the owner on Jan 16, 2023. It is now read-only.

Commit

Permalink
Merge 2c2ce00 into d705893
Browse files Browse the repository at this point in the history
  • Loading branch information
antonagestam committed Aug 12, 2019
2 parents d705893 + 2c2ce00 commit 4b409cf
Show file tree
Hide file tree
Showing 14 changed files with 250 additions and 181 deletions.
13 changes: 10 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,23 @@ python:
- '3.7'
install:
- pip install django=="$DJANGO"
- pip install -r test-requirements.txt
# these are only supported on Python 3.7, so we allow the installation to fail
- pip install black sorti || true
env:
- DJANGO=1.11
- DJANGO=2.1
- DJANGO=2.2
script:
before_script:
# see https://github.com/travis-ci/travis-ci/issues/7940
- sudo rm -f /etc/boto.cfg
- pip install -r test-requirements.txt
- flake8
script:
- 'if [[ $(python --version) == "Python 3.7."* ]]; then flake8; fi'
- 'if [[ $(python --version) == "Python 3.7."* ]]; then black --check .; fi'
- 'if [[ $(python --version) == "Python 3.7."* ]]; then sorti --check .; fi'
- 'if [[ $(python --version) == "Python 3.7."* ]]; then mypy .; fi'
- coverage run --source collectfast ./runtests.py
after_script:
- coveralls
matrix:
exclude:
Expand Down
3 changes: 2 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ Django's cache
framework. <https://docs.djangoproject.com/en/stable/topics/cache/>`_

**Note:** We recommend that you set the ``MAX_ENTRIES`` setting if you
have more than 300 static files, see
have more than 300 static files, see
`#47 <https://github.com/antonagestam/collectfast/issues/47>`_


Expand Down Expand Up @@ -150,6 +150,7 @@ Run linter and test suite:
.. code:: bash
flake8
black --check .
make test
Expand Down
23 changes: 17 additions & 6 deletions collectfast/boto.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,35 @@
from typing import Any

from . import settings

has_boto = True # type: bool
has_boto3 = True # type: bool

try:
from storages.backends.s3boto import S3BotoStorage
from storages.backends.s3boto3 import S3Boto3Storage
has_boto = True
except:
except ImportError:
has_boto = False

try:
from storages.backends.s3boto3 import S3Boto3Storage
except ImportError:
has_boto3 = False


def is_boto3(storage):
    # type: (Any) -> bool
    """
    Return True if ``storage`` is an ``S3Boto3Storage`` instance.

    The check is guarded by ``has_boto3`` so that ``S3Boto3Storage`` is only
    referenced when its import succeeded.
    """
    return has_boto3 and isinstance(storage, S3Boto3Storage)


def is_boto(storage):
    # type: (Any) -> bool
    """
    Return True if ``storage`` is an ``S3BotoStorage`` or ``S3Boto3Storage``.

    The two storage classes are imported independently, so either name may be
    undefined while the other is available. Each isinstance check is therefore
    guarded by its own flag (``has_boto`` / ``has_boto3``) instead of gating
    both on ``has_boto``.
    """
    if has_boto and isinstance(storage, S3BotoStorage):
        return True
    return has_boto3 and isinstance(storage, S3Boto3Storage)


def reset_connection(storage):
# type: (Any) -> None
"""
Reset connection if thread pooling is enabled and storage is boto3.
"""
Expand Down
48 changes: 22 additions & 26 deletions collectfast/etag.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,59 +2,53 @@
import hashlib
import logging
import mimetypes
from functools import lru_cache
from io import BytesIO
from typing import Optional

from django.core.cache import caches
from django.core.files.storage import Storage
from django.utils.encoding import force_bytes
from django.utils.six import BytesIO

from storages.utils import safe_join

from collectfast import settings

try:
from functools import lru_cache
except ImportError:
# make lru_cache do nothing in python 2.7
def lru_cache(maxsize=128, typed=False):
def decorator(func):
return func
return decorator

cache = caches[settings.cache]
logger = logging.getLogger(__name__)


@lru_cache()
def get_cache_key(path):
    # type: (str) -> str
    """
    Create a cache key by concatenating the prefix with a hash of the path.

    The path is encoded as UTF-8 and hashed with md5; the hex digest is
    appended to ``settings.cache_key_prefix``. Results are memoized through
    ``lru_cache``.
    """
    path_hash = hashlib.md5(path.encode("utf-8")).hexdigest()
    return settings.cache_key_prefix + path_hash


def get_remote_etag(storage, prefixed_path):
    # type: (Storage, str) -> Optional[str]
    """
    Get etag of path from S3 using boto or boto3.

    The path is joined onto ``storage.location`` and normalized to forward
    slashes. ``bucket.get_key(...).etag`` is tried first; if that raises
    AttributeError, ``bucket.Object(...).e_tag`` is tried. Returns None when
    neither lookup yields a value.
    """
    normalized_path = safe_join(
        storage.location,  # type: ignore
        prefixed_path,
    ).replace("\\", "/")
    try:
        return storage.bucket.get_key(normalized_path).etag  # type: ignore
    except AttributeError:
        # This lookup style is not available; fall through to the next one.
        pass
    try:
        return storage.bucket.Object(normalized_path).e_tag  # type: ignore
    except Exception:
        # Best effort: a failed lookup means "no known remote etag". Catching
        # Exception instead of using a bare ``except`` lets KeyboardInterrupt
        # and SystemExit propagate.
        pass
    return None


def get_etag(storage, path, prefixed_path):
# type: (Storage, str, str) -> str
"""
Get etag of path from cache or S3 - in that order.
"""
Expand All @@ -67,28 +61,29 @@ def get_etag(storage, path, prefixed_path):


def destroy_etag(path):
    # type: (str) -> None
    """
    Remove the cached etag entry for path.
    """
    cache_key = get_cache_key(path)
    cache.delete(cache_key)


def get_file_hash(storage, path):
# type: (Storage, str) -> str
"""
Create md5 hash from file contents.
"""
contents = storage.open(path).read()
file_hash = hashlib.md5(contents).hexdigest()

# Check if content should be gzipped and hash gzipped content
content_type = mimetypes.guess_type(path)[0] or 'application/octet-stream'
content_type = mimetypes.guess_type(path)[0] or "application/octet-stream"
if settings.is_gzipped and content_type in settings.gzip_content_types:
cache_key = get_cache_key('gzip_hash_%s' % file_hash)
cache_key = get_cache_key("gzip_hash_%s" % file_hash)
file_hash = cache.get(cache_key, False)
if file_hash is False:
buffer = BytesIO()
zf = gzip.GzipFile(
mode='wb', compresslevel=6, fileobj=buffer, mtime=0.0)
zf = gzip.GzipFile(mode="wb", compresslevel=6, fileobj=buffer, mtime=0.0)
zf.write(force_bytes(contents))
zf.close()
file_hash = hashlib.md5(buffer.getvalue()).hexdigest()
Expand All @@ -98,6 +93,7 @@ def get_file_hash(storage, path):


def has_matching_etag(remote_storage, source_storage, path, prefixed_path):
# type: (Storage, Storage, str, str) -> bool
"""
Compare etag of path in source storage with remote.
"""
Expand All @@ -107,11 +103,11 @@ def has_matching_etag(remote_storage, source_storage, path, prefixed_path):


def should_copy_file(remote_storage, path, prefixed_path, source_storage):
# type: (Storage, str, str, Storage) -> bool
"""
Returns True if the file should be copied, otherwise False.
"""
if has_matching_etag(
remote_storage, source_storage, path, prefixed_path):
if has_matching_etag(remote_storage, source_storage, path, prefixed_path):
logger.info("%s: Skipping based on matching file hashes" % path)
return False

Expand Down
82 changes: 50 additions & 32 deletions collectfast/management/commands/collectstatic.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,65 @@
from __future__ import with_statement, unicode_literals
from multiprocessing.dummy import Pool
import warnings
from multiprocessing.dummy import Pool
from typing import List
from typing import Tuple

from django.contrib.staticfiles.management.commands import collectstatic
from django.core.files.storage import Storage
from django.core.management.base import CommandParser
from django.utils.encoding import smart_str

from collectfast.etag import should_copy_file
from collectfast.boto import reset_connection, is_boto
from collectfast import settings
from collectfast.boto import is_boto
from collectfast.boto import reset_connection
from collectfast.etag import should_copy_file

Task = Tuple[str, str, Storage]


class Command(collectstatic.Command):
def add_arguments(self, parser):
    # type: (CommandParser) -> None
    """
    Register Collectfast's flags after the parent command's arguments.

    Adds ``--ignore-etag`` (deprecated) and ``--disable-collectfast``; both
    are boolean flags that default to False.
    """
    super(Command, self).add_arguments(parser)
    parser.add_argument(
        "--ignore-etag",
        action="store_true",
        dest="ignore_etag",
        default=False,
        help="Deprecated since 0.5.0, use --disable-collectfast instead.",
    )
    parser.add_argument(
        "--disable-collectfast",
        action="store_true",
        dest="disable_collectfast",
        default=False,
        help="Disable Collectfast.",
    )

def __init__(self, *args, **kwargs):
    """
    Initialize the copy counter, the task list and the enabled flag.

    If the storage is a boto storage and ``preload_metadata`` is not already
    ``True``, it is forced to ``True`` and a warning is emitted.
    """
    super(Command, self).__init__(*args, **kwargs)
    self.num_copied_files = 0
    self.tasks = []  # type: List[Task]
    self.collectfast_enabled = settings.enabled

    if is_boto(self.storage) and self.storage.preload_metadata is not True:
        self.storage.preload_metadata = True
        warnings.warn(
            "Collectfast does not work properly without `preload_metadata` "
            "set to `True` on the storage class. Overriding "
            "`storage.preload_metadata` and continuing."
        )

def set_options(self, **options):
"""
Set options and handle deprecation.
"""
ignore_etag = options.pop('ignore_etag', False)
disable = options.pop('disable_collectfast', False)
ignore_etag = options.pop("ignore_etag", False)
disable = options.pop("disable_collectfast", False)
if ignore_etag:
warnings.warn(
"--ignore-etag is deprecated since 0.5.0, use "
"--disable-collectfast instead.")
"--disable-collectfast instead."
)
if ignore_etag or disable:
self.collectfast_enabled = False
super(Command, self).set_options(**options)
Expand All @@ -70,8 +80,8 @@ def handle(self, **options):
"""
super(Command, self).handle(**options)
return "{} static file{} copied.".format(
self.num_copied_files,
'' if self.num_copied_files == 1 else 's')
self.num_copied_files, "" if self.num_copied_files == 1 else "s"
)

def do_copy_file(self, args):
"""
Expand All @@ -84,24 +94,28 @@ def do_copy_file(self, args):
if self.collectfast_enabled and not self.dry_run:
try:
if not should_copy_file(
self.storage, path, prefixed_path, source_storage):
self.storage, path, prefixed_path, source_storage
):
return False
except Exception as e:
if settings.debug:
raise
# Ignore errors and let default collectstatic handle copy
self.stdout.write(smart_str(
"Ignored error in Collectfast:\n%s\n--> Continuing using "
"default collectstatic." % e))
self.stdout.write(
smart_str(
"Ignored error in Collectfast:\n%s\n--> Continuing using "
"default collectstatic." % e
)
)

self.num_copied_files += 1
return super(Command, self).copy_file(
path, prefixed_path, source_storage)
return super(Command, self).copy_file(path, prefixed_path, source_storage)

def copy_file(self, path, prefixed_path, source_storage):
# type: (str, str, Storage) -> None
"""
Appends path to task queue if threads are enabled, otherwise copies
the file with a blocking call.
Appends path to task queue if threads are enabled, otherwise copies the
file with a blocking call.
"""
args = (path, prefixed_path, source_storage)
if settings.threads:
Expand All @@ -110,12 +124,16 @@ def copy_file(self, path, prefixed_path, source_storage):
self.do_copy_file(args)

def delete_file(self, path, prefixed_path, source_storage):
# type: (str, str, Storage) -> bool
"""
Override delete_file to skip modified time and exists lookups.
"""
if not self.collectfast_enabled:
return super(Command, self).delete_file(
path, prefixed_path, source_storage)
# The delete_file method is incorrectly annotated in django-stubs,
# see https://github.com/typeddjango/django-stubs/issues/130
return super(Command, self).delete_file( # type: ignore
path, prefixed_path, source_storage
)
if not self.dry_run:
self.log("Deleting '%s'" % path)
self.storage.delete(prefixed_path)
Expand Down
19 changes: 13 additions & 6 deletions collectfast/settings.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
from django.conf import settings

# Collectfast configuration: each value is read once from the Django settings
# object when this module is imported, falling back to the default shown.

# COLLECTFAST_DEBUG falls back to Django's DEBUG, then to False.
debug = getattr(settings, "COLLECTFAST_DEBUG", getattr(settings, "DEBUG", False))
cache_key_prefix = getattr(
    settings, "COLLECTFAST_CACHE_KEY_PREFIX", "collectfast05_asset_"
)
cache = getattr(settings, "COLLECTFAST_CACHE", "default")
threads = getattr(settings, "COLLECTFAST_THREADS", False)
enabled = getattr(settings, "COLLECTFAST_ENABLED", True)
is_gzipped = getattr(settings, "AWS_IS_GZIPPED", False)
gzip_content_types = getattr(
    settings,
    "GZIP_CONTENT_TYPES",
    (
        "text/css",
        "text/javascript",
        "application/javascript",
        "application/x-javascript",
        "image/svg+xml",
    ),
)
Loading

0 comments on commit 4b409cf

Please sign in to comment.