Skip to content
This repository has been archived by the owner on Jan 16, 2023. It is now read-only.

Commit

Permalink
Parallelization, work in progress (#83)
Browse files Browse the repository at this point in the history
Add parallelization
  • Loading branch information
antonagestam committed Dec 17, 2016
1 parent 47303ea commit f03d56e
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 66 deletions.
21 changes: 21 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ https://gitter.im/antonagestam/collectfast|
The fast ``collectstatic`` for Django projects with S3 as storage
backend.

**Features**

- Comparing and caching of md5 checksums before uploading
- Parallel file uploads using Python's multiprocessing module

Running Django's ``collectstatic`` command can become really slow as
more and more files are added to a project, especially if heavy
libraries such as jQuery UI are included in the source. This is a custom
Expand Down Expand Up @@ -94,6 +99,22 @@ have more than 300 static files, see
`#47 <https://github.com/antonagestam/collectfast/issues/47>`_


Enable Parallelization
----------------------

The parallelization feature uploads files in parallel using a thread pool
from Python's ``multiprocessing.dummy`` module. Enable it by setting
``COLLECTFAST_THREADS`` to the number of worker threads to use.

To enable parallelization of file copying, a dedicated cache backend must be
set up, and that backend must be thread-safe, i.e. something other than
Django's default LocMemCache.

.. code:: python

    COLLECTFAST_THREADS = 20

Debug
-----

Expand Down
82 changes: 18 additions & 64 deletions collectfast/management/commands/collectstatic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@
from __future__ import with_statement, unicode_literals
import hashlib
import datetime
from multiprocessing.dummy import Pool

from django.conf import settings
from django.contrib.staticfiles.management.commands import collectstatic
from django.core.cache import caches
from django.core.files.storage import FileSystemStorage
from django.core.management.base import CommandError
from django.utils.encoding import smart_str


Expand All @@ -21,11 +20,11 @@
cache = caches[collectfast_cache]
debug = getattr(
settings, "COLLECTFAST_DEBUG", getattr(settings, "DEBUG", False))
threads = getattr(settings, "COLLECTFAST_THREADS", False)


class Command(collectstatic.Command):

etags = None
cache_key_prefix = 'collectfast03_asset_'

def add_arguments(self, parser):
Expand All @@ -39,6 +38,8 @@ def add_arguments(self, parser):

def __init__(self, *args, **kwargs):
super(Command, self).__init__(*args, **kwargs)
self.tasks = []
self.etags = {}
self.storage.preload_metadata = True
if getattr(settings, 'AWS_PRELOAD_METADATA', False) is not True:
self._pre_setup_log(
Expand All @@ -64,6 +65,9 @@ def collect(self):
self.num_skipped_files = 0
start = datetime.datetime.now()
ret = super(Command, self).collect()
# Copy files asynchronously
if threads:
Pool(threads).map(self.do_copy_file, self.tasks)
self.collect_time = str(datetime.datetime.now() - start)
return ret

Expand All @@ -83,16 +87,13 @@ def get_boto3_etag(self, path):

def get_remote_etag(self, path):
try:
return self.storage.bucket.get_key(path).etag
return self.storage.bucket_key(path).etag

This comment has been minimized.

Copy link
@g-as

g-as Dec 26, 2016

Hey!

is there any reason for this change? I cannot find anything relating in django-storages/boto, and it gives me an AttributeError with boto installed.

This comment has been minimized.

Copy link
@antonagestam

antonagestam Dec 29, 2016

Author Owner

Hey, good catch. I think this was a slip on my side. Quick-fix is to install boto3, otherwise I'll try to amend this ASAP.

Cheers

This comment has been minimized.

Copy link
@g-as

g-as Dec 30, 2016

There's a bug in django-storages that prevents me from using boto3... So I'm stuck for now. Thanks for the quick answer.

This comment has been minimized.

Copy link
@antonagestam

antonagestam Dec 30, 2016

Author Owner

Would you mind testing the fix I just pushed? You can install the development version with pip install -e git://github.com/antonagestam/collectfast.git#egg=collectfast.

That way I can publish to PyPI later today. :)

This comment has been minimized.

Copy link
@g-as

g-as Dec 30, 2016

I already did, since I made a local copy + fix that I'm using ;)

This comment has been minimized.

Copy link
@antonagestam

antonagestam Dec 30, 2016

Author Owner

Cool, I just published 0.4.1 on PyPI. Thank you for helping out! 🍰

This comment has been minimized.

Copy link
@g-as

g-as Dec 30, 2016

My pleasure

except AttributeError:
return self.get_boto3_etag(path)

def get_etag(self, path):
"""Get etag from local dict, cache or S3 — in that order"""

if self.etags is None:
self.etags = {}

if path not in self.etags:
cache_key = self.get_cache_key(path)
cached = cache.get(cache_key, False)
Expand All @@ -115,12 +116,14 @@ def get_file_hash(self, storage, path):
file_hash = '"%s"' % hashlib.md5(contents).hexdigest()
return file_hash

def copy_file(self, path, prefixed_path, source_storage):
def do_copy_file(self, args):
"""
Attempt to generate an md5 hash of the local file and compare it with
the S3 version's hash before copying the file.
"""
path, prefixed_path, source_storage = args

if self.collectfast_enabled and not self.dry_run:
normalized_path = self.storage._normalize_name(
prefixed_path).replace('\\', '/')
Expand Down Expand Up @@ -151,6 +154,13 @@ def copy_file(self, path, prefixed_path, source_storage):
return super(Command, self).copy_file(
path, prefixed_path, source_storage)

def copy_file(self, path, prefixed_path, source_storage):
    """
    Copy a file now, or queue it for later when threading is enabled.

    With a truthy module-level ``threads`` value the argument tuple is
    appended to ``self.tasks``; otherwise it is handed straight to
    ``do_copy_file``.
    """
    task = (path, prefixed_path, source_storage)
    if not threads:
        self.do_copy_file(task)
        return
    self.tasks.append(task)

def delete_file(self, path, prefixed_path, source_storage):
"""Override delete_file to skip modified time and exists lookups"""
if not self.collectfast_enabled:
Expand All @@ -162,59 +172,3 @@ def delete_file(self, path, prefixed_path, source_storage):
self.log("Deleting '%s'" % path)
self.storage.delete(prefixed_path)
return True

def handle_noargs(self, **options):
    """
    Confirm with the user (when interactive), collect the static files and
    write a summary including skipped-file count and elapsed time.

    Raises ``CommandError`` when the interactive prompt is answered with
    anything other than ``yes``.
    """
    self.set_options(**options)

    # Defaults apply unless the destination is a file system storage that
    # has a location to show in the prompt and in the summary.
    destination_path = None
    destination_display = '.'
    if (isinstance(self.storage, FileSystemStorage) and
            self.storage.location):
        destination_path = self.storage.location
        destination_display = ':\n\n %s' % destination_path

    clear_display = (
        'This will DELETE EXISTING FILES!' if self.clear
        else 'This will overwrite existing files!')

    # Warn before doing anything more.
    if self.interactive:
        answer = _input("""
You have requested to collect static files at the destination
location as specified in your settings%s
%s
Are you sure you want to do this?
Type 'yes' to continue, or 'no' to cancel: """ % (
            destination_display, clear_display))
        if answer != 'yes':
            raise CommandError("Collecting static files cancelled.")

    collected = self.collect()
    modified_count = len(collected['modified'])
    unmodified_count = len(collected['unmodified'])
    post_processed_count = len(collected['post_processed'])

    # Nothing to report when output is silenced.
    if self.verbosity < 1:
        return

    template = ("Collected static files in %(collect_time)s."
                "\nSkipped %(num_skipped)i already synced files."
                "\n%(modified_count)s %(identifier)s %(action)s"
                "%(destination)s%(unmodified)s%(post_processed)s.\n")
    values = {
        'modified_count': modified_count,
        'identifier': 'static file' + (
            's' if modified_count != 1 else ''),
        'action': 'symlinked' if self.symlink else 'copied',
        'destination': (
            " to '%s'" % destination_path if destination_path else ''),
        'unmodified': (
            ', %s unmodified' % unmodified_count
            if collected['unmodified'] else ''),
        'post_processed': (
            ', %s post-processed' % post_processed_count
            if collected['post_processed'] else ''),
        'num_skipped': self.num_skipped_files,
        'collect_time': self.collect_time,
    }
    self.stdout.write(smart_str(template % values))
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

setup(
name='Collectfast',
description='Collectstatic on Steroids',
version='0.3.1',
description='A Faster Collectstatic',
version='0.4.0',
long_description=open('README.rst').read(),
author='Anton Agestam',
author_email='msn@antonagestam.se',
Expand Down

0 comments on commit f03d56e

Please sign in to comment.