From 5f996e7ab0d83fee93a3bc64c3edc911b6e4f35b Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Tue, 3 Jan 2017 05:04:15 +0100 Subject: [PATCH 01/15] ripped out much of the command methods handling etags --- collectfast/etag.py | 92 +++++++++++ collectfast/log.py | 2 + .../management/commands/collectstatic.py | 152 +++++------------- collectfast/settings.py | 9 ++ 4 files changed, 146 insertions(+), 109 deletions(-) create mode 100644 collectfast/etag.py create mode 100644 collectfast/log.py create mode 100644 collectfast/settings.py diff --git a/collectfast/etag.py b/collectfast/etag.py new file mode 100644 index 0000000..07d0519 --- /dev/null +++ b/collectfast/etag.py @@ -0,0 +1,92 @@ +import hashlib +from functools import lru_cache + +from django.core.cache import caches + +from collectfast import settings +from .log import log + +cache = caches[settings.cache] + + +@lru_cache() +def get_cache_key(path): + """ + Create a cache key by concatenating the prefix with a hash of the path. + """ + # Python 2/3 support for path hashing + try: + path_hash = hashlib.md5(path).hexdigest() + except TypeError: + path_hash = hashlib.md5(path.encode('utf-8')).hexdigest() + return settings.cache_key_prefix + path_hash + + +def get_remote_etag(storage, path): + """ + Get etag of path from S3 using boto or boto3. + """ + try: + return storage.bucket.get_key(path).etag + except AttributeError: + try: + return storage.bucket.Object(path).e_tag + except AttributeError: + return None + + +def get_etag(storage, path): + """ + Get etag of path from cache or S3 - in that order. + """ + cache_key = get_cache_key(path) + etag = settings.cache.get(cache_key, False) + if etag is False: + etag = get_remote_etag(storage, path) + cache.set(cache_key, etag) + return etag + + +def destroy_etag(path): + """ + Clear etag of path from cache. + """ + cache.delete(get_cache_key(path)) + + +def get_file_hash(storage, path): + """ + Create md5 hash from file contents. + """ + contents = storage.open(path).read() + file_hash = '"%s"' % hashlib.md5(contents).hexdigest() + return file_hash + + +def has_matching_etag(remote_storage, source_storage, path): + """ + Compare etag of path in source storage with remote. + """ + + storage_etag = get_etag(remote_storage, path) + local_etag = get_file_hash(source_storage, path) + return storage_etag == local_etag + + +def copy_file(remote_storage, path, prefixed_path, source_storage): + """ + Returns True if the file should be copied otherwise False. + """ + normalized_path = remote_storage._normalize_name( + prefixed_path).replace('\\', '/') + + # Compare hashes and skip copying if matching + if has_matching_etag( + remote_storage, source_storage, normalized_path): + log("Skipping '%s' based on matching file hashes" % path, level=2) + return False + + # Invalidate cached versions of lookup if copy is to be done + destroy_etag(normalized_path) + log("Hashes did not match", level=2) + return True diff --git a/collectfast/log.py b/collectfast/log.py new file mode 100644 index 0000000..29163cb --- /dev/null +++ b/collectfast/log.py @@ -0,0 +1,2 @@ +def log(message, level=1): + print(message) diff --git a/collectfast/management/commands/collectstatic.py b/collectfast/management/commands/collectstatic.py index eb2ca6b..271ac2b 100644 --- a/collectfast/management/commands/collectstatic.py +++ b/collectfast/management/commands/collectstatic.py @@ -1,32 +1,14 @@ -# -*- coding: utf-8 -*- - from __future__ import with_statement, unicode_literals -import hashlib -import datetime from multiprocessing.dummy import Pool -from django.conf import settings from django.contrib.staticfiles.management.commands import collectstatic -from django.core.cache import caches from django.utils.encoding import smart_str - -try: - from django.utils.six.moves import input as _input -except ImportError: - _input = raw_input # noqa - -collectfast_cache = getattr(settings, "COLLECTFAST_CACHE", "default") -cache = caches[collectfast_cache] -debug = getattr( - settings, "COLLECTFAST_DEBUG", getattr(settings, "DEBUG", False)) -threads = getattr(settings, "COLLECTFAST_THREADS", False) +from collectfast.etag import copy_file +from collectfast import settings class Command(collectstatic.Command): - - cache_key_prefix = 'collectfast03_asset_' - def add_arguments(self, parser): super(Command, self).add_arguments(parser) parser.add_argument( @@ -34,6 +16,12 @@ def add_arguments(self, parser): action='store_true', dest='ignore_etag', default=False, + help="Deprecated since 0.5.0, use --disable-collectfast instead.") + parser.add_argument( + '--disable-collectfast', + action='store_true', + dest='disable_collectfast', + default=False, help="Disable Collectfast.") def __init__(self, *args, **kwargs): @@ -41,134 +29,80 @@ def __init__(self, *args, **kwargs): self.tasks = [] self.etags = {} self.storage.preload_metadata = True + self.collectfast_enabled = settings.enabled if getattr(settings, 'AWS_PRELOAD_METADATA', False) is not True: - self._pre_setup_log( - "----> WARNING!\nCollectfast does not work properly without " - "`AWS_PRELOAD_METADATA` set to `True`.\nOverriding " + raise Warning( + "Collectfast does not work properly without " + "`AWS_PRELOAD_METADATA` set to `True`. Overriding " "`storage.preload_metadata` and continuing.") def set_options(self, **options): - self.ignore_etag = options.pop('ignore_etag', False) - if self.ignore_etag: + """ + Set options and handle deprecation. + """ + ignore_etag = options.pop('ignore_etag', False) + disable = options.pop('disable_collectfast', False) + if ignore_etag: + raise DeprecationWarning( + "--ignore-etag is deprecated since 0.5.0, " + "use --disable-collectfast instead.") + if ignore_etag or disable: self.collectfast_enabled = False - else: - self.collectfast_enabled = getattr( - settings, "COLLECTFAST_ENABLED", True) super(Command, self).set_options(**options) - def _pre_setup_log(self, message): - print(message) - def collect(self): - """Override collect method to track time""" - - self.num_skipped_files = 0 - start = datetime.datetime.now() + """ + Override collect to copy files concurrently. The tasks are populated by + Command.copy_file() which is called by super().collect(). + """ ret = super(Command, self).collect() - # Copy files asynchronously - if threads: - Pool(threads).map(self.do_copy_file, self.tasks) - self.collect_time = str(datetime.datetime.now() - start) + if settings.threads: + Pool(settings.threads).map(self.do_copy_file, self.tasks) return ret - def get_cache_key(self, path): - # Python 2/3 support for path hashing - try: - path_hash = hashlib.md5(path).hexdigest() - except TypeError: - path_hash = hashlib.md5(path.encode('utf-8')).hexdigest() - return self.cache_key_prefix + path_hash - - def get_boto3_etag(self, path): - try: - return self.storage.bucket.Object(path).e_tag - except: - return None - - def get_remote_etag(self, path): - try: - return self.storage.bucket.get_key(path).etag - except AttributeError: - return self.get_boto3_etag(path) - - def get_etag(self, path): - """Get etag from local dict, cache or S3 — in that order""" - - if path not in self.etags: - cache_key = self.get_cache_key(path) - cached = cache.get(cache_key, False) - - if cached is False: - self.etags[path] = self.get_remote_etag(path) - cache.set(cache_key, self.etags[path]) - else: - self.etags[path] = cached - - return self.etags[path] - - def destroy_etag(self, path): - if self.etags is not None and path in self.etags: - del self.etags[path] - cache.delete(self.get_cache_key(path)) - - def get_file_hash(self, storage, path): - contents = storage.open(path).read() - file_hash = '"%s"' % hashlib.md5(contents).hexdigest() - return file_hash - def do_copy_file(self, args): """ - Attempt to generate an md5 hash of the local file and compare it with - the S3 version's hash before copying the file. - + Determine if file should be copied or not and handle exceptions. """ path, prefixed_path, source_storage = args if self.collectfast_enabled and not self.dry_run: - normalized_path = self.storage._normalize_name( - prefixed_path).replace('\\', '/') try: - storage_etag = self.get_etag(normalized_path) - local_etag = self.get_file_hash(source_storage, path) - - # Compare hashes and skip copying if matching - if storage_etag == local_etag: - self.log( - "Skipping '%s' based on matching file hashes" % path, - level=2) - self.num_skipped_files += 1 + if not copy_file( + self.storage, path, prefixed_path, source_storage): return False - else: - self.log("Hashes did not match", level=2) except Exception as e: - if debug: + if settings.debug: raise # Ignore errors and let super Command handle it self.stdout.write(smart_str( "Ignored error in Collectfast:\n%s\n--> Continuing using " "default collectstatic." % e)) - # Invalidate cached versions of lookup if copy is done - self.destroy_etag(normalized_path) - return super(Command, self).copy_file( path, prefixed_path, source_storage) def copy_file(self, path, prefixed_path, source_storage): + """ + Appends path to task queue if threads are enabled, otherwise copies + the file with a blocking call. + """ args = (path, prefixed_path, source_storage) - if threads: + if settings.threads: self.tasks.append(args) else: self.do_copy_file(args) def delete_file(self, path, prefixed_path, source_storage): - """Override delete_file to skip modified time and exists lookups""" + """ + Override delete_file to skip modified time and exists lookups. + """ if not self.collectfast_enabled: return super(Command, self).delete_file( path, prefixed_path, source_storage) - if self.dry_run: - self.log("Pretending to delete '%s'" % path) - else: + if not self.dry_run: self.log("Deleting '%s'" % path) self.storage.delete(prefixed_path) + else: + self.log("Pretending to delete '%s'" % path) return True diff --git a/collectfast/settings.py b/collectfast/settings.py new file mode 100644 index 0000000..159e1f8 --- /dev/null +++ b/collectfast/settings.py @@ -0,0 +1,9 @@ +from django.conf import settings + +debug = getattr( + settings, "COLLECTFAST_DEBUG", getattr(settings, "DEBUG", False)) +cache_key_prefix = getattr( + settings, "COLLECTFAST_CACHE_KEY_PREFIX", "collectfast03_asset_") +cache = getattr(settings, "COLLECTFAST_CACHE", "default") +threads = getattr(settings, "COLLECTFAST_THREADS", False) +enabled = getattr(settings, "COLLECTFAST_ENABLED", True) From 64e6f657227a4ee94368109d812511e8266d283e Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Tue, 3 Jan 2017 05:33:55 +0100 Subject: [PATCH 02/15] cleanup --- LICENSE | 2 +- collectfast/tests/test_command.py | 7 +++++-- setup.py | 4 ++-- tox.ini | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/LICENSE b/LICENSE index da3d5b7..d98e358 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2013-2016 Anton Agestam +Copyright (c) 2013-2017 Anton Agestam Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/collectfast/tests/test_command.py b/collectfast/tests/test_command.py index f0e3567..da3ab4d 100644 --- a/collectfast/tests/test_command.py +++ b/collectfast/tests/test_command.py @@ -7,9 +7,12 @@ from django.core.files.storage import Storage, FileSystemStorage from django.core.files.base import ContentFile -from django.conf import settings +from django.core.cache import caches -from ..management.commands.collectstatic import Command, cache +from collectfast.management.commands.collectstatic import Command +from collectfast import settings + +cache = caches[settings.cache] class BotolikeStorage(Storage): diff --git a/setup.py b/setup.py index 7eba146..2336d64 100644 --- a/setup.py +++ b/setup.py @@ -12,9 +12,9 @@ author_email='msn@antonagestam.se', packages=find_packages(), url='https://github.com/antonagestam/collectfast/', - license='Creative Commons Attribution-ShareAlike 3.0 Unported License', + license='MIT License', include_package_data=True, - install_requires=['Django>=1.8', 'python-dateutil>=2.1', 'pytz>=2014.2', ], + install_requires=['Django>=1.8'], classifiers=[ 'Environment :: Web Environment', 'Intended Audience :: Developers', diff --git a/tox.ini b/tox.ini index 250aa61..5b7b078 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{27,33,34,35}-django{18,19} +envlist = py{27,33,34,35,36}-django{18,19,110} [testenv] deps = django18: Django>=1.8,<1.9 From 9e9013e89f0c70aac8f9d32d817246edd7c9b13e Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Tue, 3 Jan 2017 05:39:24 +0100 Subject: [PATCH 03/15] disable travis emails --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index e3322c4..7053ef8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,3 +22,5 @@ addons: - deadsnakes packages: - python3.5 +notifications: + email: false From 0509d29ec6b04400ba6fe295c33543f86a5aa07f Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Tue, 3 Jan 2017 19:52:35 +0100 Subject: [PATCH 04/15] remove models.py, bump version --- collectfast/models.py | 0 setup.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 collectfast/models.py diff --git a/collectfast/models.py b/collectfast/models.py deleted file mode 100644 index e69de29..0000000 diff --git a/setup.py b/setup.py index 2336d64..1c4a7e2 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='Collectfast', description='A Faster Collectstatic', - version='0.4.1', + version='0.5.0', long_description=open('README.rst').read(), author='Anton Agestam', author_email='msn@antonagestam.se', From d925b6dec1df327414fc4679d5b1992b99057695 Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Tue, 3 Jan 2017 20:25:43 +0100 Subject: [PATCH 05/15] fix --- collectfast/etag.py | 12 +++++++----- collectfast/management/commands/collectstatic.py | 11 ++++++----- collectfast/settings.py | 2 ++ 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/collectfast/etag.py b/collectfast/etag.py index 07d0519..bbe4388 100644 --- a/collectfast/etag.py +++ b/collectfast/etag.py @@ -29,10 +29,12 @@ def get_remote_etag(storage, path): try: return storage.bucket.get_key(path).etag except AttributeError: - try: - return storage.bucket.Object(path).e_tag - except AttributeError: - return None + pass + try: + return storage.bucket.Object(path).e_tag + except: + pass + return None def get_etag(storage, path): @@ -40,7 +42,7 @@ def get_etag(storage, path): Get etag of path from cache or S3 - in that order. """ cache_key = get_cache_key(path) - etag = settings.cache.get(cache_key, False) + etag = cache.get(cache_key, False) if etag is False: etag = get_remote_etag(storage, path) cache.set(cache_key, etag) diff --git a/collectfast/management/commands/collectstatic.py b/collectfast/management/commands/collectstatic.py index 271ac2b..b72fa1e 100644 --- a/collectfast/management/commands/collectstatic.py +++ b/collectfast/management/commands/collectstatic.py @@ -1,5 +1,6 @@ from __future__ import with_statement, unicode_literals from multiprocessing.dummy import Pool +import warnings from django.contrib.staticfiles.management.commands import collectstatic from django.utils.encoding import smart_str @@ -30,8 +31,8 @@ def __init__(self, *args, **kwargs): self.etags = {} self.storage.preload_metadata = True self.collectfast_enabled = settings.enabled - if getattr(settings, 'AWS_PRELOAD_METADATA', False) is not True: - raise Warning( + if not settings.preload_metadata_enabled: + warnings.warn( "Collectfast does not work properly without " "`AWS_PRELOAD_METADATA` set to `True`. Overriding " "`storage.preload_metadata` and continuing.") @@ -43,9 +44,9 @@ def set_options(self, **options): ignore_etag = options.pop('ignore_etag', False) disable = options.pop('disable_collectfast', False) if ignore_etag: - raise DeprecationWarning( - "--ignore-etag is deprecated since 0.5.0, " - "use --disable-collectfast instead.") + warnings.warn( + "--ignore-etag is deprecated since 0.5.0, use " + "--disable-collectfast instead.") if ignore_etag or disable: self.collectfast_enabled = False super(Command, self).set_options(**options) diff --git a/collectfast/settings.py b/collectfast/settings.py index 159e1f8..959eb38 100644 --- a/collectfast/settings.py +++ b/collectfast/settings.py @@ -7,3 +7,5 @@ cache = getattr(settings, "COLLECTFAST_CACHE", "default") threads = getattr(settings, "COLLECTFAST_THREADS", False) enabled = getattr(settings, "COLLECTFAST_ENABLED", True) +preload_metadata_enabled = True is getattr( + settings, 'AWS_PRELOAD_METADATA', False) From f3bafc398b3be384dcdcd0b323179f2bbfa816d7 Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Tue, 3 Jan 2017 20:28:50 +0100 Subject: [PATCH 06/15] fix --- collectfast/etag.py | 1 - collectfast/management/commands/collectstatic.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/collectfast/etag.py b/collectfast/etag.py index bbe4388..9be1b44 100644 --- a/collectfast/etag.py +++ b/collectfast/etag.py @@ -69,7 +69,6 @@ def has_matching_etag(remote_storage, source_storage, path): """ Compare etag of path in source storage with remote. """ - storage_etag = get_etag(remote_storage, path) local_etag = get_file_hash(source_storage, path) return storage_etag == local_etag diff --git a/collectfast/management/commands/collectstatic.py b/collectfast/management/commands/collectstatic.py index b72fa1e..407a717 100644 --- a/collectfast/management/commands/collectstatic.py +++ b/collectfast/management/commands/collectstatic.py @@ -75,7 +75,7 @@ def do_copy_file(self, args): except Exception as e: if settings.debug: raise - # Ignore errors and let super Command handle it + # Ignore errors and let default collectstatic handle copy self.stdout.write(smart_str( "Ignored error in Collectfast:\n%s\n--> Continuing using " "default collectstatic." % e)) From ea187907343945c340d032da1f45f77437a806ad Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Tue, 3 Jan 2017 21:05:45 +0100 Subject: [PATCH 07/15] tests --- collectfast/etag.py | 4 +- .../management/commands/collectstatic.py | 4 +- collectfast/tests/test_command.py | 222 ------------------ collectfast/tests/test_etag.py | 45 ++++ 4 files changed, 49 insertions(+), 226 deletions(-) delete mode 100644 collectfast/tests/test_command.py create mode 100644 collectfast/tests/test_etag.py diff --git a/collectfast/etag.py b/collectfast/etag.py index 9be1b44..68ab3ef 100644 --- a/collectfast/etag.py +++ b/collectfast/etag.py @@ -74,9 +74,9 @@ def has_matching_etag(remote_storage, source_storage, path): return storage_etag == local_etag -def copy_file(remote_storage, path, prefixed_path, source_storage): +def should_copy_file(remote_storage, path, prefixed_path, source_storage): """ - Returns True if the file should be copied otherwise False. + Returns True if the file should be copied, otherwise False. """ normalized_path = remote_storage._normalize_name( prefixed_path).replace('\\', '/') diff --git a/collectfast/management/commands/collectstatic.py b/collectfast/management/commands/collectstatic.py index 407a717..de8e0f9 100644 --- a/collectfast/management/commands/collectstatic.py +++ b/collectfast/management/commands/collectstatic.py @@ -5,7 +5,7 @@ from django.contrib.staticfiles.management.commands import collectstatic from django.utils.encoding import smart_str -from collectfast.etag import copy_file +from collectfast.etag import should_copy_file from collectfast import settings @@ -69,7 +69,7 @@ def do_copy_file(self, args): if self.collectfast_enabled and not self.dry_run: try: - if not copy_file( + if not should_copy_file( self.storage, path, prefixed_path, source_storage): return False except Exception as e: diff --git a/collectfast/tests/test_command.py b/collectfast/tests/test_command.py deleted file mode 100644 index da3ab4d..0000000 --- a/collectfast/tests/test_command.py +++ /dev/null @@ -1,222 +0,0 @@ -# -*- coding: utf-8 -*- - -from __future__ import unicode_literals -from unittest import TestCase -from mock import patch -from os.path import join - -from django.core.files.storage import Storage, FileSystemStorage -from django.core.files.base import ContentFile -from django.core.cache import caches - -from collectfast.management.commands.collectstatic import Command -from collectfast import settings - -cache = caches[settings.cache] - - -class BotolikeStorage(Storage): - location = None - - def _normalize_name(self, path): - if self.location is not None: - path = join(self.location, path) - return path - - -class CollectfastTestCase(TestCase): - def setUp(self): - cache.clear() - self.path = '.collectfast-test-file.txt' - self.storage = FileSystemStorage(location='./') - - def get_command(self, *args, **kwargs): - return Command(*args, **kwargs) - - def tearDown(self): - self.storage.delete(self.path) - - -class TestCommand(CollectfastTestCase): - @patch("collectfast.management.commands.collectstatic.collectstatic" - ".Command.collect") - def test_collect(self, mocked_super): - command = self.get_command() - command.collect() - self.assertEqual(command.num_skipped_files, 0) - self.assertIsInstance(command.collect_time, str) - - def test_get_cache_key(self): - command = self.get_command() - cache_key = command.get_cache_key('/some/random/path') - prefix_len = len(command.cache_key_prefix) - self.assertTrue(cache_key.startswith(command.cache_key_prefix)) - self.assertEqual(32 + prefix_len, len(cache_key)) - - @patch("collectfast.management.commands.collectstatic.cache.get") - @patch("collectfast.management.commands.collectstatic.Command" - ".get_remote_etag") - def mock_get_lookup(self, path, cached_value, mocked_lookup, mocked_cache): - mocked_lookup.return_value = 'a fresh lookup' - mocked_cache.return_value = cached_value - command = self.get_command() - ret_val = command.get_etag(path) - return ret_val, mocked_lookup, mocked_cache - - def get_fresh_lookup(self, path): - return self.mock_get_lookup(path, False) - - def get_cached_lookup(self, path): - return self.mock_get_lookup(path, 'a cached lookup') - - def test_get_lookup(self): - path = '/some/unique/path' - cache_key = self.get_command().get_cache_key(path) - - # Assert storage lookup is hit and cache is populated - ret_val, mocked_lookup, mocked_cache = self.get_fresh_lookup(path) - mocked_lookup.assert_called_once_with(path) - self.assertEqual(ret_val, 'a fresh lookup') - self.assertEqual(cache.get(cache_key), 'a fresh lookup') - - # Assert storage is not hit, but cache is - ret_val, mocked_lookup, mocked_cache = self.get_cached_lookup(path) - self.assertEqual(mocked_lookup.call_count, 0) - self.assertEqual(mocked_cache.call_count, 1) - self.assertEqual(ret_val, 'a cached lookup') - - @patch("collectfast.management.commands.collectstatic.Command" - ".get_remote_etag") - def test_destroy_lookup(self, mocked_lookup): - mocked_lookup.return_value = 'a fake lookup' - c = self.get_command() - path = '/another/unique/path' - cache_key = c.get_cache_key(path) - c.get_etag(path) - self.assertEqual(cache.get(cache_key), mocked_lookup.return_value) - self.assertEqual(c.etags[path], mocked_lookup.return_value) - - c.destroy_etag(path) - self.assertEqual(cache.get(cache_key, 'empty'), 'empty') - self.assertNotIn(path, c.etags) - - def test_make_sure_it_has_ignore_etag(self): - command = self.get_command() - parser = command.create_parser('', '') - self.assertIn('ignore_etag', parser.parse_args()) - - -class TestGetFileHash(CollectfastTestCase): - def test_get_file_hash(self): - content = 'this is some content to be hashed' - expected_hash = '"16e71fd2be8be2a3a8c0be7b9aab6c04"' - c = self.get_command() - - self.storage.save(self.path, ContentFile(content)) - file_hash = c.get_file_hash(self.storage, self.path) - self.assertEqual(file_hash, expected_hash) - - self.storage.delete(self.path) - - self.storage.save(self.path, ContentFile('some nonsense')) - file_hash = c.get_file_hash(self.storage, self.path) - self.assertNotEqual(file_hash, expected_hash) - - -class TestCopyFile(CollectfastTestCase): - @patch("collectfast.management.commands.collectstatic.collectstatic.Command" - ".copy_file") - @patch("collectfast.management.commands.collectstatic.Command.get_etag") - def call_copy_file(self, mocked_lookup, mocked_copy_file_super, **kwargs): - options = { - "interactive": False, - "verbosity": 1, - "post_process": False, - "dry_run": False, - "clear": False, - "link": False, - "ignore_patterns": [], - "use_default_ignore_patterns": True} - options.update(kwargs) - path = options.pop('path', '/a/sweet/path') - - if 'lookup_hash' in options: - mocked_lookup.return_value = options.pop('lookup_hash') - - c = self.get_command() - c.storage = options.pop('storage', BotolikeStorage()) - c.set_options(**options) - c.num_skipped_files = 0 - ret_val = c.copy_file(path, path, c.storage) - return ret_val, mocked_copy_file_super, mocked_lookup - - def test_respect_flags(self): - """`copy_file` respects --ignore_etag and --dry_run flags""" - path = '/a/sweet/path' - storage = BotolikeStorage() - - ret_val, super_mock, lookup_mock = self.call_copy_file( - path=path, storage=storage, ignore_etag=True) - self.assertEqual(lookup_mock.call_count, 0) - - ret_val, super_mock, lookup_mock = self.call_copy_file( - path=path, storage=storage, dry_run=True) - self.assertEqual(lookup_mock.call_count, 0) - - @patch("collectfast.management.commands.collectstatic.Command" - ".get_file_hash") - @patch("collectfast.management.commands.collectstatic.Command" - ".destroy_etag") - def test_calls_super(self, mock_destroy_lookup, mock_get_file_hash): - """`copy_file` properly calls super method""" - path = '/a/sweet/path' - storage = BotolikeStorage() - - ret_val, super_mock, lookup_mock = self.call_copy_file( - path=path, storage=storage) - super_mock.assert_called_once_with(path, path, storage) - self.assertFalse(ret_val is False) - mock_destroy_lookup.assert_called_once_with(path) - - @patch("collectfast.management.commands.collectstatic.Command" - ".get_file_hash") - def test_skips(self, mock_get_file_hash): - """ - Returns False and increments self.num_skipped_files if matching - hashes - """ - # mock get_file_hash and lookup to return the same hashes - mock_hash = 'thisisafakehash' - mock_get_file_hash.return_value = mock_hash - - storage = BotolikeStorage() - - ret_val, super_mock, lookup_mock = self.call_copy_file( - path=self.path, storage=storage, lookup_hash=mock_hash) - self.assertFalse(ret_val) - self.assertEqual(super_mock.call_count, 0) - - -class TestAwsPreloadMetadata(CollectfastTestCase): - def setUp(self): - super(TestAwsPreloadMetadata, self).setUp() - settings.AWS_PRELOAD_METADATA = False - - def tearDown(self): - super(TestAwsPreloadMetadata, self).tearDown() - settings.AWS_PRELOAD_METADATA = True - - @patch( - "collectfast.management.commands.collectstatic.Command._pre_setup_log") - def test_always_true(self, _mock_log): - c = self.get_command() - self.assertTrue(c.storage.preload_metadata) - - @patch( - "collectfast.management.commands.collectstatic.Command._pre_setup_log") - def test_log(self, mock_log): - self.get_command() - mock_log.assert_called_once_with( - "----> WARNING!\nCollectfast does not work properly without " - "`AWS_PRELOAD_METADATA` set to `True`.\nOverriding " - "`storage.preload_metadata` and continuing.") diff --git a/collectfast/tests/test_etag.py b/collectfast/tests/test_etag.py new file mode 100644 index 0000000..682420e --- /dev/null +++ b/collectfast/tests/test_etag.py @@ -0,0 +1,45 @@ +import unittest +import mock + +from collectfast import etag +from collectfast import settings + + +def test(func): + cls = type("Test{}".format(func.__name__), (unittest.TestCase, )) + setattr(cls, func.__name__, func) + return func + + +class TestGetCacheKey(unittest.TestCase): + def test_get_cache_key(self): + cache_key = etag.get_cache_key('/some/random/path') + prefix_len = len(settings.cache_key_prefix) + self.assertTrue(cache_key.startswith(settings.cache_key_prefix)) + self.assertEqual(32 + prefix_len, len(cache_key)) + + +class TestGetRemoteEtag(unittest.TestCase): + # INTEGRATION TEST + # boto and bot3!!! + pass + + +class TestGetEtag(unittest.TestCase): + pass + + +class TestDestroyEtag(unittest.TestCase): + pass + + +class TestGetFileHash(unittest.TestCase): + pass + + +class TestHasMatchingEtag(unittest.TestCase): + pass + + +class TestCopyFile(unittest.TestCase): + pass From ba4afe0af0894920f3b36c7c0bf51f2fb91b0d38 Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Tue, 3 Jan 2017 21:36:49 +0100 Subject: [PATCH 08/15] tests --- collectfast/tests/test_etag.py | 62 ++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/collectfast/tests/test_etag.py b/collectfast/tests/test_etag.py index 682420e..ce60723 100644 --- a/collectfast/tests/test_etag.py +++ b/collectfast/tests/test_etag.py @@ -1,38 +1,66 @@ import unittest -import mock +from unittest.mock import patch +import string from collectfast import etag from collectfast import settings def test(func): - cls = type("Test{}".format(func.__name__), (unittest.TestCase, )) + """ + Creates a class that inherits from unittest.TestCase with func as a method. + Create tests like this: + + >>> @test + >>> def test_my_func(case): + >>> case.assertEqual(my_func(), 1337) + """ + cls = type(func.__name__, (unittest.TestCase, ), {func.__name__: func}) setattr(cls, func.__name__, func) - return func + return cls -class TestGetCacheKey(unittest.TestCase): - def test_get_cache_key(self): - cache_key = etag.get_cache_key('/some/random/path') - prefix_len = len(settings.cache_key_prefix) - self.assertTrue(cache_key.startswith(settings.cache_key_prefix)) - self.assertEqual(32 + prefix_len, len(cache_key)) +@test +def test_get_cache_key(case): + cache_key = etag.get_cache_key('/some/random/path') + prefix_len = len(settings.cache_key_prefix) + case.assertTrue(cache_key.startswith(settings.cache_key_prefix)) + case.assertEqual(32 + prefix_len, len(cache_key)) + expected_chars = string.ascii_letters + string.digits + '_' + for c in cache_key: + case.assertIn(c, expected_chars) -class TestGetRemoteEtag(unittest.TestCase): - # INTEGRATION TEST - # boto and bot3!!! - pass +@test +@patch("collectfast.etag.get_remote_etag") +def test_get_destroy_etag(case, mocked): + mocked.return_value = expected_hash = 'hash' + # empty cache + h = etag.get_etag('storage', 'path') + case.assertEqual(h, expected_hash) + mocked.assert_called_once_with('storage', 'path') -class TestGetEtag(unittest.TestCase): - pass + # populated cache + mocked.reset_mock() + h = etag.get_etag('storage', 'path') + case.assertEqual(h, expected_hash) + mocked.assert_not_called() + # test destroy_etag + mocked.reset_mock() + etag.destroy_etag('path') + h = etag.get_etag('storage', 'path') + case.assertEqual(h, expected_hash) + mocked.assert_called_once_with('storage', 'path') -class TestDestroyEtag(unittest.TestCase): - pass +class TestGetRemoteEtag(unittest.TestCase): + # INTEGRATION TEST + # boto and bot3!!! + pass + class TestGetFileHash(unittest.TestCase): pass From 5b073b1459423fe96e6ebd31d4e55fd115d1d200 Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Tue, 3 Jan 2017 22:24:28 +0100 Subject: [PATCH 09/15] etag tests --- collectfast/tests/test_etag.py | 56 +++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/collectfast/tests/test_etag.py b/collectfast/tests/test_etag.py index ce60723..4b551bb 100644 --- a/collectfast/tests/test_etag.py +++ b/collectfast/tests/test_etag.py @@ -1,10 +1,16 @@ import unittest -from unittest.mock import patch import string +import tempfile + +from mock import patch +from django.contrib.staticfiles.storage import StaticFilesStorage +from storages.backends.s3boto import S3BotoStorage from collectfast import etag from collectfast import settings +hash_characters = string.ascii_letters + string.digits + def test(func): """ @@ -26,7 +32,7 @@ def test_get_cache_key(case): prefix_len = len(settings.cache_key_prefix) case.assertTrue(cache_key.startswith(settings.cache_key_prefix)) case.assertEqual(32 + prefix_len, len(cache_key)) - expected_chars = string.ascii_letters + string.digits + '_' + expected_chars = hash_characters + '_' for c in cache_key: case.assertIn(c, expected_chars) @@ -55,19 +61,41 @@ def test_get_destroy_etag(case, mocked): mocked.assert_called_once_with('storage', 'path') - -class TestGetRemoteEtag(unittest.TestCase): - # INTEGRATION TEST - # boto and bot3!!! - pass - -class TestGetFileHash(unittest.TestCase): - pass +@test +def test_get_file_hash(case): + storage = StaticFilesStorage() + with tempfile.NamedTemporaryFile(dir=storage.base_location) as f: + f.write(b'spam') + h = etag.get_file_hash(storage, f.name) + case.assertEqual(len(h), 34) + case.assertTrue(h.startswith('"')) + case.assertTrue(h.endswith('"')) + for c in h[1:-1]: + case.assertIn(c, hash_characters) -class TestHasMatchingEtag(unittest.TestCase): - pass +@test +@patch('collectfast.etag.get_etag') +@patch('collectfast.etag.get_file_hash') +def test_has_matching_etag(case, mocked_get_etag, mocked_get_file_hash): + mocked_get_etag.return_value = mocked_get_file_hash.return_value = 'hash' + case.assertTrue(etag.has_matching_etag('rs', 'ss', 'path')) + mocked_get_etag.return_value = 'not same' + case.assertFalse(etag.has_matching_etag('rs', 'ss', 'path')) -class TestCopyFile(unittest.TestCase): - pass +@test +@patch('collectfast.etag.has_matching_etag') +@patch('collectfast.etag.destroy_etag') +def test_should_copy_file(case, mocked_destroy_etag, mocked_has_matching_etag): + remote_storage = S3BotoStorage() + + mocked_has_matching_etag.return_value = True + case.assertFalse(etag.should_copy_file( + remote_storage, 'path', 'prefixed_path', 'source_storage')) + mocked_destroy_etag.assert_not_called() + + mocked_has_matching_etag.return_value = False + case.assertTrue(etag.should_copy_file( + remote_storage, 'path', 'prefixed_path', 'source_storage')) + mocked_destroy_etag.assert_called() From d604776ad253f865bc9ffb1ce6c0302b133f0418 Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Wed, 4 Jan 2017 02:15:09 +0100 Subject: [PATCH 10/15] tests --- Makefile | 2 +- collectfast/tests/test_command.py | 16 ++++++++++++++++ collectfast/tests/test_etag.py | 15 +-------------- collectfast/tests/utils.py | 22 ++++++++++++++++++++++ runtests.py | 23 ++++++++++++++++++++--- 5 files changed, 60 insertions(+), 18 deletions(-) create mode 100644 collectfast/tests/test_command.py create mode 100644 collectfast/tests/utils.py diff --git a/Makefile b/Makefile index 12adc4d..f2571e2 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ test: - python runtests.py + . aws-credentials && ./runtests.py distribute: python setup.py sdist bdist_wheel upload diff --git a/collectfast/tests/test_command.py b/collectfast/tests/test_command.py new file mode 100644 index 0000000..dbc3aa8 --- /dev/null +++ b/collectfast/tests/test_command.py @@ -0,0 +1,16 @@ +from django.core.management import call_command +from django.utils.six import StringIO + +from .utils import test, create_static_file + + +@test +def test_command(case): + create_static_file('static/testfile.txt') + out = StringIO() + call_command('collectstatic', '--no-input', stdout=out) + result = out.getvalue() + case.assertIn("1 static file copied.", result) + call_command('collectstatic', '--no-input', stdout=out) + result = out.getvalue() + case.assertIn("0 static files copied.", result) diff --git a/collectfast/tests/test_etag.py b/collectfast/tests/test_etag.py index 4b551bb..bad3b87 100644 --- a/collectfast/tests/test_etag.py +++ b/collectfast/tests/test_etag.py @@ -8,24 +8,11 @@ from collectfast import etag from collectfast import settings +from .utils import test hash_characters = string.ascii_letters + string.digits -def test(func): - """ - Creates a class that inherits from unittest.TestCase with func as a method. - Create tests like this: - - >>> @test - >>> def test_my_func(case): - >>> case.assertEqual(my_func(), 1337) - """ - cls = type(func.__name__, (unittest.TestCase, ), {func.__name__: func}) - setattr(cls, func.__name__, func) - return cls - - @test def test_get_cache_key(case): cache_key = etag.get_cache_key('/some/random/path') diff --git a/collectfast/tests/utils.py b/collectfast/tests/utils.py new file mode 100644 index 0000000..7659b1c --- /dev/null +++ b/collectfast/tests/utils.py @@ -0,0 +1,22 @@ +import random +import unittest + + +def test(func): + """ + Creates a class that inherits from unittest.TestCase with func as a method. + Create tests like this: + + >>> fn = lambda x: 1337 + >>> @test + >>> def test_fn(case): + >>> case.assertEqual(fn(), 1337) + """ + return type(func.__name__, (unittest.TestCase, ), {func.__name__: func}) + + +def create_static_file(filename): + for i in range(3): + with open(filename, 'w+') as f: + for i in range(500): + f.write(chr(int(random.random() * 64))) diff --git a/runtests.py b/runtests.py index 855d488..5c92447 100755 --- a/runtests.py +++ b/runtests.py @@ -2,7 +2,7 @@ import os import sys - +import shutil from optparse import OptionParser import django @@ -25,6 +25,11 @@ def main(): parent_dir, app_name = os.path.split(app_path) sys.path.insert(0, parent_dir) + # create static dir + staticfiles_dir = "./static/" + if not os.path.exists(staticfiles_dir): + os.makedirs(staticfiles_dir) + settings.configure(**{ "DATABASES": { 'default': { @@ -58,19 +63,31 @@ def main(): ), "STATIC_URL": "/staticfiles/", "STATIC_ROOT": "./", + "STATICFILES_DIRS": [staticfiles_dir], + "STATICFILES_STORAGE": "storages.backends.s3boto3.S3Boto3Storage", "AWS_PRELOAD_METADATA": True, "MIDDLEWARE_CLASSES": [], + + # credentials for integration test bucket + "AWS_STORAGE_BUCKET_NAME": "collectfast", + "AWS_ACCESS_KEY_ID": os.environ.get("AWS_ACCESS_KEY_ID", None), + "AWS_SECRET_ACCESS_KEY": os.environ.get("AWS_SECRET_ACCESS_KEY"), + "AWS_S3_REGION_NAME": "eu-central-1", + "AWS_S3_SIGNATURE_VERSION": "s3v4", + "AWS_QUERYSTRING_AUTH": False, }) if options.TEST_SUITE is not None: test_arg = "%s.%s" % (app_name, options.TEST_SUITE) else: test_arg = app_name - if django.VERSION >= (1, 7): - django.setup() + django.setup() call_command("test", test_arg) + # delete static dir + shutil.rmtree(staticfiles_dir) + if __name__ == "__main__": main() From d8d61d7663fb2a50f8407573ce825ad8448a0dac Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Wed, 4 Jan 2017 02:16:47 +0100 Subject: [PATCH 11/15] fix tox dependencies --- tox.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tox.ini b/tox.ini index 5b7b078..1a66218 100644 --- a/tox.ini +++ b/tox.ini @@ -7,6 +7,8 @@ deps = django110: Django>=1.10,<1.11 mock==1.3.0 coveralls + django-storages + boto3 commands = coverage run --source=collectfast {toxinidir}/runtests.py - coveralls From f35e63cfca3b826dde7c8db39bf703782c526996 Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Wed, 4 Jan 2017 02:50:52 +0100 Subject: [PATCH 12/15] tests --- README.rst | 2 +- collectfast/tests/test_command.py | 15 +++++++++------ collectfast/tests/test_etag.py | 2 +- collectfast/tests/utils.py | 1 + tox.ini | 3 ++- 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index e0bf81c..55494f1 100644 --- a/README.rst +++ b/README.rst @@ -56,7 +56,7 @@ Usage Collectfast overrides Django's builtin ``collectstatic`` command so just run ``python manage.py collectstatic`` as normal. You can disable -collectfast by using the ``--ignore-etag`` option. +Collectfast by using the ``--disable-collectfast`` option. You can also disable collectfast by setting ``COLLECTFAST_ENABLED = False`` in your settings file. This is useful diff --git a/collectfast/tests/test_command.py b/collectfast/tests/test_command.py index dbc3aa8..98c3ff4 100644 --- a/collectfast/tests/test_command.py +++ b/collectfast/tests/test_command.py @@ -4,13 +4,16 @@ from .utils import test, create_static_file -@test -def test_command(case): - create_static_file('static/testfile.txt') +def call_collectstatic(): out = StringIO() call_command('collectstatic', '--no-input', stdout=out) - result = out.getvalue() + return out.getvalue() + +@test +def test_basics(case): + create_static_file('static/testfile.txt') + result = call_collectstatic() case.assertIn("1 static file copied.", result) - call_command('collectstatic', '--no-input', stdout=out) - result = out.getvalue() + # file state should now be cached + result = call_collectstatic() case.assertIn("0 static files copied.", result) diff --git a/collectfast/tests/test_etag.py b/collectfast/tests/test_etag.py index bad3b87..57cf068 100644 --- a/collectfast/tests/test_etag.py +++ b/collectfast/tests/test_etag.py @@ -85,4 +85,4 @@ def test_should_copy_file(case, mocked_destroy_etag, mocked_has_matching_etag): mocked_has_matching_etag.return_value = False case.assertTrue(etag.should_copy_file( remote_storage, 'path', 'prefixed_path', 'source_storage')) - mocked_destroy_etag.assert_called() + mocked_destroy_etag.assert_called_once_with('prefixed_path') diff --git a/collectfast/tests/utils.py b/collectfast/tests/utils.py index 7659b1c..9e89134 100644 --- a/collectfast/tests/utils.py +++ b/collectfast/tests/utils.py @@ -20,3 +20,4 @@ def create_static_file(filename): with open(filename, 'w+') as f: for i in range(500): f.write(chr(int(random.random() * 64))) + f.close() diff --git a/tox.ini b/tox.ini index 1a66218..f785d3f 100644 --- a/tox.ini +++ b/tox.ini @@ -9,10 +9,11 @@ deps = coveralls django-storages boto3 + boto commands = coverage run --source=collectfast {toxinidir}/runtests.py - coveralls -passenv = TRAVIS TRAVIS_JOB_ID TRAVIS_BRANCH +passenv = TRAVIS TRAVIS_JOB_ID TRAVIS_BRANCH AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY [testenv:flake8] basepython = python deps = flake8 From d381251aad669eb2d8e70fb78874b616d9a8b8f3 Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Wed, 4 Jan 2017 16:51:02 +0100 Subject: [PATCH 13/15] fix tests --- collectfast/etag.py | 10 +++++++++- collectfast/tests/test_command.py | 3 ++- collectfast/tests/test_etag.py | 1 - 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/collectfast/etag.py b/collectfast/etag.py index 68ab3ef..410f40c 100644 --- a/collectfast/etag.py +++ b/collectfast/etag.py @@ -1,11 +1,19 @@ import hashlib -from functools import lru_cache from django.core.cache import caches from collectfast import settings from .log import log +try: + from functools import lru_cache +except ImportError: + # make lru_cache do nothing in python 2.7 + def lru_cache(maxsize=128, typed=False): + def decorator(func): + return func + return decorator + cache = caches[settings.cache] diff --git a/collectfast/tests/test_command.py b/collectfast/tests/test_command.py index 98c3ff4..0fff84e 100644 --- a/collectfast/tests/test_command.py +++ b/collectfast/tests/test_command.py @@ -6,9 +6,10 @@ def call_collectstatic(): out = StringIO() - call_command('collectstatic', '--no-input', stdout=out) + call_command('collectstatic', interactive=False, stdout=out) return out.getvalue() + @test def test_basics(case): create_static_file('static/testfile.txt') diff --git a/collectfast/tests/test_etag.py b/collectfast/tests/test_etag.py index 57cf068..7a2c66e 100644 --- a/collectfast/tests/test_etag.py +++ b/collectfast/tests/test_etag.py @@ -1,4 +1,3 @@ -import unittest import string import tempfile From cad09f3221c0788e001a5795edcce392213fc39d Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Wed, 4 Jan 2017 17:01:20 +0100 Subject: [PATCH 14/15] test docs --- README.rst | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 55494f1..742b2f3 100644 --- a/README.rst +++ b/README.rst @@ -131,9 +131,23 @@ Discussion is open and welcome. **Testing** -To run tests, setup a virtualenv and install tox with ``pip install tox`` then -run ``tox`` in the project directory. To only run tests for a certain -environment run e.g. ``tox -e py35-django110``. +To run integration tests you need to setup an S3 bucket with the name +``collectfast`` and set your AWS credentials as environment variables. You can +do this by adding them to a file ``aws-credentials`` like this: + +.. code:: bash + + export AWS_ACCESS_KEY_ID="XXXX" + export AWS_SECRET_ACCESS_KEY="XXXX" + +And then running the tests with ``. aws-credentials && python runtests.py``. + +If you don't feel like setting up an S3 bucket, just skip setting the +environment variables. The integration tests will still run but fail. + +To run tests with tox, setup a virtualenv and install tox with +``pip install tox`` then run ``tox`` in the project directory. To only run +tests for a certain environment run e.g. ``tox -e py35-django110``. License From 37a7eeff8944de75aec529195775c8349b554e8a Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Wed, 4 Jan 2017 17:06:50 +0100 Subject: [PATCH 15/15] appveyor test dependencies --- appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index da5b62f..77a953b 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -63,8 +63,8 @@ install: # about it being out of date. - "pip install --disable-pip-version-check --user --upgrade pip" - # Install Mock - - "%CMD_IN_ENV% pip install mock==1.3.0" + # Install test dependencies + - "%CMD_IN_ENV% pip install mock==1.3.0 boto boto3 django-storages" # Install the build dependencies of the project. If some dependencies contain # compiled extensions and are not provided as pre-built wheel packages,