forked from SmileyChris/easy-thumbnails
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request SmileyChris#312 from jorabra/new-cleanup-command
Re-implement cleanup command.
- Loading branch information
Showing
1 changed file
with
147 additions
and
71 deletions.
There are no files selected for viewing
218 changes: 147 additions & 71 deletions
218
easy_thumbnails/management/commands/thumbnail_cleanup.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,76 +1,152 @@ | ||
import gc | ||
import os | ||
import re | ||
|
||
from django.db import models | ||
from django.core.management.base import NoArgsCommand | ||
import time | ||
from datetime import datetime, date, timedelta | ||
from optparse import make_option | ||
|
||
from django.core.files.storage import get_storage_class | ||
from django.core.management.base import BaseCommand | ||
from easy_thumbnails.conf import settings | ||
from easy_thumbnails.models import Source | ||
|
||
|
||
class ThumbnailCollectionCleaner(object): | ||
""" | ||
Remove thumbnails and DB references to non-existing source images. | ||
""" | ||
sources = 0 | ||
thumbnails = 0 | ||
thumbnails_deleted = 0 | ||
source_refs_deleted = 0 | ||
execution_time = 0 | ||
|
||
try: | ||
set | ||
except NameError: | ||
from sets import Set as set # For Python 2.3 | ||
|
||
thumb_re = re.compile( | ||
r'^%s(.*)\.\d{1,}x\d{1,}_[-\w]*q([1-9]\d?|100)\.jpg' % | ||
settings.THUMBNAIL_PREFIX) | ||
|
||
|
||
def get_thumbnail_path(path):
    """
    Return the thumbnail directory that belongs to ``path``.

    The result is ``THUMBNAIL_BASEDIR``, ``path`` and ``THUMBNAIL_SUBDIR``
    joined in that order with ``os.path.join``.
    """
    parts = (settings.THUMBNAIL_BASEDIR, path, settings.THUMBNAIL_SUBDIR)
    return os.path.join(*parts)
|
||
|
||
def clean_up(): | ||
paths = set() | ||
for app in models.get_apps(): | ||
model_list = models.get_models(app) | ||
for model in model_list: | ||
for field in model._meta.fields: | ||
if isinstance(field, models.ImageField): | ||
#TODO: take care of date formatted and callable upload_to. | ||
if (not callable(field.upload_to) and | ||
field.upload_to.find("%") == -1): | ||
paths = paths.union((field.upload_to,)) | ||
paths = list(paths) | ||
for path in paths: | ||
thumbnail_path = get_thumbnail_path(path) | ||
def _get_absolute_path(self, path): | ||
return os.path.join(settings.MEDIA_ROOT, path) | ||
|
||
def _get_relative_path(self, path): | ||
return os.path.relpath(path, settings.MEDIA_ROOT) | ||
|
||
def _check_if_exists(self, storage, path): | ||
try: | ||
file_list = os.listdir(os.path.join(settings.MEDIA_ROOT, | ||
thumbnail_path)) | ||
except OSError: | ||
continue # Dir doesn't exists, no thumbnails here. | ||
for fn in file_list: | ||
m = thumb_re.match(fn) | ||
if m: | ||
# Due to that the naming of thumbnails replaces the dot before | ||
# extension with an underscore we have 2 possibilities for the | ||
# original filename. If either present we do not delete | ||
# suspected thumbnail. | ||
# org_fn is the expected original filename w/o extension | ||
# org_fn_alt is the expected original filename with extension | ||
org_fn = m.group(1) | ||
org_fn_exists = os.path.isfile( | ||
os.path.join(settings.MEDIA_ROOT, path, org_fn)) | ||
|
||
usc_pos = org_fn.rfind("_") | ||
if usc_pos != -1: | ||
org_fn_alt = "%s.%s" % ( | ||
org_fn[0:usc_pos], org_fn[usc_pos + 1:]) | ||
org_fn_alt_exists = os.path.isfile( | ||
os.path.join(settings.MEDIA_ROOT, path, org_fn_alt)) | ||
else: | ||
org_fn_alt_exists = False | ||
if not org_fn_exists and not org_fn_alt_exists: | ||
del_me = os.path.join(settings.MEDIA_ROOT, | ||
thumbnail_path, fn) | ||
os.remove(del_me) | ||
|
||
|
||
class Command(NoArgsCommand):
    """Management command that runs the module-level ``clean_up``."""
    requires_model_validation = False
    help = "Deletes thumbnails that no longer have an original file."

    def handle_noargs(self, **options):
        """Run the cleanup; the supplied options are not consulted."""
        clean_up()
return storage.exists(path) | ||
except Exception as e: | ||
print "Something went wrong when checking existance of %s:" % path | ||
print str(e) | ||
|
||
def _delete_sources_by_id(self, ids): | ||
Source.objects.all().filter(id__in=ids).delete() | ||
|
||
def clean_up(self, dry_run=False, verbosity=1, last_n_days=0, | ||
cleanup_path=None, storage=None): | ||
""" | ||
Iterate through sources. Delete database references to sources | ||
not existing, including its corresponding thumbnails (files and | ||
database references). | ||
""" | ||
if dry_run: | ||
print "Dry run..." | ||
|
||
if not storage: | ||
storage = get_storage_class(settings.THUMBNAIL_DEFAULT_STORAGE)() | ||
|
||
sources_to_delete = [] | ||
time_start = time.time() | ||
|
||
query = Source.objects.all() | ||
if last_n_days > 0: | ||
today = date.today() | ||
query = query.filter( | ||
modified__range=(today - timedelta(days=last_n_days), today)) | ||
if cleanup_path: | ||
query = query.filter(name__startswith=cleanup_path) | ||
|
||
for source in queryset_iterator(query): | ||
self.sources += 1 | ||
abs_source_path = self._get_absolute_path(source.name) | ||
|
||
if not self._check_if_exists(storage, abs_source_path): | ||
if verbosity > 0: | ||
print "Source not present:", abs_source_path | ||
self.source_refs_deleted += 1 | ||
sources_to_delete.append(source.id) | ||
|
||
for thumb in source.thumbnails.all(): | ||
self.thumbnails_deleted += 1 | ||
abs_thumbnail_path = self._get_absolute_path(thumb.name) | ||
|
||
if self._check_if_exists(storage, abs_thumbnail_path): | ||
if not dry_run: | ||
storage.delete(abs_thumbnail_path) | ||
if verbosity > 0: | ||
print "Deleting thumbnail:", abs_thumbnail_path | ||
|
||
if len(sources_to_delete) >= 1000 and not dry_run: | ||
self._delete_sources_by_id(sources_to_delete) | ||
sources_to_delete = [] | ||
|
||
if not dry_run: | ||
self._delete_sources_by_id(sources_to_delete) | ||
self.execution_time = round(time.time() - time_start) | ||
|
||
def print_stats(self): | ||
""" | ||
Print statistics about the cleanup performed. | ||
""" | ||
print '{:-<48}'.format(str(datetime.now().strftime('%Y-%m-%d %H:%M '))) | ||
print "{:<40} {:>7}".format("Sources checked:", self.sources) | ||
print "{:<40} {:>7}".format("Source references deleted from DB:", | ||
self.source_refs_deleted) | ||
print "{:<40} {:>7}".format("Thumbnails deleted from disk:", | ||
self.thumbnails_deleted) | ||
print "(Completed in %s seconds)\n" % self.execution_time | ||
|
||
|
||
def queryset_iterator(queryset, chunksize=1000):
    """
    The queryset iterator helps to keep the memory consumption down.
    And also making it easier to process for weaker computers.

    Yields the rows of ``queryset`` in ascending ``pk`` order, fetching at
    most ``chunksize`` rows per query and running ``gc.collect()`` after
    each chunk.  Iteration starts from ``pk > 0`` and stops at the highest
    pk present when iteration began.  An empty queryset yields nothing.
    """
    try:
        last_pk = queryset.order_by('-pk')[0].pk
    except IndexError:
        # Nothing to iterate over.
        return

    primary_key = 0
    queryset = queryset.order_by('pk')
    while primary_key < last_pk:
        fetched_any = False
        for row in queryset.filter(pk__gt=primary_key)[:chunksize]:
            fetched_any = True
            primary_key = row.pk
            yield row
        if not fetched_any:
            # The row that held last_pk is gone (e.g. deleted while we
            # were iterating); without this the loop would never end.
            break
        gc.collect()
|
||
|
||
class Command(BaseCommand):
    """
    Management command wrapping ``ThumbnailCollectionCleaner``.

    Accepts ``--dry-run``, ``--last-n-days`` and ``--path`` and passes them,
    together with the verbosity option, to ``clean_up``; the collected
    statistics are printed afterwards.
    """
    help = """ Deletes thumbnails that no longer have an original file. """

    option_list = BaseCommand.option_list + (
        make_option(
            '--dry-run',
            dest='dry_run',
            action='store_true',
            default=False,
            help='Dry run the execution.'),
        make_option(
            '--last-n-days',
            dest='last_n_days',
            action='store',
            type='int',
            default=0,
            help='The number of days back in time to clean thumbnails for.'),
        make_option(
            '--path',
            dest='cleanup_path',
            action='store',
            type='string',
            help='Specify a path to clean up.'),
    )

    def handle(self, *args, **options):
        """Run the cleaner with the parsed options, then print its stats."""
        dry_run = options.get('dry_run', False)
        verbosity = int(options.get('verbosity', 1))
        last_n_days = int(options.get('last_n_days', 0))
        cleanup_path = options.get('cleanup_path')

        cleaner = ThumbnailCollectionCleaner()
        cleaner.clean_up(
            dry_run=dry_run,
            verbosity=verbosity,
            last_n_days=last_n_days,
            cleanup_path=cleanup_path)
        cleaner.print_stats()