Permalink
Browse files

Fixed #18676 -- Allow fast-path deletion of objects

Objects can be fast-path deleted if there are no signals, and there are
no further cascades. If fast-path is taken, the objects do not need to
be loaded into memory before deletion.

Thanks to Jeremy Dunck, Simon Charette and Alex Gaynor for reviewing
the patch.
  • Loading branch information...
1 parent 3fcca0e commit 1cd6e04cd4f768bcd4385b75de433d497d938f82 @akaariai akaariai committed Sep 20, 2012
@@ -191,6 +191,13 @@ def nested(self, format_callback=None):
roots.extend(self._nested(root, seen, format_callback))
return roots
+ def can_fast_delete(self, *args, **kwargs):
+ """
+ Always return False so that the fast-path is never taken here.
+
+ We always want to load the objects into memory so that we can display
+ them to the user on the confirmation page. Any positional or keyword
+ arguments are accepted and ignored.
+ """
+ return False
+
def model_format_dict(obj):
"""
@@ -77,6 +77,9 @@ def __init__(self, using):
self.data = {}
self.batches = {} # {model: {field: set([instances])}}
self.field_updates = {} # {model: {(field, value): set([instances])}}
+ # fast_deletes is a list of queryset-likes that can be deleted without
+ # fetching the objects into memory.
+ self.fast_deletes = []
# Tracks deletion-order dependency for databases without transactions
# or ability to defer constraint checks. Only concrete model classes
@@ -131,6 +134,43 @@ def add_field_update(self, field, value, objs):
model, {}).setdefault(
(field, value), set()).update(objs)
+ def can_fast_delete(self, objs, from_field=None):
+ """
+ Determines if the objects in the given queryset-like can be
+ fast-deleted. This can be done if there are no cascades, no
+ parents and no signal listeners for the object class.
+
+ 'objs' must expose both a 'model' and a '_raw_delete' attribute;
+ anything else (for example a plain list of instances) is never
+ fast-deleted.
+
+ The 'from_field' tells where we are coming from - we need this to
+ determine if the objects are in fact to be deleted. It also allows
+ skipping the parent -> child -> parent chain which would otherwise
+ prevent fast delete of the child.
+
+ Returns True if the fast-path can be taken, False otherwise.
+ """
+ # Reached through a relation that is not CASCADE: the objects are not
+ # simply deleted, so the fast-path does not apply.
+ if from_field and from_field.rel.on_delete is not CASCADE:
+ return False
+ # Duck-type check for a queryset-like that supports raw deletion.
+ if not (hasattr(objs, 'model') and hasattr(objs, '_raw_delete')):
+ return False
+ model = objs.model
+ # Any listener on these signals for the model rules out the fast-path.
+ if (signals.pre_delete.has_listeners(model)
+ or signals.post_delete.has_listeners(model)
+ or signals.m2m_changed.has_listeners(model)):
+ return False
+ # The use of from_field comes from the need to avoid cascade back to
+ # parent when parent delete is cascading to child.
+ opts = model._meta
+ if any(link != from_field for link in opts.concrete_model._meta.parents.values()):
+ return False
+ # Foreign keys pointing to this model, both from m2m and other
+ # models. Only DO_NOTHING relations are compatible with the fast-path.
+ for related in opts.get_all_related_objects(
+ include_hidden=True, include_proxy_eq=True):
+ if related.field.rel.on_delete is not DO_NOTHING:
+ return False
+ # GFK deletes: a many_to_many entry without a 'through' model blocks
+ # the fast-path (presumably these are generic relations - TODO confirm).
+ for relation in opts.many_to_many:
+ if not relation.rel.through:
+ return False
+ return True
+
def collect(self, objs, source=None, nullable=False, collect_related=True,
source_attr=None, reverse_dependency=False):
"""
@@ -148,6 +188,9 @@ def collect(self, objs, source=None, nullable=False, collect_related=True,
models, the one case in which the cascade follows the forwards
direction of an FK rather than the reverse direction.)
"""
+ if self.can_fast_delete(objs):
+ self.fast_deletes.append(objs)
+ return
new_objs = self.add(objs, source, nullable,
reverse_dependency=reverse_dependency)
if not new_objs:
@@ -160,6 +203,10 @@ def collect(self, objs, source=None, nullable=False, collect_related=True,
concrete_model = model._meta.concrete_model
for ptr in six.itervalues(concrete_model._meta.parents):
if ptr:
+ # FIXME: This seems to be buggy and execute a query for each
+ # parent object fetch. We have the parent data in the obj,
+ # but we don't have a nice way to turn that data into parent
+ # object instance.
parent_objs = [getattr(obj, ptr.name) for obj in new_objs]
self.collect(parent_objs, source=model,
source_attr=ptr.rel.related_name,
@@ -170,12 +217,12 @@ def collect(self, objs, source=None, nullable=False, collect_related=True,
for related in model._meta.get_all_related_objects(
include_hidden=True, include_proxy_eq=True):
field = related.field
- if related.model._meta.auto_created:
- self.add_batch(related.model, field, new_objs)
- else:
- sub_objs = self.related_objects(related, new_objs)
- if not sub_objs:
- continue
+ if field.rel.on_delete == DO_NOTHING:
+ continue
+ sub_objs = self.related_objects(related, new_objs)
+ if self.can_fast_delete(sub_objs, from_field=field):
+ self.fast_deletes.append(sub_objs)
+ elif sub_objs:
field.rel.on_delete(self, field, sub_objs, self.using)
# TODO This entire block is only needed as a special case to
@@ -241,6 +288,10 @@ def delete(self):
sender=model, instance=obj, using=self.using
)
+ # fast deletes
+ for qs in self.fast_deletes:
+ qs._raw_delete(using=self.using)
+
# update fields
for model, instances_for_fieldvalues in six.iteritems(self.field_updates):
query = sql.UpdateQuery(model)
@@ -529,6 +529,14 @@ def delete(self):
self._result_cache = None
delete.alters_data = True
+ def _raw_delete(self, using):
+ """
+ Deletes the objects found by this queryset using direct SQL, without
+ loading them into memory. No signals are sent, and there is no
+ protection for cascades.
+
+ 'using' is passed through unchanged to sql.DeleteQuery.delete_qs().
+ """
+ sql.DeleteQuery(self.model).delete_qs(self, using)
+ _raw_delete.alters_data = True
+
def update(self, **kwargs):
"""
Updates all elements in the current QuerySet, setting all the given
@@ -934,7 +934,8 @@ def as_sql(self):
qn = self.quote_name_unless_alias
result = ['DELETE FROM %s' % qn(self.query.tables[0])]
where, params = self.query.where.as_sql(qn=qn, connection=self.connection)
- result.append('WHERE %s' % where)
+ if where:
+ result.append('WHERE %s' % where)
return ' '.join(result), tuple(params)
class SQLUpdateCompiler(SQLCompiler):
@@ -3,6 +3,7 @@
"""
from django.core.exceptions import FieldError
+from django.db import connections
from django.db.models.constants import LOOKUP_SEP
from django.db.models.fields import DateField, FieldDoesNotExist
from django.db.models.sql.constants import *
@@ -46,6 +47,37 @@ def delete_batch(self, pk_list, using, field=None):
pk_list[offset:offset + GET_ITERATOR_CHUNK_SIZE]), AND)
self.do_query(self.model._meta.db_table, where, using=using)
+ def delete_qs(self, query, using):
+ """
+ Delete the rows matched by the queryset 'query', using the database
+ alias 'using'.
+
+ If the inner query uses no tables other than this query's own and has
+ no HAVING conditions, its WHERE clause is reused directly. Otherwise
+ the delete is restricted by primary key: either with a "pk IN
+ (inner query)" condition, or - when the connection's
+ update_can_self_select feature is false - by fetching the pk values
+ and handing them to delete_batch().
+ """
+ innerq = query.query
+ # Make sure the inner query has at least one table in use.
+ innerq.get_initial_alias()
+ # The same for our new query.
+ self.get_initial_alias()
+ # Only tables with a non-zero alias refcount count as "in use".
+ innerq_used_tables = [t for t in innerq.tables
+ if innerq.alias_refcount[t]]
+ if ((not innerq_used_tables or innerq_used_tables == self.tables)
+ and not len(innerq.having)):
+ # There is only the base table in use in the query, and there is
+ # no aggregate filtering going on.
+ self.where = innerq.where
+ else:
+ pk = query.model._meta.pk
+ if not connections[using].features.update_can_self_select:
+ # We can't do the delete using subquery.
+ values = list(query.values_list('pk', flat=True))
+ # Nothing matched, so there is nothing to delete.
+ if not values:
+ return
+ self.delete_batch(values, using)
+ return
+ else:
+ # Use the inner query itself as the right-hand side of "pk IN".
+ values = innerq
+ innerq.select = [(self.get_initial_alias(), pk.column)]
+ where = self.where_class()
+ where.add((Constraint(None, pk.column, pk), 'in', values), AND)
+ self.where = where
+ self.get_compiler(using).execute_sql(None)
+
+
class UpdateQuery(Query):
"""
Represents an "update" SQL query.
@@ -1667,6 +1667,21 @@ methods on your models. It does, however, emit the
:data:`~django.db.models.signals.post_delete` signals for all deleted objects
(including cascaded deletions).
+.. versionadded:: 1.5
+ Allow fast-path deletion of objects
+
+Django needs to fetch objects into memory to send signals and handle cascades.
+However, if there are no cascades and no signals, then Django may take a
+fast-path and delete objects without fetching into memory. For large
+deletes this can result in significantly reduced memory usage. The number of
+executed queries can be reduced, too.
+
+ForeignKeys which are set to :attr:`~django.db.models.ForeignKey.on_delete`
+DO_NOTHING do not prevent taking the fast-path in deletion.
+
+Note that the queries generated in object deletion are an implementation
+detail subject to change.
+
.. _field-lookups:
Field lookups
@@ -149,6 +149,12 @@ Django 1.5 also includes several smaller improvements worth noting:
* Django now provides a mod_wsgi :doc:`auth handler
</howto/deployment/wsgi/apache-auth>`
+* The :meth:`QuerySet.delete() <django.db.models.query.QuerySet.delete>`
+ and :meth:`Model.delete() <django.db.models.Model.delete>` can now take
+ a fast-path in some cases. The fast-path allows for fewer queries and fewer
+ objects fetched into memory. See :meth:`QuerySet.delete()
+ <django.db.models.query.QuerySet.delete>` for details.
+
Backwards incompatible changes in 1.5
=====================================
@@ -95,7 +95,7 @@ class MRNull(models.Model):
class Avatar(models.Model):
- pass
+ desc = models.TextField(null=True)
class User(models.Model):
@@ -108,3 +108,21 @@ class HiddenUser(models.Model):
class HiddenUserProfile(models.Model):
user = models.ForeignKey(HiddenUser)
+
+# Model with no fields of its own; M2MFrom.m2m is a ManyToManyField to it.
+class M2MTo(models.Model):
+ pass
+
+# Declares the forward side of a many-to-many relation to M2MTo.
+class M2MFrom(models.Model):
+ m2m = models.ManyToManyField(M2MTo)
+
+# Model with no fields of its own; Child inherits from it.
+class Parent(models.Model):
+ pass
+
+# Subclass of the Parent model that adds no fields of its own.
+class Child(Parent):
+ pass
+
+# Model with no fields of its own; RelToBase.base is a ForeignKey to it.
+class Base(models.Model):
+ pass
+
+# Points at Base through a foreign key whose on_delete is DO_NOTHING.
+class RelToBase(models.Model):
+ base = models.ForeignKey(Base, on_delete=models.DO_NOTHING)
Oops, something went wrong.

0 comments on commit 1cd6e04

Please sign in to comment.