Skip to content

Commit

Permalink
Fixed #18676 -- Allow fast-path deletion of objects
Browse files Browse the repository at this point in the history
Objects can be fast-path deleted if there are no signals, and there are
no further cascades. If fast-path is taken, the objects do not need to
be loaded into memory before deletion.

Thanks to Jeremy Dunck, Simon Charette and Alex Gaynor for reviewing
the patch.
  • Loading branch information
akaariai committed Sep 28, 2012
1 parent 3fcca0e commit 1cd6e04
Show file tree
Hide file tree
Showing 13 changed files with 275 additions and 19 deletions.
7 changes: 7 additions & 0 deletions django/contrib/admin/util.py
Expand Up @@ -191,6 +191,13 @@ def nested(self, format_callback=None):
roots.extend(self._nested(root, seen, format_callback)) roots.extend(self._nested(root, seen, format_callback))
return roots return roots


def can_fast_delete(self, *args, **kwargs):
    """
    Never allow the fast path, whatever arguments are given.

    The objects always have to be loaded into memory so that they can be
    displayed to the user on the confirmation page.
    """
    return False



def model_format_dict(obj): def model_format_dict(obj):
""" """
Expand Down
63 changes: 57 additions & 6 deletions django/db/models/deletion.py
Expand Up @@ -77,6 +77,9 @@ def __init__(self, using):
self.data = {} self.data = {}
self.batches = {} # {model: {field: set([instances])}} self.batches = {} # {model: {field: set([instances])}}
self.field_updates = {} # {model: {(field, value): set([instances])}} self.field_updates = {} # {model: {(field, value): set([instances])}}
# fast_deletes is a list of queryset-likes that can be deleted without
# fetching the objects into memory.
self.fast_deletes = []


# Tracks deletion-order dependency for databases without transactions # Tracks deletion-order dependency for databases without transactions
# or ability to defer constraint checks. Only concrete model classes # or ability to defer constraint checks. Only concrete model classes
Expand Down Expand Up @@ -131,6 +134,43 @@ def add_field_update(self, field, value, objs):
model, {}).setdefault( model, {}).setdefault(
(field, value), set()).update(objs) (field, value), set()).update(objs)


def can_fast_delete(self, objs, from_field=None):
    """
    Return True if the queryset-like ``objs`` can be fast-deleted.

    A fast delete is allowed only when all of the following hold:

    * ``from_field`` (the relation we arrived through, if any) cascades;
    * ``objs`` exposes both ``model`` and ``_raw_delete``;
    * the model has no pre_delete, post_delete or m2m_changed listeners;
    * the concrete model has no parent link other than ``from_field``;
    * every relation pointing at the model uses DO_NOTHING;
    * every many-to-many entry on the model has a ``through`` model.

    ``from_field`` also lets a parent -> child cascade skip the link back
    to the parent, which would otherwise prevent fast delete of the child.
    """
    # Arrived through a relation that does not cascade: these objects are
    # not simply going to be deleted.
    if from_field and from_field.rel.on_delete is not CASCADE:
        return False

    # Only queryset-likes can be deleted without fetching the objects.
    is_queryset_like = hasattr(objs, 'model') and hasattr(objs, '_raw_delete')
    if not is_queryset_like:
        return False

    model = objs.model
    watched_signals = (
        signals.pre_delete,
        signals.post_delete,
        signals.m2m_changed,
    )
    if any(sig.has_listeners(model) for sig in watched_signals):
        return False

    # A parent link other than the field we came from would need a cascade
    # up to the parent; the link we came from is skipped to avoid cascading
    # back to a parent whose delete is already cascading to this child.
    opts = model._meta
    for parent_link in opts.concrete_model._meta.parents.values():
        if parent_link != from_field:
            return False

    # Foreign keys pointing to this model, both from m2m and other models.
    incoming = opts.get_all_related_objects(
        include_hidden=True, include_proxy_eq=True)
    if any(rel.field.rel.on_delete is not DO_NOTHING for rel in incoming):
        return False

    # GFK deletes: a many-to-many entry without a through model rules out
    # the fast path.
    return all(m2m.rel.through for m2m in opts.many_to_many)

def collect(self, objs, source=None, nullable=False, collect_related=True, def collect(self, objs, source=None, nullable=False, collect_related=True,
source_attr=None, reverse_dependency=False): source_attr=None, reverse_dependency=False):
""" """
Expand All @@ -148,6 +188,9 @@ def collect(self, objs, source=None, nullable=False, collect_related=True,
models, the one case in which the cascade follows the forwards models, the one case in which the cascade follows the forwards
direction of an FK rather than the reverse direction.) direction of an FK rather than the reverse direction.)
""" """
if self.can_fast_delete(objs):
self.fast_deletes.append(objs)
return
new_objs = self.add(objs, source, nullable, new_objs = self.add(objs, source, nullable,
reverse_dependency=reverse_dependency) reverse_dependency=reverse_dependency)
if not new_objs: if not new_objs:
Expand All @@ -160,6 +203,10 @@ def collect(self, objs, source=None, nullable=False, collect_related=True,
concrete_model = model._meta.concrete_model concrete_model = model._meta.concrete_model
for ptr in six.itervalues(concrete_model._meta.parents): for ptr in six.itervalues(concrete_model._meta.parents):
if ptr: if ptr:
# FIXME: This seems to be buggy and execute a query for each
# parent object fetch. We have the parent data in the obj,
# but we don't have a nice way to turn that data into parent
# object instance.
parent_objs = [getattr(obj, ptr.name) for obj in new_objs] parent_objs = [getattr(obj, ptr.name) for obj in new_objs]
self.collect(parent_objs, source=model, self.collect(parent_objs, source=model,
source_attr=ptr.rel.related_name, source_attr=ptr.rel.related_name,
Expand All @@ -170,12 +217,12 @@ def collect(self, objs, source=None, nullable=False, collect_related=True,
for related in model._meta.get_all_related_objects( for related in model._meta.get_all_related_objects(
include_hidden=True, include_proxy_eq=True): include_hidden=True, include_proxy_eq=True):
field = related.field field = related.field
if related.model._meta.auto_created: if field.rel.on_delete == DO_NOTHING:
self.add_batch(related.model, field, new_objs) continue
else: sub_objs = self.related_objects(related, new_objs)
sub_objs = self.related_objects(related, new_objs) if self.can_fast_delete(sub_objs, from_field=field):
if not sub_objs: self.fast_deletes.append(sub_objs)
continue elif sub_objs:
field.rel.on_delete(self, field, sub_objs, self.using) field.rel.on_delete(self, field, sub_objs, self.using)


# TODO This entire block is only needed as a special case to # TODO This entire block is only needed as a special case to
Expand Down Expand Up @@ -241,6 +288,10 @@ def delete(self):
sender=model, instance=obj, using=self.using sender=model, instance=obj, using=self.using
) )


# fast deletes
for qs in self.fast_deletes:
qs._raw_delete(using=self.using)

# update fields # update fields
for model, instances_for_fieldvalues in six.iteritems(self.field_updates): for model, instances_for_fieldvalues in six.iteritems(self.field_updates):
query = sql.UpdateQuery(model) query = sql.UpdateQuery(model)
Expand Down
8 changes: 8 additions & 0 deletions django/db/models/query.py
Expand Up @@ -529,6 +529,14 @@ def delete(self):
self._result_cache = None self._result_cache = None
delete.alters_data = True delete.alters_data = True


def _raw_delete(self, using):
"""
Deletes objects found from the given queryset in single direct SQL
query. No signals are sent, and there is no protection for cascades.
"""
sql.DeleteQuery(self.model).delete_qs(self, using)
_raw_delete.alters_data = True

def update(self, **kwargs): def update(self, **kwargs):
""" """
Updates all elements in the current QuerySet, setting all the given Updates all elements in the current QuerySet, setting all the given
Expand Down
3 changes: 2 additions & 1 deletion django/db/models/sql/compiler.py
Expand Up @@ -934,7 +934,8 @@ def as_sql(self):
qn = self.quote_name_unless_alias qn = self.quote_name_unless_alias
result = ['DELETE FROM %s' % qn(self.query.tables[0])] result = ['DELETE FROM %s' % qn(self.query.tables[0])]
where, params = self.query.where.as_sql(qn=qn, connection=self.connection) where, params = self.query.where.as_sql(qn=qn, connection=self.connection)
result.append('WHERE %s' % where) if where:
result.append('WHERE %s' % where)
return ' '.join(result), tuple(params) return ' '.join(result), tuple(params)


class SQLUpdateCompiler(SQLCompiler): class SQLUpdateCompiler(SQLCompiler):
Expand Down
32 changes: 32 additions & 0 deletions django/db/models/sql/subqueries.py
Expand Up @@ -3,6 +3,7 @@
""" """


from django.core.exceptions import FieldError from django.core.exceptions import FieldError
from django.db import connections
from django.db.models.constants import LOOKUP_SEP from django.db.models.constants import LOOKUP_SEP
from django.db.models.fields import DateField, FieldDoesNotExist from django.db.models.fields import DateField, FieldDoesNotExist
from django.db.models.sql.constants import * from django.db.models.sql.constants import *
Expand Down Expand Up @@ -46,6 +47,37 @@ def delete_batch(self, pk_list, using, field=None):
pk_list[offset:offset + GET_ITERATOR_CHUNK_SIZE]), AND) pk_list[offset:offset + GET_ITERATOR_CHUNK_SIZE]), AND)
self.do_query(self.model._meta.db_table, where, using=using) self.do_query(self.model._meta.db_table, where, using=using)


def delete_qs(self, query, using):
    """
    Delete the rows matched by the queryset ``query`` on the connection
    named ``using``.

    One of three strategies is used:

    * If the inner query uses only the base table and has no HAVING
      filtering, its WHERE clause is reused directly.
    * Otherwise, if the backend reports ``update_can_self_select``, delete
      with a ``pk IN (<inner query>)`` condition.
    * Otherwise, fetch the primary keys and pass them to ``delete_batch()``;
      nothing is executed when there are no primary keys.
    """
    inner_query = query.query
    # Make sure the inner query has at least one table in use.
    inner_query.get_initial_alias()
    # The same for our new query.
    self.get_initial_alias()

    tables_in_use = [alias for alias in inner_query.tables
                     if inner_query.alias_refcount[alias]]
    only_base_table = not tables_in_use or tables_in_use == self.tables

    if only_base_table and not len(inner_query.having):
        # There is only the base table in use in the query, and there is
        # no aggregate filtering going on.
        self.where = inner_query.where
    elif connections[using].features.update_can_self_select:
        # Delete through a subquery that selects the primary key.
        pk = query.model._meta.pk
        inner_query.select = [(self.get_initial_alias(), pk.column)]
        pk_in_subquery = self.where_class()
        pk_in_subquery.add(
            (Constraint(None, pk.column, pk), 'in', inner_query), AND)
        self.where = pk_in_subquery
    else:
        # We can't do the delete using a subquery, so delete by pk value.
        pk_values = list(query.values_list('pk', flat=True))
        if pk_values:
            self.delete_batch(pk_values, using)
        return

    self.get_compiler(using).execute_sql(None)


class UpdateQuery(Query): class UpdateQuery(Query):
""" """
Represents an "update" SQL query. Represents an "update" SQL query.
Expand Down
15 changes: 15 additions & 0 deletions docs/ref/models/querysets.txt
Expand Up @@ -1667,6 +1667,21 @@ methods on your models. It does, however, emit the
:data:`~django.db.models.signals.post_delete` signals for all deleted objects :data:`~django.db.models.signals.post_delete` signals for all deleted objects
(including cascaded deletions). (including cascaded deletions).


.. versionadded:: 1.5
Allow fast-path deletion of objects

Django needs to fetch objects into memory to send signals and handle cascades.
However, if there are no cascades and no signals, then Django may take a
fast-path and delete objects without fetching into memory. For large
deletes this can result in significantly reduced memory usage. The number of
executed queries can be reduced, too.

ForeignKeys which are set to :attr:`~django.db.models.ForeignKey.on_delete`
DO_NOTHING do not prevent taking the fast-path in deletion.

Note that the queries generated in object deletion are an implementation
detail subject to change.

.. _field-lookups: .. _field-lookups:


Field lookups Field lookups
Expand Down
6 changes: 6 additions & 0 deletions docs/releases/1.5.txt
Expand Up @@ -149,6 +149,12 @@ Django 1.5 also includes several smaller improvements worth noting:
* Django now provides a mod_wsgi :doc:`auth handler * Django now provides a mod_wsgi :doc:`auth handler
</howto/deployment/wsgi/apache-auth>` </howto/deployment/wsgi/apache-auth>`


* The :meth:`QuerySet.delete() <django.db.models.query.QuerySet.delete>`
and :meth:`Model.delete() <django.db.models.Model.delete()>` can now take
fast-path in some cases. The fast-path allows for fewer queries and fewer
objects fetched into memory. See :meth:`QuerySet.delete()
<django.db.models.query.QuerySet.delete>` for details.

Backwards incompatible changes in 1.5 Backwards incompatible changes in 1.5
===================================== =====================================


Expand Down
20 changes: 19 additions & 1 deletion tests/modeltests/delete/models.py
Expand Up @@ -95,7 +95,7 @@ class MRNull(models.Model):




class Avatar(models.Model): class Avatar(models.Model):
pass desc = models.TextField(null=True)




class User(models.Model): class User(models.Model):
Expand All @@ -108,3 +108,21 @@ class HiddenUser(models.Model):


class HiddenUserProfile(models.Model): class HiddenUserProfile(models.Model):
user = models.ForeignKey(HiddenUser) user = models.ForeignKey(HiddenUser)

# Model with no fields of its own; target of M2MFrom.m2m.
class M2MTo(models.Model):
pass

# Model holding a many-to-many relation to M2MTo.
class M2MFrom(models.Model):
m2m = models.ManyToManyField(M2MTo)

# Model with no fields of its own; base class of Child.
class Parent(models.Model):
pass

# Subclass of Parent that adds no fields of its own.
class Child(Parent):
pass

# Model with no fields of its own; target of RelToBase.base.
class Base(models.Model):
pass

# Model whose foreign key to Base is declared with on_delete=DO_NOTHING.
class RelToBase(models.Model):
base = models.ForeignKey(Base, on_delete=models.DO_NOTHING)

0 comments on commit 1cd6e04

Please sign in to comment.