Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Fixed #18676 -- Allow fast-path deletion of objects

Objects can be fast-path deleted if there are no signals, and there are
no further cascades. If fast-path is taken, the objects do not need to
be loaded into memory before deletion.

Thanks to Jeremy Dunck, Simon Charette and Alex Gaynor for reviewing
the patch.
  • Loading branch information...
commit 1cd6e04cd4f768bcd4385b75de433d497d938f82 1 parent 3fcca0e
@akaariai akaariai authored
View
7 django/contrib/admin/util.py
@@ -191,6 +191,13 @@ def nested(self, format_callback=None):
roots.extend(self._nested(root, seen, format_callback))
return roots
+ def can_fast_delete(self, *args, **kwargs):
+ """
+ We always want to load the objects into memory so that we can display
+ them to the user in confirm page.
+ """
+ return False
+
def model_format_dict(obj):
"""
View
63 django/db/models/deletion.py
@@ -77,6 +77,9 @@ def __init__(self, using):
self.data = {}
self.batches = {} # {model: {field: set([instances])}}
self.field_updates = {} # {model: {(field, value): set([instances])}}
+ # fast_deletes is a list of queryset-likes that can be deleted without
+ # fetching the objects into memory.
+ self.fast_deletes = []
# Tracks deletion-order dependency for databases without transactions
# or ability to defer constraint checks. Only concrete model classes
@@ -131,6 +134,43 @@ def add_field_update(self, field, value, objs):
model, {}).setdefault(
(field, value), set()).update(objs)
+ def can_fast_delete(self, objs, from_field=None):
+ """
+ Determines if the objects in the given queryset-like can be
+ fast-deleted. This can be done if there are no cascades, no
+ parents and no signal listeners for the object class.
+
+ The 'from_field' tells where we are coming from - we need this to
+ determine if the objects are in fact to be deleted. Allows also
+ skipping parent -> child -> parent chain preventing fast delete of
+ the child.
+ """
+ if from_field and from_field.rel.on_delete is not CASCADE:
+ return False
+ if not (hasattr(objs, 'model') and hasattr(objs, '_raw_delete')):
+ return False
+ model = objs.model
+ if (signals.pre_delete.has_listeners(model)
+ or signals.post_delete.has_listeners(model)
+ or signals.m2m_changed.has_listeners(model)):
+ return False
+ # The use of from_field comes from the need to avoid cascade back to
+ # parent when parent delete is cascading to child.
+ opts = model._meta
+ if any(link != from_field for link in opts.concrete_model._meta.parents.values()):
+ return False
+ # Foreign keys pointing to this model, both from m2m and other
+ # models.
+ for related in opts.get_all_related_objects(
+ include_hidden=True, include_proxy_eq=True):
+ if related.field.rel.on_delete is not DO_NOTHING:
+ return False
+ # GFK deletes
+ for relation in opts.many_to_many:
+ if not relation.rel.through:
+ return False
+ return True
+
def collect(self, objs, source=None, nullable=False, collect_related=True,
source_attr=None, reverse_dependency=False):
"""
@@ -148,6 +188,9 @@ def collect(self, objs, source=None, nullable=False, collect_related=True,
models, the one case in which the cascade follows the forwards
direction of an FK rather than the reverse direction.)
"""
+ if self.can_fast_delete(objs):
+ self.fast_deletes.append(objs)
+ return
new_objs = self.add(objs, source, nullable,
reverse_dependency=reverse_dependency)
if not new_objs:
@@ -160,6 +203,10 @@ def collect(self, objs, source=None, nullable=False, collect_related=True,
concrete_model = model._meta.concrete_model
for ptr in six.itervalues(concrete_model._meta.parents):
if ptr:
+ # FIXME: This seems to be buggy and execute a query for each
+ # parent object fetch. We have the parent data in the obj,
+ # but we don't have a nice way to turn that data into parent
+ # object instance.
parent_objs = [getattr(obj, ptr.name) for obj in new_objs]
self.collect(parent_objs, source=model,
source_attr=ptr.rel.related_name,
@@ -170,12 +217,12 @@ def collect(self, objs, source=None, nullable=False, collect_related=True,
for related in model._meta.get_all_related_objects(
include_hidden=True, include_proxy_eq=True):
field = related.field
- if related.model._meta.auto_created:
- self.add_batch(related.model, field, new_objs)
- else:
- sub_objs = self.related_objects(related, new_objs)
- if not sub_objs:
- continue
+ if field.rel.on_delete == DO_NOTHING:
+ continue
+ sub_objs = self.related_objects(related, new_objs)
+ if self.can_fast_delete(sub_objs, from_field=field):
+ self.fast_deletes.append(sub_objs)
+ elif sub_objs:
field.rel.on_delete(self, field, sub_objs, self.using)
# TODO This entire block is only needed as a special case to
@@ -241,6 +288,10 @@ def delete(self):
sender=model, instance=obj, using=self.using
)
+ # fast deletes
+ for qs in self.fast_deletes:
+ qs._raw_delete(using=self.using)
+
# update fields
for model, instances_for_fieldvalues in six.iteritems(self.field_updates):
query = sql.UpdateQuery(model)
View
8 django/db/models/query.py
@@ -529,6 +529,14 @@ def delete(self):
self._result_cache = None
delete.alters_data = True
+ def _raw_delete(self, using):
+ """
+ Deletes objects found from the given queryset in single direct SQL
+ query. No signals are sent, and there is no protection for cascades.
+ """
+ sql.DeleteQuery(self.model).delete_qs(self, using)
+ _raw_delete.alters_data = True
+
def update(self, **kwargs):
"""
Updates all elements in the current QuerySet, setting all the given
View
3  django/db/models/sql/compiler.py
@@ -934,7 +934,8 @@ def as_sql(self):
qn = self.quote_name_unless_alias
result = ['DELETE FROM %s' % qn(self.query.tables[0])]
where, params = self.query.where.as_sql(qn=qn, connection=self.connection)
- result.append('WHERE %s' % where)
+ if where:
+ result.append('WHERE %s' % where)
return ' '.join(result), tuple(params)
class SQLUpdateCompiler(SQLCompiler):
View
32 django/db/models/sql/subqueries.py
@@ -3,6 +3,7 @@
"""
from django.core.exceptions import FieldError
+from django.db import connections
from django.db.models.constants import LOOKUP_SEP
from django.db.models.fields import DateField, FieldDoesNotExist
from django.db.models.sql.constants import *
@@ -46,6 +47,37 @@ def delete_batch(self, pk_list, using, field=None):
pk_list[offset:offset + GET_ITERATOR_CHUNK_SIZE]), AND)
self.do_query(self.model._meta.db_table, where, using=using)
+ def delete_qs(self, query, using):
+ innerq = query.query
+ # Make sure the inner query has at least one table in use.
+ innerq.get_initial_alias()
+ # The same for our new query.
+ self.get_initial_alias()
+ innerq_used_tables = [t for t in innerq.tables
+ if innerq.alias_refcount[t]]
+ if ((not innerq_used_tables or innerq_used_tables == self.tables)
+ and not len(innerq.having)):
+ # There is only the base table in use in the query, and there are
+ # no aggregate filtering going on.
+ self.where = innerq.where
+ else:
+ pk = query.model._meta.pk
+ if not connections[using].features.update_can_self_select:
+ # We can't do the delete using subquery.
+ values = list(query.values_list('pk', flat=True))
+ if not values:
+ return
+ self.delete_batch(values, using)
+ return
+ else:
+ values = innerq
+ innerq.select = [(self.get_initial_alias(), pk.column)]
+ where = self.where_class()
+ where.add((Constraint(None, pk.column, pk), 'in', values), AND)
+ self.where = where
+ self.get_compiler(using).execute_sql(None)
+
+
class UpdateQuery(Query):
"""
Represents an "update" SQL query.
View
15 docs/ref/models/querysets.txt
@@ -1667,6 +1667,21 @@ methods on your models. It does, however, emit the
:data:`~django.db.models.signals.post_delete` signals for all deleted objects
(including cascaded deletions).
+.. versionadded:: 1.5
+ Allow fast-path deletion of objects
+
+Django needs to fetch objects into memory to send signals and handle cascades.
+However, if there are no cascades and no signals, then Django may take a
+fast-path and delete objects without fetching into memory. For large
+deletes this can result in significantly reduced memory usage. The amount of
+executed queries can be reduced, too.
+
+ForeignKeys which are set to :attr:`~django.db.models.ForeignKey.on_delete`
+DO_NOTHING do not prevent taking the fast-path in deletion.
+
+Note that the queries generated in object deletion is an implementation
+detail subject to change.
+
.. _field-lookups:
Field lookups
View
6 docs/releases/1.5.txt
@@ -149,6 +149,12 @@ Django 1.5 also includes several smaller improvements worth noting:
* Django now provides a mod_wsgi :doc:`auth handler
</howto/deployment/wsgi/apache-auth>`
+* The :meth:`QuerySet.delete() <django.db.models.query.QuerySet.delete>`
+ and :meth:`Model.delete() <django.db.models.Model.delete()>` can now take
+ fast-path in some cases. The fast-path allows for less queries and less
+ objects fetched into memory. See :meth:`QuerySet.delete()
+ <django.db.models.query.QuerySet.delete>` for details.
+
Backwards incompatible changes in 1.5
=====================================
View
20 tests/modeltests/delete/models.py
@@ -95,7 +95,7 @@ class MRNull(models.Model):
class Avatar(models.Model):
- pass
+ desc = models.TextField(null=True)
class User(models.Model):
@@ -108,3 +108,21 @@ class HiddenUser(models.Model):
class HiddenUserProfile(models.Model):
user = models.ForeignKey(HiddenUser)
+
+class M2MTo(models.Model):
+ pass
+
+class M2MFrom(models.Model):
+ m2m = models.ManyToManyField(M2MTo)
+
+class Parent(models.Model):
+ pass
+
+class Child(Parent):
+ pass
+
+class Base(models.Model):
+ pass
+
+class RelToBase(models.Model):
+ base = models.ForeignKey(Base, on_delete=models.DO_NOTHING)
View
101 tests/modeltests/delete/tests.py
@@ -1,11 +1,12 @@
from __future__ import absolute_import
-from django.db import models, IntegrityError
+from django.db import models, IntegrityError, connection
from django.test import TestCase, skipUnlessDBFeature, skipIfDBFeature
from django.utils.six.moves import xrange
from .models import (R, RChild, S, T, U, A, M, MR, MRNull,
- create_a, get_default_r, User, Avatar, HiddenUser, HiddenUserProfile)
+ create_a, get_default_r, User, Avatar, HiddenUser, HiddenUserProfile,
+ M2MTo, M2MFrom, Parent, Child, Base)
class OnDeleteTests(TestCase):
@@ -74,6 +75,16 @@ def check_do_nothing(sender, **kwargs):
self.assertEqual(replacement_r, a.donothing)
models.signals.pre_delete.disconnect(check_do_nothing)
+ def test_do_nothing_qscount(self):
+ """
+ Test that a models.DO_NOTHING relation doesn't trigger a query.
+ """
+ b = Base.objects.create()
+ with self.assertNumQueries(1):
+ # RelToBase should not be queried.
+ b.delete()
+ self.assertEqual(Base.objects.count(), 0)
+
def test_inheritance_cascade_up(self):
child = RChild.objects.create()
child.delete()
@@ -229,16 +240,34 @@ def test_can_defer_constraint_checks(self):
# 1 query to delete the avatar
# The important thing is that when we can defer constraint checks there
# is no need to do an UPDATE on User.avatar to null it out.
+
+ # Attach a signal to make sure we will not do fast_deletes.
+ calls = []
+ def noop(*args, **kwargs):
+ calls.append('')
+ models.signals.post_delete.connect(noop, sender=User)
+
self.assertNumQueries(3, a.delete)
self.assertFalse(User.objects.exists())
self.assertFalse(Avatar.objects.exists())
+ self.assertEquals(len(calls), 1)
+ models.signals.post_delete.disconnect(noop, sender=User)
@skipIfDBFeature("can_defer_constraint_checks")
def test_cannot_defer_constraint_checks(self):
u = User.objects.create(
avatar=Avatar.objects.create()
)
+ # Attach a signal to make sure we will not do fast_deletes.
+ calls = []
+ def noop(*args, **kwargs):
+ calls.append('')
+ models.signals.post_delete.connect(noop, sender=User)
+
a = Avatar.objects.get(pk=u.avatar_id)
+ # The below doesn't make sense... Why do we need to null out
+ # user.avatar if we are going to delete the user immediately after it,
+ # and there are no more cascades.
# 1 query to find the users for the avatar.
# 1 query to delete the user
# 1 query to null out user.avatar, because we can't defer the constraint
@@ -246,6 +275,8 @@ def test_cannot_defer_constraint_checks(self):
self.assertNumQueries(4, a.delete)
self.assertFalse(User.objects.exists())
self.assertFalse(Avatar.objects.exists())
+ self.assertEquals(len(calls), 1)
+ models.signals.post_delete.disconnect(noop, sender=User)
def test_hidden_related(self):
r = R.objects.create()
@@ -254,3 +285,69 @@ def test_hidden_related(self):
r.delete()
self.assertEqual(HiddenUserProfile.objects.count(), 0)
+
+class FastDeleteTests(TestCase):
+
+ def test_fast_delete_fk(self):
+ u = User.objects.create(
+ avatar=Avatar.objects.create()
+ )
+ a = Avatar.objects.get(pk=u.avatar_id)
+ # 1 query to fast-delete the user
+ # 1 query to delete the avatar
+ self.assertNumQueries(2, a.delete)
+ self.assertFalse(User.objects.exists())
+ self.assertFalse(Avatar.objects.exists())
+
+ def test_fast_delete_m2m(self):
+ t = M2MTo.objects.create()
+ f = M2MFrom.objects.create()
+ f.m2m.add(t)
+ # 1 to delete f, 1 to fast-delete m2m for f
+ self.assertNumQueries(2, f.delete)
+
+ def test_fast_delete_revm2m(self):
+ t = M2MTo.objects.create()
+ f = M2MFrom.objects.create()
+ f.m2m.add(t)
+ # 1 to delete t, 1 to fast-delete t's m_set
+ self.assertNumQueries(2, f.delete)
+
+ def test_fast_delete_qs(self):
+ u1 = User.objects.create()
+ u2 = User.objects.create()
+ self.assertNumQueries(1, User.objects.filter(pk=u1.pk).delete)
+ self.assertEquals(User.objects.count(), 1)
+ self.assertTrue(User.objects.filter(pk=u2.pk).exists())
+
+ def test_fast_delete_joined_qs(self):
+ a = Avatar.objects.create(desc='a')
+ User.objects.create(avatar=a)
+ u2 = User.objects.create()
+ expected_queries = 1 if connection.features.update_can_self_select else 2
+ self.assertNumQueries(expected_queries,
+ User.objects.filter(avatar__desc='a').delete)
+ self.assertEquals(User.objects.count(), 1)
+ self.assertTrue(User.objects.filter(pk=u2.pk).exists())
+
+ def test_fast_delete_inheritance(self):
+ c = Child.objects.create()
+ p = Parent.objects.create()
+ # 1 for self, 1 for parent
+ # However, this doesn't work as child.parent access creates a query,
+ # and this means we will be generating extra queries (a lot for large
+ # querysets). This is not a fast-delete problem.
+ # self.assertNumQueries(2, c.delete)
+ c.delete()
+ self.assertFalse(Child.objects.exists())
+ self.assertEquals(Parent.objects.count(), 1)
+ self.assertEquals(Parent.objects.filter(pk=p.pk).count(), 1)
+ # 1 for self delete, 1 for fast delete of empty "child" qs.
+ self.assertNumQueries(2, p.delete)
+ self.assertFalse(Parent.objects.exists())
+ # 1 for self delete, 1 for fast delete of empty "child" qs.
+ c = Child.objects.create()
+ p = c.parent_ptr
+ self.assertNumQueries(2, p.delete)
+ self.assertFalse(Parent.objects.exists())
+ self.assertFalse(Child.objects.exists())
View
3  tests/regressiontests/admin_util/models.py
@@ -39,3 +39,6 @@ class Guest(models.Model):
class Meta:
verbose_name = "awesome guest"
+
+class EventGuide(models.Model):
+ event = models.ForeignKey(Event, on_delete=models.DO_NOTHING)
View
13 tests/regressiontests/admin_util/tests.py
@@ -17,7 +17,7 @@
from django.utils.safestring import mark_safe
from django.utils import six
-from .models import Article, Count, Event, Location
+from .models import Article, Count, Event, Location, EventGuide
class NestedObjectsTests(TestCase):
@@ -71,6 +71,17 @@ def test_queries(self):
# Should not require additional queries to populate the nested graph.
self.assertNumQueries(2, self._collect, 0)
+ def test_on_delete_do_nothing(self):
+ """
+ Check that the nested collector doesn't query for DO_NOTHING objects.
+ """
+ n = NestedObjects(using=DEFAULT_DB_ALIAS)
+ objs = [Event.objects.create()]
+ EventGuide.objects.create(event=objs[0])
+ with self.assertNumQueries(2):
+ # One for Location, one for Guest, and no query for EventGuide
+ n.collect(objs)
+
class UtilTests(unittest.TestCase):
def test_values_from_lookup_field(self):
"""
View
11 tests/regressiontests/delete_regress/tests.py
@@ -3,7 +3,7 @@
import datetime
from django.conf import settings
-from django.db import backend, transaction, DEFAULT_DB_ALIAS
+from django.db import backend, transaction, DEFAULT_DB_ALIAS, models
from django.test import TestCase, TransactionTestCase, skipUnlessDBFeature
from .models import (Book, Award, AwardNote, Person, Child, Toy, PlayedWith,
@@ -139,17 +139,24 @@ def test_to_field(self):
eaten = Eaten.objects.create(food=apple, meal="lunch")
apple.delete()
+ self.assertFalse(Food.objects.exists())
+ self.assertFalse(Eaten.objects.exists())
+
class LargeDeleteTests(TestCase):
def test_large_deletes(self):
"Regression for #13309 -- if the number of objects > chunk size, deletion still occurs"
for x in range(300):
track = Book.objects.create(pagecount=x+100)
+ # attach a signal to make sure we will not fast-delete
+ def noop(*args, **kwargs):
+ pass
+ models.signals.post_delete.connect(noop, sender=Book)
Book.objects.all().delete()
+ models.signals.post_delete.disconnect(noop, sender=Book)
self.assertEqual(Book.objects.count(), 0)
-
class ProxyDeleteTest(TestCase):
"""
Tests on_delete behavior for proxy models.
View
12 tests/regressiontests/dispatch/tests/test_dispatcher.py
@@ -127,15 +127,15 @@ def testDisconnection(self):
self._testIsClean(a_signal)
def test_has_listeners(self):
- self.assertIs(a_signal.has_listeners(), False)
- self.assertIs(a_signal.has_listeners(sender=object()), False)
+ self.assertFalse(a_signal.has_listeners())
+ self.assertFalse(a_signal.has_listeners(sender=object()))
receiver_1 = Callable()
a_signal.connect(receiver_1)
- self.assertIs(a_signal.has_listeners(), True)
- self.assertIs(a_signal.has_listeners(sender=object()), True)
+ self.assertTrue(a_signal.has_listeners())
+ self.assertTrue(a_signal.has_listeners(sender=object()))
a_signal.disconnect(receiver_1)
- self.assertIs(a_signal.has_listeners(), False)
- self.assertIs(a_signal.has_listeners(sender=object()), False)
+ self.assertFalse(a_signal.has_listeners())
+ self.assertFalse(a_signal.has_listeners(sender=object()))
class ReceiverTestCase(unittest.TestCase):
Please sign in to comment.
Something went wrong with that request. Please try again.