Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fixed #18676 -- Allow fast-path deletion of objects

Objects can be fast-path deleted if there are no signals, and there are
no further cascades. If fast-path is taken, the objects do not need to
be loaded into memory before deletion.

Thanks to Jeremy Dunck, Simon Charette and Alex Gaynor for reviewing
the patch.
  • Loading branch information...
commit 1cd6e04cd4f768bcd4385b75de433d497d938f82 1 parent 3fcca0e
Anssi Kääriäinen authored September 20, 2012
7  django/contrib/admin/util.py
@@ -191,6 +191,13 @@ def nested(self, format_callback=None):
191 191
             roots.extend(self._nested(root, seen, format_callback))
192 192
         return roots
193 193
 
  194
+    def can_fast_delete(self, *args, **kwargs):
  195
+        """
  196
+        We always want to load the objects into memory so that we can display
  197
+        them to the user in confirm page.
  198
+        """
  199
+        return False
  200
+
194 201
 
195 202
 def model_format_dict(obj):
196 203
     """
63  django/db/models/deletion.py
@@ -77,6 +77,9 @@ def __init__(self, using):
77 77
         self.data = {}
78 78
         self.batches = {} # {model: {field: set([instances])}}
79 79
         self.field_updates = {} # {model: {(field, value): set([instances])}}
  80
+        # fast_deletes is a list of queryset-likes that can be deleted without
  81
+        # fetching the objects into memory.
  82
+        self.fast_deletes = [] 
80 83
 
81 84
         # Tracks deletion-order dependency for databases without transactions
82 85
         # or ability to defer constraint checks. Only concrete model classes
@@ -131,6 +134,43 @@ def add_field_update(self, field, value, objs):
131 134
             model, {}).setdefault(
132 135
             (field, value), set()).update(objs)
133 136
 
  137
+    def can_fast_delete(self, objs, from_field=None):
  138
+        """
  139
+        Determines if the objects in the given queryset-like can be
  140
+        fast-deleted. This can be done if there are no cascades, no
  141
+        parents and no signal listeners for the object class.
  142
+
  143
+        The 'from_field' tells where we are coming from - we need this to
  144
+        determine if the objects are in fact to be deleted. Allows also
  145
+        skipping parent -> child -> parent chain preventing fast delete of
  146
+        the child.
  147
+        """
  148
+        if from_field and from_field.rel.on_delete is not CASCADE:
  149
+            return False
  150
+        if not (hasattr(objs, 'model') and hasattr(objs, '_raw_delete')):
  151
+            return False
  152
+        model = objs.model
  153
+        if (signals.pre_delete.has_listeners(model)
  154
+                or signals.post_delete.has_listeners(model)
  155
+                or signals.m2m_changed.has_listeners(model)):
  156
+            return False
  157
+        # The use of from_field comes from the need to avoid cascade back to
  158
+        # parent when parent delete is cascading to child.
  159
+        opts = model._meta
  160
+        if any(link != from_field for link in opts.concrete_model._meta.parents.values()):
  161
+            return False
  162
+        # Foreign keys pointing to this model, both from m2m and other
  163
+        # models.
  164
+        for related in opts.get_all_related_objects(
  165
+            include_hidden=True, include_proxy_eq=True):
  166
+            if related.field.rel.on_delete is not DO_NOTHING:
  167
+                return False
  168
+        # GFK deletes
  169
+        for relation in opts.many_to_many:
  170
+            if not relation.rel.through:
  171
+                return False
  172
+        return True
  173
+
134 174
     def collect(self, objs, source=None, nullable=False, collect_related=True,
135 175
         source_attr=None, reverse_dependency=False):
136 176
         """
@@ -148,6 +188,9 @@ def collect(self, objs, source=None, nullable=False, collect_related=True,
148 188
         models, the one case in which the cascade follows the forwards
149 189
         direction of an FK rather than the reverse direction.)
150 190
         """
  191
+        if self.can_fast_delete(objs):
  192
+            self.fast_deletes.append(objs)
  193
+            return
151 194
         new_objs = self.add(objs, source, nullable,
152 195
                             reverse_dependency=reverse_dependency)
153 196
         if not new_objs:
@@ -160,6 +203,10 @@ def collect(self, objs, source=None, nullable=False, collect_related=True,
160 203
         concrete_model = model._meta.concrete_model
161 204
         for ptr in six.itervalues(concrete_model._meta.parents):
162 205
             if ptr:
  206
+                # FIXME: This seems to be buggy and execute a query for each
  207
+                # parent object fetch. We have the parent data in the obj,
  208
+                # but we don't have a nice way to turn that data into parent
  209
+                # object instance.
163 210
                 parent_objs = [getattr(obj, ptr.name) for obj in new_objs]
164 211
                 self.collect(parent_objs, source=model,
165 212
                              source_attr=ptr.rel.related_name,
@@ -170,12 +217,12 @@ def collect(self, objs, source=None, nullable=False, collect_related=True,
170 217
             for related in model._meta.get_all_related_objects(
171 218
                     include_hidden=True, include_proxy_eq=True):
172 219
                 field = related.field
173  
-                if related.model._meta.auto_created:
174  
-                    self.add_batch(related.model, field, new_objs)
175  
-                else:
176  
-                    sub_objs = self.related_objects(related, new_objs)
177  
-                    if not sub_objs:
178  
-                        continue
  220
+                if field.rel.on_delete == DO_NOTHING:
  221
+                    continue
  222
+                sub_objs = self.related_objects(related, new_objs)
  223
+                if self.can_fast_delete(sub_objs, from_field=field):
  224
+                    self.fast_deletes.append(sub_objs)
  225
+                elif sub_objs:
179 226
                     field.rel.on_delete(self, field, sub_objs, self.using)
180 227
 
181 228
             # TODO This entire block is only needed as a special case to
@@ -241,6 +288,10 @@ def delete(self):
241 288
                     sender=model, instance=obj, using=self.using
242 289
                 )
243 290
 
  291
+        # fast deletes
  292
+        for qs in self.fast_deletes:
  293
+            qs._raw_delete(using=self.using)
  294
+
244 295
         # update fields
245 296
         for model, instances_for_fieldvalues in six.iteritems(self.field_updates):
246 297
             query = sql.UpdateQuery(model)
8  django/db/models/query.py
@@ -529,6 +529,14 @@ def delete(self):
529 529
         self._result_cache = None
530 530
     delete.alters_data = True
531 531
 
  532
+    def _raw_delete(self, using):
  533
+        """
  534
+        Deletes objects found from the given queryset in single direct SQL
  535
+        query. No signals are sent, and there is no protection for cascades.
  536
+        """
  537
+        sql.DeleteQuery(self.model).delete_qs(self, using)
  538
+    _raw_delete.alters_data = True
  539
+
532 540
     def update(self, **kwargs):
533 541
         """
534 542
         Updates all elements in the current QuerySet, setting all the given
3  django/db/models/sql/compiler.py
@@ -934,7 +934,8 @@ def as_sql(self):
934 934
         qn = self.quote_name_unless_alias
935 935
         result = ['DELETE FROM %s' % qn(self.query.tables[0])]
936 936
         where, params = self.query.where.as_sql(qn=qn, connection=self.connection)
937  
-        result.append('WHERE %s' % where)
  937
+        if where:
  938
+            result.append('WHERE %s' % where)
938 939
         return ' '.join(result), tuple(params)
939 940
 
940 941
 class SQLUpdateCompiler(SQLCompiler):
32  django/db/models/sql/subqueries.py
@@ -3,6 +3,7 @@
3 3
 """
4 4
 
5 5
 from django.core.exceptions import FieldError
  6
+from django.db import connections
6 7
 from django.db.models.constants import LOOKUP_SEP
7 8
 from django.db.models.fields import DateField, FieldDoesNotExist
8 9
 from django.db.models.sql.constants import *
@@ -46,6 +47,37 @@ def delete_batch(self, pk_list, using, field=None):
46 47
                     pk_list[offset:offset + GET_ITERATOR_CHUNK_SIZE]), AND)
47 48
             self.do_query(self.model._meta.db_table, where, using=using)
48 49
 
  50
+    def delete_qs(self, query, using):
  51
+        innerq = query.query
  52
+        # Make sure the inner query has at least one table in use.
  53
+        innerq.get_initial_alias()
  54
+        # The same for our new query.
  55
+        self.get_initial_alias()
  56
+        innerq_used_tables = [t for t in innerq.tables
  57
+                              if innerq.alias_refcount[t]]
  58
+        if ((not innerq_used_tables or innerq_used_tables == self.tables)
  59
+            and not len(innerq.having)):
  60
+            # There is only the base table in use in the query, and there are
  61
+            # no aggregate filtering going on.
  62
+            self.where = innerq.where
  63
+        else:
  64
+            pk = query.model._meta.pk
  65
+            if not connections[using].features.update_can_self_select:
  66
+                # We can't do the delete using subquery.
  67
+                values = list(query.values_list('pk', flat=True))
  68
+                if not values:
  69
+                    return
  70
+                self.delete_batch(values, using)
  71
+                return
  72
+            else:
  73
+                values = innerq
  74
+                innerq.select = [(self.get_initial_alias(), pk.column)]
  75
+            where = self.where_class()
  76
+            where.add((Constraint(None, pk.column, pk), 'in', values), AND)
  77
+            self.where = where
  78
+        self.get_compiler(using).execute_sql(None)
  79
+
  80
+
49 81
 class UpdateQuery(Query):
50 82
     """
51 83
     Represents an "update" SQL query.
15  docs/ref/models/querysets.txt
@@ -1667,6 +1667,21 @@ methods on your models. It does, however, emit the
1667 1667
 :data:`~django.db.models.signals.post_delete` signals for all deleted objects
1668 1668
 (including cascaded deletions).
1669 1669
 
  1670
+.. versionadded:: 1.5
  1671
+    Allow fast-path deletion of objects
  1672
+
  1673
+Django needs to fetch objects into memory to send signals and handle cascades.
  1674
+However, if there are no cascades and no signals, then Django may take a
  1675
+fast-path and delete objects without fetching into memory. For large
  1676
+deletes this can result in significantly reduced memory usage. The number of
  1677
+executed queries can be reduced, too.
  1678
+
  1679
+ForeignKeys which are set to :attr:`~django.db.models.ForeignKey.on_delete`
  1680
+DO_NOTHING do not prevent taking the fast-path in deletion.
  1681
+
  1682
+Note that the queries generated in object deletion are an implementation
  1683
+detail subject to change.
  1684
+
1670 1685
 .. _field-lookups:
1671 1686
 
1672 1687
 Field lookups
6  docs/releases/1.5.txt
@@ -149,6 +149,12 @@ Django 1.5 also includes several smaller improvements worth noting:
149 149
 * Django now provides a mod_wsgi :doc:`auth handler
150 150
   </howto/deployment/wsgi/apache-auth>`
151 151
 
  152
+* The :meth:`QuerySet.delete() <django.db.models.query.QuerySet.delete>`
  153
+  and :meth:`Model.delete() <django.db.models.Model.delete>` can now take a
  154
+  fast-path in some cases. The fast-path allows for fewer queries and fewer
  155
+  objects fetched into memory. See :meth:`QuerySet.delete()
  156
+  <django.db.models.query.QuerySet.delete>` for details.
  157
+
152 158
 Backwards incompatible changes in 1.5
153 159
 =====================================
154 160
 
20  tests/modeltests/delete/models.py
@@ -95,7 +95,7 @@ class MRNull(models.Model):
95 95
 
96 96
 
97 97
 class Avatar(models.Model):
98  
-    pass
  98
+    desc = models.TextField(null=True)
99 99
 
100 100
 
101 101
 class User(models.Model):
@@ -108,3 +108,21 @@ class HiddenUser(models.Model):
108 108
 
109 109
 class HiddenUserProfile(models.Model):
110 110
     user = models.ForeignKey(HiddenUser)
  111
+
  112
+class M2MTo(models.Model):
  113
+    pass
  114
+
  115
+class M2MFrom(models.Model):
  116
+    m2m = models.ManyToManyField(M2MTo)
  117
+
  118
+class Parent(models.Model):
  119
+    pass
  120
+
  121
+class Child(Parent):
  122
+    pass
  123
+
  124
+class Base(models.Model):
  125
+    pass
  126
+
  127
+class RelToBase(models.Model):
  128
+    base = models.ForeignKey(Base, on_delete=models.DO_NOTHING)
101  tests/modeltests/delete/tests.py
... ...
@@ -1,11 +1,12 @@
1 1
 from __future__ import absolute_import
2 2
 
3  
-from django.db import models, IntegrityError
  3
+from django.db import models, IntegrityError, connection
4 4
 from django.test import TestCase, skipUnlessDBFeature, skipIfDBFeature
5 5
 from django.utils.six.moves import xrange
6 6
 
7 7
 from .models import (R, RChild, S, T, U, A, M, MR, MRNull,
8  
-    create_a, get_default_r, User, Avatar, HiddenUser, HiddenUserProfile)
  8
+    create_a, get_default_r, User, Avatar, HiddenUser, HiddenUserProfile,
  9
+    M2MTo, M2MFrom, Parent, Child, Base)
9 10
 
10 11
 
11 12
 class OnDeleteTests(TestCase):
@@ -74,6 +75,16 @@ def check_do_nothing(sender, **kwargs):
74 75
         self.assertEqual(replacement_r, a.donothing)
75 76
         models.signals.pre_delete.disconnect(check_do_nothing)
76 77
 
  78
+    def test_do_nothing_qscount(self):
  79
+        """
  80
+        Test that a models.DO_NOTHING relation doesn't trigger a query.
  81
+        """
  82
+        b = Base.objects.create()
  83
+        with self.assertNumQueries(1):
  84
+            # RelToBase should not be queried.
  85
+            b.delete()
  86
+        self.assertEqual(Base.objects.count(), 0)
  87
+
77 88
     def test_inheritance_cascade_up(self):
78 89
         child = RChild.objects.create()
79 90
         child.delete()
@@ -229,16 +240,34 @@ def test_can_defer_constraint_checks(self):
229 240
         # 1 query to delete the avatar
230 241
         # The important thing is that when we can defer constraint checks there
231 242
         # is no need to do an UPDATE on User.avatar to null it out.
  243
+
  244
+        # Attach a signal to make sure we will not do fast_deletes.
  245
+        calls = []
  246
+        def noop(*args, **kwargs):
  247
+            calls.append('')
  248
+        models.signals.post_delete.connect(noop, sender=User)
  249
+
232 250
         self.assertNumQueries(3, a.delete)
233 251
         self.assertFalse(User.objects.exists())
234 252
         self.assertFalse(Avatar.objects.exists())
  253
+        self.assertEquals(len(calls), 1)
  254
+        models.signals.post_delete.disconnect(noop, sender=User)
235 255
 
236 256
     @skipIfDBFeature("can_defer_constraint_checks")
237 257
     def test_cannot_defer_constraint_checks(self):
238 258
         u = User.objects.create(
239 259
             avatar=Avatar.objects.create()
240 260
         )
  261
+        # Attach a signal to make sure we will not do fast_deletes.
  262
+        calls = []
  263
+        def noop(*args, **kwargs):
  264
+            calls.append('')
  265
+        models.signals.post_delete.connect(noop, sender=User)
  266
+
241 267
         a = Avatar.objects.get(pk=u.avatar_id)
  268
+        # The below doesn't make sense... Why do we need to null out
  269
+        # user.avatar if we are going to delete the user immediately after it,
  270
+        # and there are no more cascades.
242 271
         # 1 query to find the users for the avatar.
243 272
         # 1 query to delete the user
244 273
         # 1 query to null out user.avatar, because we can't defer the constraint
@@ -246,6 +275,8 @@ def test_cannot_defer_constraint_checks(self):
246 275
         self.assertNumQueries(4, a.delete)
247 276
         self.assertFalse(User.objects.exists())
248 277
         self.assertFalse(Avatar.objects.exists())
  278
+        self.assertEquals(len(calls), 1)
  279
+        models.signals.post_delete.disconnect(noop, sender=User)
249 280
 
250 281
     def test_hidden_related(self):
251 282
         r = R.objects.create()
@@ -254,3 +285,69 @@ def test_hidden_related(self):
254 285
 
255 286
         r.delete()
256 287
         self.assertEqual(HiddenUserProfile.objects.count(), 0)
  288
+
  289
+class FastDeleteTests(TestCase):
  290
+
  291
+    def test_fast_delete_fk(self):
  292
+        u = User.objects.create(
  293
+            avatar=Avatar.objects.create()
  294
+        )
  295
+        a = Avatar.objects.get(pk=u.avatar_id)
  296
+        # 1 query to fast-delete the user
  297
+        # 1 query to delete the avatar
  298
+        self.assertNumQueries(2, a.delete)
  299
+        self.assertFalse(User.objects.exists())
  300
+        self.assertFalse(Avatar.objects.exists())
  301
+
  302
+    def test_fast_delete_m2m(self):
  303
+        t = M2MTo.objects.create()
  304
+        f = M2MFrom.objects.create()
  305
+        f.m2m.add(t)
  306
+        # 1 to delete f, 1 to fast-delete m2m for f
  307
+        self.assertNumQueries(2, f.delete)
  308
+
  309
+    def test_fast_delete_revm2m(self):
  310
+        t = M2MTo.objects.create()
  311
+        f = M2MFrom.objects.create()
  312
+        f.m2m.add(t)
  313
+        # 1 to delete t, 1 to fast-delete t's m_set
  314
+        self.assertNumQueries(2, f.delete)
  315
+
  316
+    def test_fast_delete_qs(self):
  317
+        u1 = User.objects.create()
  318
+        u2 = User.objects.create()
  319
+        self.assertNumQueries(1, User.objects.filter(pk=u1.pk).delete)
  320
+        self.assertEquals(User.objects.count(), 1)
  321
+        self.assertTrue(User.objects.filter(pk=u2.pk).exists())
  322
+
  323
+    def test_fast_delete_joined_qs(self):
  324
+        a = Avatar.objects.create(desc='a')
  325
+        User.objects.create(avatar=a)
  326
+        u2 = User.objects.create()
  327
+        expected_queries = 1 if connection.features.update_can_self_select else 2
  328
+        self.assertNumQueries(expected_queries,
  329
+                              User.objects.filter(avatar__desc='a').delete)
  330
+        self.assertEquals(User.objects.count(), 1)
  331
+        self.assertTrue(User.objects.filter(pk=u2.pk).exists())
  332
+
  333
+    def test_fast_delete_inheritance(self):
  334
+        c = Child.objects.create()
  335
+        p = Parent.objects.create()
  336
+        # 1 for self, 1 for parent
  337
+        # However, this doesn't work as child.parent access creates a query,
  338
+        # and this means we will be generating extra queries (a lot for large
  339
+        # querysets). This is not a fast-delete problem.
  340
+        # self.assertNumQueries(2, c.delete)
  341
+        c.delete()
  342
+        self.assertFalse(Child.objects.exists())
  343
+        self.assertEquals(Parent.objects.count(), 1)
  344
+        self.assertEquals(Parent.objects.filter(pk=p.pk).count(), 1)
  345
+        # 1 for self delete, 1 for fast delete of empty "child" qs.
  346
+        self.assertNumQueries(2, p.delete)
  347
+        self.assertFalse(Parent.objects.exists())
  348
+        # 1 for self delete, 1 for fast delete of empty "child" qs.
  349
+        c = Child.objects.create()
  350
+        p = c.parent_ptr
  351
+        self.assertNumQueries(2, p.delete)
  352
+        self.assertFalse(Parent.objects.exists())
  353
+        self.assertFalse(Child.objects.exists())
3  tests/regressiontests/admin_util/models.py
@@ -39,3 +39,6 @@ class Guest(models.Model):
39 39
 
40 40
     class Meta:
41 41
         verbose_name = "awesome guest"
  42
+
  43
+class EventGuide(models.Model):
  44
+    event = models.ForeignKey(Event, on_delete=models.DO_NOTHING)
13  tests/regressiontests/admin_util/tests.py
@@ -17,7 +17,7 @@
17 17
 from django.utils.safestring import mark_safe
18 18
 from django.utils import six
19 19
 
20  
-from .models import Article, Count, Event, Location
  20
+from .models import Article, Count, Event, Location, EventGuide
21 21
 
22 22
 
23 23
 class NestedObjectsTests(TestCase):
@@ -71,6 +71,17 @@ def test_queries(self):
71 71
         # Should not require additional queries to populate the nested graph.
72 72
         self.assertNumQueries(2, self._collect, 0)
73 73
 
  74
+    def test_on_delete_do_nothing(self):
  75
+        """
  76
+        Check that the nested collector doesn't query for DO_NOTHING objects.
  77
+        """
  78
+        n = NestedObjects(using=DEFAULT_DB_ALIAS)
  79
+        objs = [Event.objects.create()]
  80
+        EventGuide.objects.create(event=objs[0])
  81
+        with self.assertNumQueries(2):
  82
+            # One for Location, one for Guest, and no query for EventGuide
  83
+            n.collect(objs)
  84
+
74 85
 class UtilTests(unittest.TestCase):
75 86
     def test_values_from_lookup_field(self):
76 87
         """
11  tests/regressiontests/delete_regress/tests.py
@@ -3,7 +3,7 @@
3 3
 import datetime
4 4
 
5 5
 from django.conf import settings
6  
-from django.db import backend, transaction, DEFAULT_DB_ALIAS
  6
+from django.db import backend, transaction, DEFAULT_DB_ALIAS, models
7 7
 from django.test import TestCase, TransactionTestCase, skipUnlessDBFeature
8 8
 
9 9
 from .models import (Book, Award, AwardNote, Person, Child, Toy, PlayedWith,
@@ -139,17 +139,24 @@ def test_to_field(self):
139 139
         eaten = Eaten.objects.create(food=apple, meal="lunch")
140 140
 
141 141
         apple.delete()
  142
+        self.assertFalse(Food.objects.exists())
  143
+        self.assertFalse(Eaten.objects.exists())
  144
+
142 145
 
143 146
 class LargeDeleteTests(TestCase):
144 147
     def test_large_deletes(self):
145 148
         "Regression for #13309 -- if the number of objects > chunk size, deletion still occurs"
146 149
         for x in range(300):
147 150
             track = Book.objects.create(pagecount=x+100)
  151
+        # attach a signal to make sure we will not fast-delete
  152
+        def noop(*args, **kwargs):
  153
+            pass
  154
+        models.signals.post_delete.connect(noop, sender=Book)
148 155
         Book.objects.all().delete()
  156
+        models.signals.post_delete.disconnect(noop, sender=Book)
149 157
         self.assertEqual(Book.objects.count(), 0)
150 158
 
151 159
 
152  
-
153 160
 class ProxyDeleteTest(TestCase):
154 161
     """
155 162
     Tests on_delete behavior for proxy models.
12  tests/regressiontests/dispatch/tests/test_dispatcher.py
@@ -127,15 +127,15 @@ def testDisconnection(self):
127 127
         self._testIsClean(a_signal)
128 128
 
129 129
     def test_has_listeners(self):
130  
-        self.assertIs(a_signal.has_listeners(), False)
131  
-        self.assertIs(a_signal.has_listeners(sender=object()), False)
  130
+        self.assertFalse(a_signal.has_listeners())
  131
+        self.assertFalse(a_signal.has_listeners(sender=object()))
132 132
         receiver_1 = Callable()
133 133
         a_signal.connect(receiver_1)
134  
-        self.assertIs(a_signal.has_listeners(), True)
135  
-        self.assertIs(a_signal.has_listeners(sender=object()), True)
  134
+        self.assertTrue(a_signal.has_listeners())
  135
+        self.assertTrue(a_signal.has_listeners(sender=object()))
136 136
         a_signal.disconnect(receiver_1)
137  
-        self.assertIs(a_signal.has_listeners(), False)
138  
-        self.assertIs(a_signal.has_listeners(sender=object()), False)
  137
+        self.assertFalse(a_signal.has_listeners())
  138
+        self.assertFalse(a_signal.has_listeners(sender=object()))
139 139
 
140 140
 
141 141
 class ReceiverTestCase(unittest.TestCase):

0 notes on commit 1cd6e04

Please sign in to comment.
Something went wrong with that request. Please try again.