Skip to content

Commit

Permalink
Fixed bug with Model.delete() which did not always delete objects in …
Browse files Browse the repository at this point in the history
…the right order.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@7722 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information
spookylukey committed Jun 21, 2008
1 parent 7c62153 commit b1851cc
Show file tree
Hide file tree
Showing 3 changed files with 207 additions and 35 deletions.
19 changes: 9 additions & 10 deletions django/db/models/base.py
Expand Up @@ -10,7 +10,7 @@
from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError
from django.db.models.fields import AutoField, ImageField, FieldDoesNotExist
from django.db.models.fields.related import OneToOneRel, ManyToOneRel, OneToOneField
from django.db.models.query import delete_objects, Q
from django.db.models.query import delete_objects, Q, CollectedObjects
from django.db.models.options import Options, AdminOptions
from django.db import connection, transaction
from django.db.models import signals
Expand Down Expand Up @@ -368,17 +368,16 @@ def validate(self):
error_dict[f.name] = errors
return error_dict

def _collect_sub_objects(self, seen_objs):
def _collect_sub_objects(self, seen_objs, parent=None, nullable=False):
"""
Recursively populates seen_objs with all objects related to this object.
When done, seen_objs will be in the format:
{model_class: {pk_val: obj, pk_val: obj, ...},
model_class: {pk_val: obj, pk_val: obj, ...}, ...}
When done, seen_objs.items() will be in the format:
[(model_class, {pk_val: obj, pk_val: obj, ...}),
(model_class, {pk_val: obj, pk_val: obj, ...}),...]
"""
pk_val = self._get_pk_val()
if pk_val in seen_objs.setdefault(self.__class__, {}):
if seen_objs.add(self.__class__, pk_val, self, parent, nullable):
return
seen_objs[self.__class__][pk_val] = self

for related in self._meta.get_all_related_objects():
rel_opts_name = related.get_accessor_name()
Expand All @@ -388,16 +387,16 @@ def _collect_sub_objects(self, seen_objs):
except ObjectDoesNotExist:
pass
else:
sub_obj._collect_sub_objects(seen_objs)
sub_obj._collect_sub_objects(seen_objs, self.__class__, related.field.null)
else:
for sub_obj in getattr(self, rel_opts_name).all():
sub_obj._collect_sub_objects(seen_objs)
sub_obj._collect_sub_objects(seen_objs, self.__class__, related.field.null)

def delete(self):
assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname)

# Find all the objects than need to be deleted
seen_objs = SortedDict()
seen_objs = CollectedObjects()
self._collect_sub_objects(seen_objs)

# Actually delete the objects
Expand Down
118 changes: 107 additions & 11 deletions django/db/models/query.py
Expand Up @@ -16,6 +16,92 @@
# Pull into this namespace for backwards compatibility
EmptyResultSet = sql.EmptyResultSet

class CyclicDependency(Exception):
pass

class CollectedObjects(object):
"""
A container that stores keys and lists of values along with
remembering the parent objects for all the keys.
This is used for the database object deletion routines so that we
can calculate the 'leaf' objects which should be deleted first.
"""

def __init__(self):
self.data = {}
self.children = {}

def add(self, model, pk, obj, parent_model, nullable=False):
"""
Adds an item.
model is the class of the object being added,
pk is the primary key, obj is the object itself,
parent_model is the model of the parent object
that this object was reached through, nullable should
be True if this relation is nullable.
If the item already existed in the structure,
returns true, otherwise false.
"""
d = self.data.setdefault(model, SortedDict())
retval = pk in d
d[pk] = obj
# Nullable relationships can be ignored -- they
# are nulled out before deleting, and therefore
# do not affect the order in which objects have
# to be deleted.
if parent_model is not None and not nullable:
self.children.setdefault(parent_model, []).append(model)

return retval

def __contains__(self, key):
return self.data.__contains__(key)

def __getitem__(self, key):
return self.data[key]

def __nonzero__(self):
return bool(self.data)

def iteritems(self):
for k in self.ordered_keys():
yield k, self[k]

def items(self):
return list(self.iteritems())

def keys(self):
return self.ordered_keys()

def ordered_keys(self):
"""
Returns the models in the order that they should be
dealth with i.e. models with no dependencies first.
"""
dealt_with = SortedDict()
# Start with items that have no children
models = self.data.keys()
while len(dealt_with) < len(models):
found = False
for model in models:
children = self.children.setdefault(model, [])
if len([c for c in children if c not in dealt_with]) == 0:
dealt_with[model] = None
found = True
if not found:
raise CyclicDependency("There is a cyclic dependency of items to be processed.")

return dealt_with.keys()

def unordered_keys(self):
"""
Fallback for the case where is a cyclic dependency but we
don't care.
"""
return self.data.keys()

class QuerySet(object):
"Represents a lazy database lookup for a set of objects"
def __init__(self, model=None, query=None):
Expand Down Expand Up @@ -275,7 +361,7 @@ def delete(self):
while 1:
# Collect all the objects to be deleted in this chunk, and all the
# objects that are related to the objects that are to be deleted.
seen_objs = SortedDict()
seen_objs = CollectedObjects()
for object in del_query[:CHUNK_SIZE]:
object._collect_sub_objects(seen_objs)

Expand Down Expand Up @@ -682,19 +768,27 @@ def delete_objects(seen_objs):
Iterate through a list of seen classes, and remove any instances that are
referred to.
"""
ordered_classes = seen_objs.keys()
ordered_classes.reverse()

try:
ordered_classes = seen_objs.keys()
except CyclicDependency:
# if there is a cyclic dependency, we cannot in general delete
# the objects. However, if an appropriate transaction is set
# up, or if the database is lax enough, it will succeed.
# So for now, we go ahead and try anway.
ordered_classes = seen_objs.unordered_keys()

obj_pairs = {}
for cls in ordered_classes:
seen_objs[cls] = seen_objs[cls].items()
seen_objs[cls].sort()
items = seen_objs[cls].items()
items.sort()
obj_pairs[cls] = items

# Pre notify all instances to be deleted
for pk_val, instance in seen_objs[cls]:
for pk_val, instance in items:
dispatcher.send(signal=signals.pre_delete, sender=cls,
instance=instance)

pk_list = [pk for pk,instance in seen_objs[cls]]
pk_list = [pk for pk,instance in items]
del_query = sql.DeleteQuery(cls, connection)
del_query.delete_batch_related(pk_list)

Expand All @@ -705,15 +799,17 @@ def delete_objects(seen_objs):

# Now delete the actual data
for cls in ordered_classes:
seen_objs[cls].reverse()
pk_list = [pk for pk,instance in seen_objs[cls]]
items = obj_pairs[cls]
items.reverse()

pk_list = [pk for pk,instance in items]
del_query = sql.DeleteQuery(cls, connection)
del_query.delete_batch(pk_list)

# Last cleanup; set NULLs where there once was a reference to the
# object, NULL the primary key of the found objects, and perform
# post-notification.
for pk_val, instance in seen_objs[cls]:
for pk_val, instance in items:
for field in cls._meta.fields:
if field.rel and field.null and field.rel.to in seen_objs:
setattr(instance, field.attname, None)
Expand Down
105 changes: 91 additions & 14 deletions tests/modeltests/delete/models.py
Expand Up @@ -33,20 +33,57 @@ class D(DefaultRepr, models.Model):
# However, if we start at As, we might find Bs first (in which
# case things will be nice), or find Ds first.

# Some mutually dependent models, but nullable
class E(DefaultRepr, models.Model):
f = models.ForeignKey('F', null=True, related_name='e_rel')

class F(DefaultRepr, models.Model):
e = models.ForeignKey(E, related_name='f_rel')


__test__ = {'API_TESTS': """
# First, some tests for the datastructure we use
>>> from django.db.models.query import CollectedObjects
>>> g = CollectedObjects()
>>> g.add("key1", 1, "item1", None)
False
>>> g["key1"]
{1: 'item1'}
>>> g.add("key2", 1, "item1", "key1")
False
>>> g.add("key2", 2, "item2", "key1")
False
>>> g["key2"]
{1: 'item1', 2: 'item2'}
>>> g.add("key3", 1, "item1", "key1")
False
>>> g.add("key3", 1, "item1", "key2")
True
>>> g.ordered_keys()
['key3', 'key2', 'key1']
>>> g.add("key2", 1, "item1", "key3")
True
>>> g.ordered_keys()
Traceback (most recent call last):
...
CyclicDependency: There is a cyclic dependency of items to be processed.
# Due to the way that transactions work in the test harness,
# doing m.delete() here can work but fail in a real situation,
# since it may delete all objects, but not in the right order.
# So we manually check that the order of deletion is correct.
# Also, it is possible that the order is correct 'accidentally', due
# solely to order of imports etc. To check this, we set the order
# that 'get_models()' will retrieve to a known 'tricky' order, and
# then try again with the reverse and try again. Slightly naughty
# access to internals here.
# that 'get_models()' will retrieve to a known 'nice' order, and
# then try again with a known 'tricky' order. Slightly naughty
# access to internals here :-)
>>> from django.utils.datastructures import SortedDict
>>> from django.db.models.loading import cache
# Nice order
Expand All @@ -56,8 +93,6 @@ class D(DefaultRepr, models.Model):
>>> del C._meta._related_objects_cache
>>> del D._meta._related_objects_cache
>>> a1 = A()
>>> a1.save()
>>> b1 = B(a=a1)
Expand All @@ -67,9 +102,9 @@ class D(DefaultRepr, models.Model):
>>> d1 = D(c=c1, a=a1)
>>> d1.save()
>>> sd = SortedDict()
>>> a1._collect_sub_objects(sd)
>>> list(reversed(sd.keys()))
>>> o = CollectedObjects()
>>> a1._collect_sub_objects(o)
>>> o.keys()
[<class 'modeltests.delete.models.D'>, <class 'modeltests.delete.models.C'>, <class 'modeltests.delete.models.B'>, <class 'modeltests.delete.models.A'>]
>>> a1.delete()
Expand All @@ -80,7 +115,6 @@ class D(DefaultRepr, models.Model):
>>> del C._meta._related_objects_cache
>>> del D._meta._related_objects_cache
>>> a2 = A()
>>> a2.save()
>>> b2 = B(a=a2)
Expand All @@ -90,13 +124,56 @@ class D(DefaultRepr, models.Model):
>>> d2 = D(c=c2, a=a2)
>>> d2.save()
>>> sd2 = SortedDict()
>>> a2._collect_sub_objects(sd2)
>>> list(reversed(sd2.keys()))
>>> o = CollectedObjects()
>>> a2._collect_sub_objects(o)
>>> o.keys()
[<class 'modeltests.delete.models.D'>, <class 'modeltests.delete.models.C'>, <class 'modeltests.delete.models.B'>, <class 'modeltests.delete.models.A'>]
>>> a2.delete()
# Tests for nullable related fields
>>> g = CollectedObjects()
>>> g.add("key1", 1, "item1", None)
False
>>> g.add("key2", 1, "item1", "key1", nullable=True)
False
>>> g.add("key1", 1, "item1", "key2")
True
>>> g.ordered_keys()
['key1', 'key2']
>>> e1 = E()
>>> e1.save()
>>> f1 = F(e=e1)
>>> f1.save()
>>> e1.f = f1
>>> e1.save()
# Since E.f is nullable, we should delete F first (after nulling out
# the E.f field), then E.
>>> o = CollectedObjects()
>>> e1._collect_sub_objects(o)
>>> o.keys()
[<class 'modeltests.delete.models.F'>, <class 'modeltests.delete.models.E'>]
>>> e1.delete()
>>> e2 = E()
>>> e2.save()
>>> f2 = F(e=e2)
>>> f2.save()
>>> e2.f = f2
>>> e2.save()
# Same deal as before, though we are starting from the other object.
>>> o = CollectedObjects()
>>> f2._collect_sub_objects(o)
>>> o.keys()
[<class 'modeltests.delete.models.F'>, <class 'modeltests.delete.models.E'>]
>>> f2.delete()
"""
}

0 comments on commit b1851cc

Please sign in to comment.