Permalink
Browse files

Fixed #5420 -- Added support for delayed loading of model fields.

In extreme cases, some fields are expensive to load from the database
(e.g. GIS fields requiring conversion, or large text fields). This
commit adds defer() and only() methods to querysets that allow the
caller to specify which fields should not be loaded unless they are
accessed.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@10090 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
1 parent 96d5d43 commit 29050ef999e1931efb6c62471c7e07d2ea5e96ea @malcolmt malcolmt committed Mar 19, 2009
@@ -12,7 +12,8 @@
from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError
from django.db.models.fields import AutoField, FieldDoesNotExist
from django.db.models.fields.related import OneToOneRel, ManyToOneRel, OneToOneField
-from django.db.models.query import delete_objects, Q, CollectedObjects
+from django.db.models.query import delete_objects, Q
+from django.db.models.query_utils import CollectedObjects, DeferredAttribute
from django.db.models.options import Options
from django.db import connection, transaction, DatabaseError
from django.db.models import signals
@@ -235,6 +236,7 @@ def _prepare(cls):
class Model(object):
__metaclass__ = ModelBase
+ _deferred = False
def __init__(self, *args, **kwargs):
signals.pre_init.send(sender=self.__class__, args=args, kwargs=kwargs)
@@ -271,6 +273,13 @@ def __init__(self, *args, **kwargs):
for field in fields_iter:
is_related_object = False
if kwargs:
+ # This slightly odd construct is so that we can access any
+ # data-descriptor object (DeferredAttribute) without triggering
+ # its __get__ method.
+ if (field.attname not in kwargs and
+ isinstance(self.__class__.__dict__.get(field.attname), DeferredAttribute)):
+ # This field will be populated on request.
+ continue
if isinstance(field.rel, ManyToOneRel):
try:
# Assume object instance was passed in.
@@ -332,6 +341,31 @@ def __ne__(self, other):
def __hash__(self):
return hash(self._get_pk_val())
def __reduce__(self):
    """
    Pickling hook.

    Instances that have no deferred fields are handed straight to the
    default implementation. Instances of a deferred-loading class cannot
    take that path because their class is created dynamically (only
    module-level classes can be pickled by default), so we return a
    (callable, args, state) triple that rebuilds the dynamic class through
    model_unpickle() and restores the instance __dict__ as state.
    """
    if not self._deferred:
        return super(Model, self).__reduce__()

    class_attrs = self.__class__.__dict__
    state = self.__dict__
    deferred_names = []
    pk_val = None
    for field in self._meta.fields:
        # Look the descriptor up in the class __dict__ so that its
        # __get__ method is not triggered.
        descriptor = class_attrs.get(field.attname)
        if not isinstance(descriptor, DeferredAttribute):
            continue
        deferred_names.append(field.attname)
        if pk_val is None:
            # Every DeferredAttribute on this class carries the same
            # pk value and model reference, so reading them from the
            # first one found is enough.
            pk_val = descriptor.pk_value
            model = descriptor.model_ref()
    return (model_unpickle, (model, pk_val, deferred_names), state)
+
def _get_pk_val(self, meta=None):
if not meta:
meta = self._meta
@@ -591,6 +625,15 @@ def get_absolute_url(opts, func, self, *args, **kwargs):
class Empty(object):
pass
def model_unpickle(model, pk_val, attrs):
    """
    Rebuild an empty instance of a deferred-fields Model subclass.

    The dynamic class is recreated from the original model, the primary key
    value and the names of the deferred attributes; an uninitialised
    instance of it is returned (no __init__ call).
    """
    # Imported locally, as in the original code.
    from django.db.models.query_utils import deferred_class_factory
    deferred_cls = deferred_class_factory(model, pk_val, attrs)
    instance = deferred_cls.__new__(deferred_cls)
    return instance
model_unpickle.__safe_for_unpickle__ = True
+
if sys.version_info < (2, 5):
# Prior to Python 2.5, Exception was an old-style class
def subclass_exception(name, parent, unused):
@@ -167,6 +167,12 @@ def update(self, *args, **kwargs):
def reverse(self, *args, **kwargs):
return self.get_query_set().reverse(*args, **kwargs)
def defer(self, *args, **kwargs):
    """
    Manager shortcut: forwards all arguments to defer() on the queryset
    returned by get_query_set().
    """
    queryset = self.get_query_set()
    return queryset.defer(*args, **kwargs)
+
def only(self, *args, **kwargs):
    """
    Manager shortcut: forwards all arguments to only() on the queryset
    returned by get_query_set().
    """
    queryset = self.get_query_set()
    return queryset.only(*args, **kwargs)
+
def _insert(self, values, **kwargs):
return insert_query(self.model, values, **kwargs)
@@ -477,3 +477,9 @@ def get_ordered_objects(self):
self._ordered_objects = objects
return self._ordered_objects
def pk_index(self):
    """
    Return the position of the primary key field (self.pk) within the
    self.fields list.
    """
    pk_field = self.pk
    position = self.fields.index(pk_field)
    return position
+
@@ -1,3 +1,7 @@
+"""
+The main QuerySet implementation. This provides the public API for the ORM.
+"""
+
try:
set
except NameError:
@@ -6,9 +10,8 @@
from django.db import connection, transaction, IntegrityError
from django.db.models.aggregates import Aggregate
from django.db.models.fields import DateField
-from django.db.models.query_utils import Q, select_related_descend
+from django.db.models.query_utils import Q, select_related_descend, CollectedObjects, CyclicDependency, deferred_class_factory
from django.db.models import signals, sql
-from django.utils.datastructures import SortedDict
# Used to control how many objects are worked with at once in some cases (e.g.
@@ -22,102 +25,6 @@
# Pull into this namespace for backwards compatibility.
EmptyResultSet = sql.EmptyResultSet
-
-class CyclicDependency(Exception):
- """
- An error when dealing with a collection of objects that have a cyclic
- dependency, i.e. when deleting multiple objects.
- """
- pass
-
-
-class CollectedObjects(object):
- """
- A container that stores keys and lists of values along with remembering the
- parent objects for all the keys.
-
- This is used for the database object deletion routines so that we can
- calculate the 'leaf' objects which should be deleted first.
- """
-
- def __init__(self):
- self.data = {}
- self.children = {}
-
- def add(self, model, pk, obj, parent_model, nullable=False):
- """
- Adds an item to the container.
-
- Arguments:
- * model - the class of the object being added.
- * pk - the primary key.
- * obj - the object itself.
- * parent_model - the model of the parent object that this object was
- reached through.
- * nullable - should be True if this relation is nullable.
-
- Returns True if the item already existed in the structure and
- False otherwise.
- """
- d = self.data.setdefault(model, SortedDict())
- retval = pk in d
- d[pk] = obj
- # Nullable relationships can be ignored -- they are nulled out before
- # deleting, and therefore do not affect the order in which objects
- # have to be deleted.
- if parent_model is not None and not nullable:
- self.children.setdefault(parent_model, []).append(model)
- return retval
-
- def __contains__(self, key):
- return self.data.__contains__(key)
-
- def __getitem__(self, key):
- return self.data[key]
-
- def __nonzero__(self):
- return bool(self.data)
-
- def iteritems(self):
- for k in self.ordered_keys():
- yield k, self[k]
-
- def items(self):
- return list(self.iteritems())
-
- def keys(self):
- return self.ordered_keys()
-
- def ordered_keys(self):
- """
- Returns the models in the order that they should be dealt with (i.e.
- models with no dependencies first).
- """
- dealt_with = SortedDict()
- # Start with items that have no children
- models = self.data.keys()
- while len(dealt_with) < len(models):
- found = False
- for model in models:
- if model in dealt_with:
- continue
- children = self.children.setdefault(model, [])
- if len([c for c in children if c not in dealt_with]) == 0:
- dealt_with[model] = None
- found = True
- if not found:
- raise CyclicDependency(
- "There is a cyclic dependency of items to be processed.")
-
- return dealt_with.keys()
-
- def unordered_keys(self):
- """
- Fallback for the case where is a cyclic dependency but we don't care.
- """
- return self.data.keys()
-
-
class QuerySet(object):
"""
Represents a lazy database lookup for a set of objects.
@@ -275,17 +182,43 @@ def iterator(self):
extra_select = self.query.extra_select.keys()
aggregate_select = self.query.aggregate_select.keys()
+ only_load = self.query.get_loaded_field_names()
+ if not fill_cache:
+ fields = self.model._meta.fields
+ pk_idx = self.model._meta.pk_index()
+
index_start = len(extra_select)
aggregate_start = index_start + len(self.model._meta.fields)
for row in self.query.results_iter():
if fill_cache:
obj, _ = get_cached_row(self.model, row,
index_start, max_depth,
- requested=requested, offset=len(aggregate_select))
+ requested=requested, offset=len(aggregate_select),
+ only_load=only_load)
else:
- # omit aggregates in object creation
- obj = self.model(*row[index_start:aggregate_start])
+ load_fields = only_load.get(self.model)
+ if load_fields:
+ # Some fields have been deferred, so we have to initialise
+ # via keyword arguments.
+ row_data = row[index_start:aggregate_start]
+ pk_val = row_data[pk_idx]
+ skip = set()
+ init_list = []
+ for field in fields:
+ if field.name not in load_fields:
+ skip.add(field.attname)
+ else:
+ init_list.append(field.attname)
+ if skip:
+ model_cls = deferred_class_factory(self.model, pk_val,
+ skip)
+ obj = model_cls(**dict(zip(init_list, row_data)))
+ else:
+ obj = self.model(*row[index_start:aggregate_start])
+ else:
+ # Omit aggregates in object creation.
+ obj = self.model(*row[index_start:aggregate_start])
for i, k in enumerate(extra_select):
setattr(obj, k, row[i])
@@ -655,6 +588,35 @@ def reverse(self):
clone.query.standard_ordering = not clone.query.standard_ordering
return clone
def defer(self, *fields):
    """
    Return a clone of this queryset on which loading of the named fields
    is postponed until they are accessed.

    The names are added to whatever is already deferred. Passing None as
    the single argument is the reset switch: every existing deferral is
    removed instead.
    """
    clone = self._clone()
    query = clone.query
    if fields != (None,):
        query.add_deferred_loading(fields)
    else:
        # defer(None): drop all deferrals.
        query.clear_deferred_loading()
    return clone
+
def only(self, *fields):
    """
    Essentially, the opposite of defer. Only the fields passed into this
    method and that are not already specified as deferred are loaded
    immediately when the queryset is evaluated.

    Returns a clone of this queryset; the original is left untouched.

    Raises TypeError if None is passed as the only argument: None is the
    reset option of defer() and has no meaning here.
    """
    # ``fields`` is always a tuple (it comes from *args), so it has to be
    # compared against a tuple. Comparing against the list [None] is
    # never true and would let only(None) slip through silently.
    if fields == (None,):
        # Can only pass None to defer(), not only(), as the reset option.
        # That won't stop people trying to do this, so let's be explicit.
        raise TypeError("Cannot pass None as an argument to only().")
    clone = self._clone()
    clone.query.add_immediate_loading(fields)
    return clone
+
###################
# PRIVATE METHODS #
###################
@@ -757,6 +719,7 @@ def _setup_query(self):
Called by the _clone() method after initializing the rest of the
instance.
"""
+ self.query.clear_deferred_loading()
self.query.clear_select_fields()
if self._fields:
@@ -847,9 +810,9 @@ def iterator(self):
for row in self.query.results_iter():
yield tuple(row)
else:
- # When extra(select=...) or an annotation is involved, the extra cols are
- # always at the start of the row, and we need to reorder the fields
- # to match the order in self._fields.
+ # When extra(select=...) or an annotation is involved, the extra
+ # cols are always at the start of the row, and we need to reorder
+ # the fields to match the order in self._fields.
extra_names = self.query.extra_select.keys()
field_names = self.field_names
aggregate_names = self.query.aggregate_select.keys()
@@ -884,6 +847,7 @@ def _setup_query(self):
Called by the _clone() method after initializing the rest of the
instance.
"""
+ self.query.clear_deferred_loading()
self.query = self.query.clone(klass=sql.DateQuery, setup=True)
self.query.select = []
field = self.model._meta.get_field(self._field_name, many_to_many=False)
@@ -935,7 +899,7 @@ def iterator(self):
def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0,
- requested=None, offset=0):
+ requested=None, offset=0, only_load=None):
"""
Helper function that recursively returns an object with the specified
related attributes already populated.
@@ -951,7 +915,24 @@ def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0,
# If we only have a list of Nones, there was not related object.
obj = None
else:
- obj = klass(*fields)
+ load_fields = only_load and only_load.get(klass) or None
+ if load_fields:
+ # Handle deferred fields.
+ skip = set()
+ init_list = []
+ pk_val = fields[klass._meta.pk_index()]
+ for field in klass._meta.fields:
+ if field.name not in load_fields:
+ skip.add(field.name)
+ else:
+ init_list.append(field.attname)
+ if skip:
+ klass = deferred_class_factory(klass, pk_val, skip)
+ obj = klass(**dict(zip(init_list, fields)))
+ else:
+ obj = klass(*fields)
+ else:
+ obj = klass(*fields)
index_end += offset
for f in klass._meta.fields:
if not select_related_descend(f, restricted, requested):
Oops, something went wrong.

0 comments on commit 29050ef

Please sign in to comment.