Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fixed #5420 -- Added support for delayed loading of model fields.

In extreme cases, some fields are expensive to load from the database
(e.g. GIS fields requiring conversion, or large text fields). This
commit adds defer() and only() methods to querysets that allow the
caller to specify which fields should not be loaded unless they are
accessed.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@10090 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit 29050ef999e1931efb6c62471c7e07d2ea5e96ea 1 parent 96d5d43
Malcolm Tredinnick authored March 19, 2009
45  django/db/models/base.py
@@ -12,7 +12,8 @@
12 12
 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError
13 13
 from django.db.models.fields import AutoField, FieldDoesNotExist
14 14
 from django.db.models.fields.related import OneToOneRel, ManyToOneRel, OneToOneField
15  
-from django.db.models.query import delete_objects, Q, CollectedObjects
  15
+from django.db.models.query import delete_objects, Q
  16
+from django.db.models.query_utils import CollectedObjects, DeferredAttribute
16 17
 from django.db.models.options import Options
17 18
 from django.db import connection, transaction, DatabaseError
18 19
 from django.db.models import signals
@@ -235,6 +236,7 @@ def _prepare(cls):
235 236
 
236 237
 class Model(object):
237 238
     __metaclass__ = ModelBase
  239
+    _deferred = False
238 240
 
239 241
     def __init__(self, *args, **kwargs):
240 242
         signals.pre_init.send(sender=self.__class__, args=args, kwargs=kwargs)
@@ -271,6 +273,13 @@ def __init__(self, *args, **kwargs):
271 273
         for field in fields_iter:
272 274
             is_related_object = False
273 275
             if kwargs:
  276
+                # This slightly odd construct is so that we can access any
  277
+                # data-descriptor object (DeferredAttribute) without triggering
  278
+                # its __get__ method.
  279
+                if (field.attname not in kwargs and
  280
+                        isinstance(self.__class__.__dict__.get(field.attname), DeferredAttribute)):
  281
+                    # This field will be populated on request.
  282
+                    continue
274 283
                 if isinstance(field.rel, ManyToOneRel):
275 284
                     try:
276 285
                         # Assume object instance was passed in.
@@ -332,6 +341,31 @@ def __ne__(self, other):
332 341
     def __hash__(self):
333 342
         return hash(self._get_pk_val())
334 343
 
  344
+    def __reduce__(self):
  345
+        """
  346
+        Provide pickling support. Normally, this just dispatches to Python's
  347
+        standard handling. However, for models with deferred field loading, we
  348
+        need to do things manually, as they're dynamically created classes and
  349
+        only module-level classes can be pickled by the default path.
  350
+        """
  351
+        if not self._deferred:
  352
+            return super(Model, self).__reduce__()
  353
+        data = self.__dict__
  354
+        defers = []
  355
+        pk_val = None
  356
+        for field in self._meta.fields:
  357
+            if isinstance(self.__class__.__dict__.get(field.attname),
  358
+                    DeferredAttribute):
  359
+                defers.append(field.attname)
  360
+                if pk_val is None:
  361
+                    # The pk_val and model values are the same for all
  362
+                    # DeferredAttribute classes, so we only need to do this
  363
+                    # once.
  364
+                    obj = self.__class__.__dict__[field.attname]
  365
+                    pk_val = obj.pk_value
  366
+                    model = obj.model_ref()
  367
+        return (model_unpickle, (model, pk_val, defers), data)
  368
+
335 369
     def _get_pk_val(self, meta=None):
336 370
         if not meta:
337 371
             meta = self._meta
@@ -591,6 +625,15 @@ def get_absolute_url(opts, func, self, *args, **kwargs):
591 625
 class Empty(object):
592 626
     pass
593 627
 
  628
+def model_unpickle(model, pk_val, attrs):
  629
+    """
  630
+    Used to unpickle Model subclasses with deferred fields.
  631
+    """
  632
+    from django.db.models.query_utils import deferred_class_factory
  633
+    cls = deferred_class_factory(model, pk_val, attrs)
  634
+    return cls.__new__(cls)
  635
+model_unpickle.__safe_for_unpickle__ = True
  636
+
594 637
 if sys.version_info < (2, 5):
595 638
     # Prior to Python 2.5, Exception was an old-style class
596 639
     def subclass_exception(name, parent, unused):
6  django/db/models/manager.py
@@ -167,6 +167,12 @@ def update(self, *args, **kwargs):
167 167
     def reverse(self, *args, **kwargs):
168 168
         return self.get_query_set().reverse(*args, **kwargs)
169 169
 
  170
+    def defer(self, *args, **kwargs):
  171
+        return self.get_query_set().defer(*args, **kwargs)
  172
+
  173
+    def only(self, *args, **kwargs):
  174
+        return self.get_query_set().only(*args, **kwargs)
  175
+
170 176
     def _insert(self, values, **kwargs):
171 177
         return insert_query(self.model, values, **kwargs)
172 178
 
6  django/db/models/options.py
@@ -477,3 +477,9 @@ def get_ordered_objects(self):
477 477
             self._ordered_objects = objects
478 478
         return self._ordered_objects
479 479
 
  480
+    def pk_index(self):
  481
+        """
  482
+        Returns the index of the primary key field in the self.fields list.
  483
+        """
  484
+        return self.fields.index(self.pk)
  485
+
193  django/db/models/query.py
... ...
@@ -1,3 +1,7 @@
  1
+"""
  2
+The main QuerySet implementation. This provides the public API for the ORM.
  3
+"""
  4
+
1 5
 try:
2 6
     set
3 7
 except NameError:
@@ -6,9 +10,8 @@
6 10
 from django.db import connection, transaction, IntegrityError
7 11
 from django.db.models.aggregates import Aggregate
8 12
 from django.db.models.fields import DateField
9  
-from django.db.models.query_utils import Q, select_related_descend
  13
+from django.db.models.query_utils import Q, select_related_descend, CollectedObjects, CyclicDependency, deferred_class_factory
10 14
 from django.db.models import signals, sql
11  
-from django.utils.datastructures import SortedDict
12 15
 
13 16
 
14 17
 # Used to control how many objects are worked with at once in some cases (e.g.
@@ -22,102 +25,6 @@
22 25
 # Pull into this namespace for backwards compatibility.
23 26
 EmptyResultSet = sql.EmptyResultSet
24 27
 
25  
-
26  
-class CyclicDependency(Exception):
27  
-    """
28  
-    An error when dealing with a collection of objects that have a cyclic
29  
-    dependency, i.e. when deleting multiple objects.
30  
-    """
31  
-    pass
32  
-
33  
-
34  
-class CollectedObjects(object):
35  
-    """
36  
-    A container that stores keys and lists of values along with remembering the
37  
-    parent objects for all the keys.
38  
-
39  
-    This is used for the database object deletion routines so that we can
40  
-    calculate the 'leaf' objects which should be deleted first.
41  
-    """
42  
-
43  
-    def __init__(self):
44  
-        self.data = {}
45  
-        self.children = {}
46  
-
47  
-    def add(self, model, pk, obj, parent_model, nullable=False):
48  
-        """
49  
-        Adds an item to the container.
50  
-
51  
-        Arguments:
52  
-        * model - the class of the object being added.
53  
-        * pk - the primary key.
54  
-        * obj - the object itself.
55  
-        * parent_model - the model of the parent object that this object was
56  
-          reached through.
57  
-        * nullable - should be True if this relation is nullable.
58  
-
59  
-        Returns True if the item already existed in the structure and
60  
-        False otherwise.
61  
-        """
62  
-        d = self.data.setdefault(model, SortedDict())
63  
-        retval = pk in d
64  
-        d[pk] = obj
65  
-        # Nullable relationships can be ignored -- they are nulled out before
66  
-        # deleting, and therefore do not affect the order in which objects
67  
-        # have to be deleted.
68  
-        if parent_model is not None and not nullable:
69  
-            self.children.setdefault(parent_model, []).append(model)
70  
-        return retval
71  
-
72  
-    def __contains__(self, key):
73  
-        return self.data.__contains__(key)
74  
-
75  
-    def __getitem__(self, key):
76  
-        return self.data[key]
77  
-
78  
-    def __nonzero__(self):
79  
-        return bool(self.data)
80  
-
81  
-    def iteritems(self):
82  
-        for k in self.ordered_keys():
83  
-            yield k, self[k]
84  
-
85  
-    def items(self):
86  
-        return list(self.iteritems())
87  
-
88  
-    def keys(self):
89  
-        return self.ordered_keys()
90  
-
91  
-    def ordered_keys(self):
92  
-        """
93  
-        Returns the models in the order that they should be dealt with (i.e.
94  
-        models with no dependencies first).
95  
-        """
96  
-        dealt_with = SortedDict()
97  
-        # Start with items that have no children
98  
-        models = self.data.keys()
99  
-        while len(dealt_with) < len(models):
100  
-            found = False
101  
-            for model in models:
102  
-                if model in dealt_with:
103  
-                    continue
104  
-                children = self.children.setdefault(model, [])
105  
-                if len([c for c in children if c not in dealt_with]) == 0:
106  
-                    dealt_with[model] = None
107  
-                    found = True
108  
-            if not found:
109  
-                raise CyclicDependency(
110  
-                    "There is a cyclic dependency of items to be processed.")
111  
-
112  
-        return dealt_with.keys()
113  
-
114  
-    def unordered_keys(self):
115  
-        """
116  
-        Fallback for the case where is a cyclic dependency but we don't  care.
117  
-        """
118  
-        return self.data.keys()
119  
-
120  
-
121 28
 class QuerySet(object):
122 29
     """
123 30
     Represents a lazy database lookup for a set of objects.
@@ -275,6 +182,11 @@ def iterator(self):
275 182
         extra_select = self.query.extra_select.keys()
276 183
         aggregate_select = self.query.aggregate_select.keys()
277 184
 
  185
+        only_load = self.query.get_loaded_field_names()
  186
+        if not fill_cache:
  187
+            fields = self.model._meta.fields
  188
+            pk_idx = self.model._meta.pk_index()
  189
+
278 190
         index_start = len(extra_select)
279 191
         aggregate_start = index_start + len(self.model._meta.fields)
280 192
 
@@ -282,10 +194,31 @@ def iterator(self):
282 194
             if fill_cache:
283 195
                 obj, _ = get_cached_row(self.model, row,
284 196
                             index_start, max_depth,
285  
-                            requested=requested, offset=len(aggregate_select))
  197
+                            requested=requested, offset=len(aggregate_select),
  198
+                            only_load=only_load)
286 199
             else:
287  
-                # omit aggregates in object creation
288  
-                obj = self.model(*row[index_start:aggregate_start])
  200
+                load_fields = only_load.get(self.model)
  201
+                if load_fields:
  202
+                    # Some fields have been deferred, so we have to initialise
  203
+                    # via keyword arguments.
  204
+                    row_data = row[index_start:aggregate_start]
  205
+                    pk_val = row_data[pk_idx]
  206
+                    skip = set()
  207
+                    init_list = []
  208
+                    for field in fields:
  209
+                        if field.name not in load_fields:
  210
+                            skip.add(field.attname)
  211
+                        else:
  212
+                            init_list.append(field.attname)
  213
+                    if skip:
  214
+                        model_cls = deferred_class_factory(self.model, pk_val,
  215
+                                skip)
  216
+                        obj = model_cls(**dict(zip(init_list, row_data)))
  217
+                    else:
  218
+                        obj = self.model(*row[index_start:aggregate_start])
  219
+                else:
  220
+                    # Omit aggregates in object creation.
  221
+                    obj = self.model(*row[index_start:aggregate_start])
289 222
 
290 223
             for i, k in enumerate(extra_select):
291 224
                 setattr(obj, k, row[i])
@@ -655,6 +588,35 @@ def reverse(self):
655 588
         clone.query.standard_ordering = not clone.query.standard_ordering
656 589
         return clone
657 590
 
  591
+    def defer(self, *fields):
  592
+        """
  593
+        Defers the loading of data for certain fields until they are accessed.
  594
+        The set of fields to defer is added to any existing set of deferred
  595
+        fields. The only exception to this is if None is passed in as the only
  596
+        parameter, in which case all deferrals are removed (None acts as a
  597
+        reset option).
  598
+        """
  599
+        clone = self._clone()
  600
+        if fields == (None,):
  601
+            clone.query.clear_deferred_loading()
  602
+        else:
  603
+            clone.query.add_deferred_loading(fields)
  604
+        return clone
  605
+
  606
+    def only(self, *fields):
  607
+        """
  608
+        Essentially, the opposite of defer. Only the fields passed into this
  609
+        method and that are not already specified as deferred are loaded
  610
+        immediately when the queryset is evaluated.
  611
+        """
  612
+        if fields == [None]:
  613
+            # Can only pass None to defer(), not only(), as the reset option.
  614
+            # That won't stop people trying to do this, so let's be explicit.
  615
+            raise TypeError("Cannot pass None as an argument to only().")
  616
+        clone = self._clone()
  617
+        clone.query.add_immediate_loading(fields)
  618
+        return clone
  619
+
658 620
     ###################
659 621
     # PRIVATE METHODS #
660 622
     ###################
@@ -757,6 +719,7 @@ def _setup_query(self):
757 719
         Called by the _clone() method after initializing the rest of the
758 720
         instance.
759 721
         """
  722
+        self.query.clear_deferred_loading()
760 723
         self.query.clear_select_fields()
761 724
 
762 725
         if self._fields:
@@ -847,9 +810,9 @@ def iterator(self):
847 810
             for row in self.query.results_iter():
848 811
                 yield tuple(row)
849 812
         else:
850  
-            # When extra(select=...) or an annotation is involved, the extra cols are
851  
-            # always at the start of the row, and we need to reorder the fields
852  
-            # to match the order in self._fields.
  813
+            # When extra(select=...) or an annotation is involved, the extra
  814
+            # cols are always at the start of the row, and we need to reorder
  815
+            # the fields to match the order in self._fields.
853 816
             extra_names = self.query.extra_select.keys()
854 817
             field_names = self.field_names
855 818
             aggregate_names = self.query.aggregate_select.keys()
@@ -884,6 +847,7 @@ def _setup_query(self):
884 847
         Called by the _clone() method after initializing the rest of the
885 848
         instance.
886 849
         """
  850
+        self.query.clear_deferred_loading()
887 851
         self.query = self.query.clone(klass=sql.DateQuery, setup=True)
888 852
         self.query.select = []
889 853
         field = self.model._meta.get_field(self._field_name, many_to_many=False)
@@ -935,7 +899,7 @@ def iterator(self):
935 899
 
936 900
 
937 901
 def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0,
938  
-                   requested=None, offset=0):
  902
+                   requested=None, offset=0, only_load=None):
939 903
     """
940 904
     Helper function that recursively returns an object with the specified
941 905
     related attributes already populated.
@@ -951,7 +915,24 @@ def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0,
951 915
         # If we only have a list of Nones, there was no related object.
952 916
         obj = None
953 917
     else:
954  
-        obj = klass(*fields)
  918
+        load_fields = only_load and only_load.get(klass) or None
  919
+        if load_fields:
  920
+            # Handle deferred fields.
  921
+            skip = set()
  922
+            init_list = []
  923
+            pk_val = fields[klass._meta.pk_index()]
  924
+            for field in klass._meta.fields:
  925
+                if field.name not in load_fields:
  926
+                    skip.add(field.name)
  927
+                else:
  928
+                    init_list.append(field.attname)
  929
+            if skip:
  930
+                klass = deferred_class_factory(klass, pk_val, skip)
  931
+                obj = klass(**dict(zip(init_list, fields)))
  932
+            else:
  933
+                obj = klass(*fields)
  934
+        else:
  935
+            obj = klass(*fields)
955 936
     index_end += offset
956 937
     for f in klass._meta.fields:
957 938
         if not select_related_descend(f, restricted, requested):
171  django/db/models/query_utils.py
... ...
@@ -1,13 +1,115 @@
1 1
 """
2 2
 Various data structures used in query construction.
3 3
 
4  
-Factored out from django.db.models.query so that they can also be used by other
5  
-modules without getting into circular import difficulties.
  4
+Factored out from django.db.models.query to avoid making the main module very
  5
+large and/or so that they can be used by other modules without getting into
  6
+circular import difficulties.
6 7
 """
7 8
 
  9
+import weakref
8 10
 from copy import deepcopy
9 11
 
10 12
 from django.utils import tree
  13
+from django.utils.datastructures import SortedDict
  14
+
  15
+try:
  16
+    sorted
  17
+except NameError:
  18
+    from django.utils.itercompat import sorted  # For Python 2.3.
  19
+
  20
+
  21
+class CyclicDependency(Exception):
  22
+    """
  23
+    An error when dealing with a collection of objects that have a cyclic
  24
+    dependency, i.e. when deleting multiple objects.
  25
+    """
  26
+    pass
  27
+
  28
+class CollectedObjects(object):
  29
+    """
  30
+    A container that stores keys and lists of values along with remembering the
  31
+    parent objects for all the keys.
  32
+
  33
+    This is used for the database object deletion routines so that we can
  34
+    calculate the 'leaf' objects which should be deleted first.
  35
+    """
  36
+
  37
+    def __init__(self):
  38
+        self.data = {}
  39
+        self.children = {}
  40
+
  41
+    def add(self, model, pk, obj, parent_model, nullable=False):
  42
+        """
  43
+        Adds an item to the container.
  44
+
  45
+        Arguments:
  46
+        * model - the class of the object being added.
  47
+        * pk - the primary key.
  48
+        * obj - the object itself.
  49
+        * parent_model - the model of the parent object that this object was
  50
+          reached through.
  51
+        * nullable - should be True if this relation is nullable.
  52
+
  53
+        Returns True if the item already existed in the structure and
  54
+        False otherwise.
  55
+        """
  56
+        d = self.data.setdefault(model, SortedDict())
  57
+        retval = pk in d
  58
+        d[pk] = obj
  59
+        # Nullable relationships can be ignored -- they are nulled out before
  60
+        # deleting, and therefore do not affect the order in which objects
  61
+        # have to be deleted.
  62
+        if parent_model is not None and not nullable:
  63
+            self.children.setdefault(parent_model, []).append(model)
  64
+        return retval
  65
+
  66
+    def __contains__(self, key):
  67
+        return self.data.__contains__(key)
  68
+
  69
+    def __getitem__(self, key):
  70
+        return self.data[key]
  71
+
  72
+    def __nonzero__(self):
  73
+        return bool(self.data)
  74
+
  75
+    def iteritems(self):
  76
+        for k in self.ordered_keys():
  77
+            yield k, self[k]
  78
+
  79
+    def items(self):
  80
+        return list(self.iteritems())
  81
+
  82
+    def keys(self):
  83
+        return self.ordered_keys()
  84
+
  85
+    def ordered_keys(self):
  86
+        """
  87
+        Returns the models in the order that they should be dealt with (i.e.
  88
+        models with no dependencies first).
  89
+        """
  90
+        dealt_with = SortedDict()
  91
+        # Start with items that have no children
  92
+        models = self.data.keys()
  93
+        while len(dealt_with) < len(models):
  94
+            found = False
  95
+            for model in models:
  96
+                if model in dealt_with:
  97
+                    continue
  98
+                children = self.children.setdefault(model, [])
  99
+                if len([c for c in children if c not in dealt_with]) == 0:
  100
+                    dealt_with[model] = None
  101
+                    found = True
  102
+            if not found:
  103
+                raise CyclicDependency(
  104
+                    "There is a cyclic dependency of items to be processed.")
  105
+
  106
+        return dealt_with.keys()
  107
+
  108
+    def unordered_keys(self):
  109
+        """
  110
+        Fallback for the case where there is a cyclic dependency but we don't care.
  111
+        """
  112
+        return self.data.keys()
11 113
 
12 114
 class QueryWrapper(object):
13 115
     """
@@ -51,6 +153,39 @@ def __invert__(self):
51 153
         obj.negate()
52 154
         return obj
53 155
 
  156
+class DeferredAttribute(object):
  157
+    """
  158
+    A wrapper for a deferred-loading field. When the value is read from this
  159
+    object the first time, the query is executed.
  160
+    """
  161
+    def __init__(self, field_name, pk_value, model):
  162
+        self.field_name = field_name
  163
+        self.pk_value = pk_value
  164
+        self.model_ref = weakref.ref(model)
  165
+        self.loaded = False
  166
+
  167
+    def __get__(self, instance, owner):
  168
+        """
  169
+        Retrieves and caches the value from the datastore on the first lookup.
  170
+        Returns the cached value.
  171
+        """
  172
+        assert instance is not None
  173
+        if not self.loaded:
  174
+            obj = self.model_ref()
  175
+            if obj is None:
  176
+                return
  177
+            self.value = list(obj._base_manager.filter(pk=self.pk_value).values_list(self.field_name, flat=True))[0]
  178
+            self.loaded = True
  179
+        return self.value
  180
+
  181
+    def __set__(self, name, value):
  182
+        """
  183
+        Deferred loading attributes can be set normally (which means there will
  184
+        never be a database lookup involved).
  185
+        """
  186
+        self.value = value
  187
+        self.loaded = True
  188
+
54 189
 def select_related_descend(field, restricted, requested):
55 190
     """
56 191
     Returns True if this field should be used to descend deeper for
@@ -67,3 +202,35 @@ def select_related_descend(field, restricted, requested):
67 202
     if not restricted and field.null:
68 203
         return False
69 204
     return True
  205
+
  206
+# This function is needed because data descriptors must be defined on a class
  207
+# object, not an instance, to have any effect.
  208
+
  209
+def deferred_class_factory(model, pk_value, attrs):
  210
+    """
  211
+    Returns a class object that is a copy of "model" with the specified "attrs"
  212
+    being replaced with DeferredAttribute objects. The "pk_value" ties the
  213
+    deferred attributes to a particular instance of the model.
  214
+    """
  215
+    class Meta:
  216
+        pass
  217
+    setattr(Meta, "proxy", True)
  218
+    setattr(Meta, "app_label", model._meta.app_label)
  219
+
  220
+    # The app_cache wants a unique name for each model, otherwise the new class
  221
+    # won't be created (we get an old one back). Therefore, we generate the
  222
+    # name using the passed in attrs. It's OK to reuse an old class if the attrs
  223
+    # are identical.
  224
+    name = "%s_Deferred_%s" % (model.__name__, '_'.join(sorted(list(attrs))))
  225
+
  226
+    overrides = dict([(attr, DeferredAttribute(attr, pk_value, model))
  227
+            for attr in attrs])
  228
+    overrides["Meta"] = Meta
  229
+    overrides["__module__"] = model.__module__
  230
+    overrides["_deferred"] = True
  231
+    return type(name, (model,), overrides)
  232
+
  233
+# The above function is also used to unpickle model instances with deferred
  234
+# fields.
  235
+deferred_class_factory.__safe_for_unpickling__ = True
  236
+
185  django/db/models/sql/query.py
@@ -94,6 +94,11 @@ def __init__(self, model, connection, where=WhereNode):
94 94
         self.extra_params = ()
95 95
         self.extra_order_by = ()
96 96
 
  97
+        # A tuple of a set of model field names and either True, if these
  98
+        # are the fields to defer, or False if these are the only fields to
  99
+        # load.
  100
+        self.deferred_loading = (set(), True)
  101
+
97 102
     def __str__(self):
98 103
         """
99 104
         Returns the query as a string of SQL with the parameter values
@@ -206,6 +211,7 @@ def clone(self, klass=None, **kwargs):
206 211
         obj.extra_where = self.extra_where
207 212
         obj.extra_params = self.extra_params
208 213
         obj.extra_order_by = self.extra_order_by
  214
+        obj.deferred_loading = deepcopy(self.deferred_loading)
209 215
         if self.filter_is_sticky and self.used_aliases:
210 216
             obj.used_aliases = self.used_aliases.copy()
211 217
         else:
@@ -550,9 +556,101 @@ def pre_sql_setup(self):
550 556
         if self.select_related and not self.related_select_cols:
551 557
             self.fill_related_selections()
552 558
 
  559
+    def deferred_to_data(self, target, callback):
  560
+        """
  561
+        Converts the self.deferred_loading data structure to an alternate data
  562
+        structure, describing the fields that *will* be loaded. This is used to
  563
+        compute the columns to select from the database and also by the
  564
+        QuerySet class to work out which fields are being initialised on each
  565
+        model. Models that have all their fields included aren't mentioned in
  566
+        the result, only those that have field restrictions in place.
  567
+
  568
+        The "target" parameter is the instance that is populated (in place).
  569
+        The "callback" is a function that is called whenever a (model, field)
  570
+        pair needs to be added to "target". It accepts three parameters:
  571
+        "target", and the model and list of fields being added for that model.
  572
+        """
  573
+        field_names, defer = self.deferred_loading
  574
+        if not field_names:
  575
+            return
  576
+        columns = set()
  577
+        cur_model = self.model
  578
+        opts = cur_model._meta
  579
+        seen = {}
  580
+        must_include = {cur_model: set([opts.pk])}
  581
+        for field_name in field_names:
  582
+            parts = field_name.split(LOOKUP_SEP)
  583
+            for name in parts[:-1]:
  584
+                old_model = cur_model
  585
+                source = opts.get_field_by_name(name)[0]
  586
+                cur_model = opts.get_field_by_name(name)[0].rel.to
  587
+                opts = cur_model._meta
  588
+                # Even if we're "just passing through" this model, we must add
  589
+                # both the current model's pk and the related reference field
  590
+                # to the things we select.
  591
+                must_include[old_model].add(source)
  592
+                add_to_dict(must_include, cur_model, opts.pk)
  593
+            field, model, _, _ = opts.get_field_by_name(parts[-1])
  594
+            if model is None:
  595
+                model = cur_model
  596
+            add_to_dict(seen, model, field)
  597
+
  598
+        if defer:
  599
+            # We need to load all fields for each model, except those that
  600
+            # appear in "seen" (for all models that appear in "seen"). The only
  601
+            # slight complexity here is handling fields that exist on parent
  602
+            # models.
  603
+            workset = {}
  604
+            for model, values in seen.iteritems():
  605
+                for field, f_model in model._meta.get_fields_with_model():
  606
+                    if field in values:
  607
+                        continue
  608
+                    add_to_dict(workset, f_model or model, field)
  609
+            for model, values in must_include.iteritems():
  610
+                # If we haven't included a model in workset, we don't add the
  611
+                # corresponding must_include fields for that model, since an
  612
+                # empty set means "include all fields". That's why there's no
  613
+                # "else" branch here.
  614
+                if model in workset:
  615
+                    workset[model].update(values)
  616
+            for model, values in workset.iteritems():
  617
+                callback(target, model, values)
  618
+        else:
  619
+            for model, values in must_include.iteritems():
  620
+                if model in seen:
  621
+                    seen[model].update(values)
  622
+                else:
  623
+                    # As we've passed through this model, but not explicitly
  624
+                    # included any fields, we have to make sure it's mentioned
  625
+                    # so that only the "must include" fields are pulled in.
  626
+                    seen[model] = values
  627
+            for model, values in seen.iteritems():
  628
+                callback(target, model, values)
  629
+
  630
+    def deferred_to_columns(self):
  631
+        """
  632
+        Converts the self.deferred_loading data structure to a mapping of table
  633
+        names to sets of column names which are to be loaded. Returns the
  634
+        dictionary.
  635
+        """
  636
+        columns = {}
  637
+        self.deferred_to_data(columns, self.deferred_to_columns_cb)
  638
+        return columns
  639
+
  640
+    def deferred_to_columns_cb(self, target, model, fields):
  641
+        """
  642
+        Callback used by deferred_to_columns(). The "target" parameter should
  643
+        be a dict mapping table names to sets of column names.
  644
+        """
  645
+        table = model._meta.db_table
  646
+        if table not in target:
  647
+            target[table] = set()
  648
+        for field in fields:
  649
+            target[table].add(field.column)
  650
+
553 651
     def get_columns(self, with_aliases=False):
554 652
         """
555  
-        Return the list of columns to use in the select statement. If no
  653
+        Returns the list of columns to use in the select statement. If no
556 654
         columns have been specified, returns all columns relating to fields in
557 655
         the model.
558 656
 
@@ -569,9 +667,14 @@ def get_columns(self, with_aliases=False):
569 667
         else:
570 668
             col_aliases = set()
571 669
         if self.select:
  670
+            only_load = self.deferred_to_columns()
572 671
             for col in self.select:
573 672
                 if isinstance(col, (list, tuple)):
574  
-                    r = '%s.%s' % (qn(col[0]), qn(col[1]))
  673
+                    alias, column = col
  674
+                    table = self.alias_map[alias][TABLE_NAME]
  675
+                    if table in only_load and col not in only_load[table]:
  676
+                        continue
  677
+                    r = '%s.%s' % (qn(alias), qn(column))
575 678
                     if with_aliases:
576 679
                         if col[1] in col_aliases:
577 680
                             c_alias = 'Col%d' % len(col_aliases)
@@ -641,6 +744,7 @@ def get_default_columns(self, with_aliases=False, col_aliases=None,
641 744
         qn = self.quote_name_unless_alias
642 745
         qn2 = self.connection.ops.quote_name
643 746
         aliases = set()
  747
+        only_load = self.deferred_to_columns()
644 748
         proxied_model = opts.proxy and opts.proxy_for_model or 0
645 749
         if start_alias:
646 750
             seen = {None: start_alias}
@@ -661,6 +765,9 @@ def get_default_columns(self, with_aliases=False, col_aliases=None,
661 765
                 # aliases will have already been set up in pre_sql_setup(), so
662 766
                 # we can save time here.
663 767
                 alias = self.included_inherited_models[model]
  768
+            table = self.alias_map[alias][TABLE_NAME]
  769
+            if table in only_load and field.column not in only_load[table]:
  770
+                continue
664 771
             if as_pairs:
665 772
                 result.append((alias, field.column))
666 773
                 continue
@@ -2014,6 +2121,70 @@ def add_extra(self, select, select_params, where, params, tables, order_by):
2014 2121
         if order_by:
2015 2122
             self.extra_order_by = order_by
2016 2123
 
  2124
+    def clear_deferred_loading(self):
  2125
+        """
  2126
+        Remove any fields from the deferred loading set.
  2127
+        """
  2128
+        self.deferred_loading = (set(), True)
  2129
+
  2130
+    def add_deferred_loading(self, field_names):
  2131
+        """
  2132
+        Add the given list of model field names to the set of fields to
  2133
+        exclude from loading from the database when automatic column selection
  2134
+        is done. The new field names are added to any existing field names that
  2135
+        are deferred (or removed from any existing field names that are marked
  2136
+        as the only ones for immediate loading).
  2137
+        """
  2138
+        # Fields on related models are stored in the literal double-underscore
  2139
+        # format, so that we can use a set datastructure. We do the foo__bar
  2140
+        # splitting and handling when computing the SQL column names (as part of
  2141
+        # get_columns()).
  2142
+        existing, defer = self.deferred_loading
  2143
+        if defer:
  2144
+            # Add to existing deferred names.
  2145
+            self.deferred_loading = existing.union(field_names), True
  2146
+        else:
  2147
+            # Remove names from the set of any existing "immediate load" names.
  2148
+            self.deferred_loading = existing.difference(field_names), False
  2149
+
  2150
+    def add_immediate_loading(self, field_names):
  2151
+        """
  2152
+        Add the given list of model field names to the set of fields to
  2153
+        retrieve when the SQL is executed ("immediate loading" fields). The
  2154
+        field names replace any existing immediate loading field names. If
  2155
+        there are field names already specified for deferred loading, those
  2156
+        names are removed from the new field_names before storing the new names
  2157
+        for immediate loading. (That is, immediate loading overrides any
  2158
+        existing immediate values, but respects existing deferrals.)
  2159
+        """
  2160
+        existing, defer = self.deferred_loading
  2161
+        if defer:
  2162
+            # Remove any existing deferred names from the current set before
  2163
+            # setting the new names.
  2164
+            self.deferred_loading = set(field_names).difference(existing), False
  2165
+        else:
  2166
+            # Replace any existing "immediate load" field names.
  2167
+            self.deferred_loading = set(field_names), False
  2168
+
  2169
+    def get_loaded_field_names(self):
  2170
+        """
  2171
+        If any fields are marked to be deferred, returns a dictionary mapping
  2172
+        models to a set of names in those fields that will be loaded. If a
  2173
+        model is not in the returned dictionary, none of it's fields are
  2174
+        deferred.
  2175
+
  2176
+        If no fields are marked for deferral, returns an empty dictionary.
  2177
+        """
  2178
+        collection = {}
  2179
+        self.deferred_to_data(collection, self.get_loaded_field_names_cb)
  2180
+        return collection
  2181
+
  2182
+    def get_loaded_field_names_cb(self, target, model, fields):
  2183
+        """
  2184
+        Callback used by get_loaded_field_names().
  2185
+        """
  2186
+        target[model] = set([f.name for f in fields])
  2187
+
2017 2188
     def trim_extra_select(self, names):
2018 2189
         """
2019 2190
         Removes any aliases in the extra_select dictionary that aren't in
@@ -2180,3 +2351,13 @@ def setup_join_cache(sender, **kwargs):
2180 2351
 
2181 2352
 signals.class_prepared.connect(setup_join_cache)
2182 2353
 
  2354
+def add_to_dict(data, key, value):
  2355
+    """
  2356
+    A helper function to add "value" to the set of values for "key", whether or
  2357
+    not "key" already exists.
  2358
+    """
  2359
+    if key in data:
  2360
+        data[key].add(value)
  2361
+    else:
  2362
+        data[key] = set([value])
  2363
+
95  docs/ref/models/querysets.txt
@@ -768,6 +768,101 @@ of the arguments is required, but you should use at least one of them.
768 768
 
769 769
         Entry.objects.extra(where=['headline=%s'], params=['Lennon'])
770 770
 
  771
+``defer(*fields)``
  772
+~~~~~~~~~~~~~~~~~~
  773
+
  774
+.. versionadded:: 1.1
  775
+
  776
+In some complex data-modeling situations, your models might contain a lot of
  777
+fields, some of which could contain a lot of data (for example, text fields),
  778
+or require expensive processing to convert them to Python objects. If you are
  779
+using the results of a queryset in some situation where you know you don't
  780
+need those particular fields, you can tell Django not to retrieve them from
  781
+the database.
  782
+
  783
+This is done by passing the names of the fields to not load to ``defer()``::
  784
+
  785
+    Entry.objects.defer("lede", "body")
  786
+
  787
+A queryset that has deferred fields will still return model instances. Each
  788
+deferred field will be retrieved from the database if you access that field
  789
+(one at a time, not all the deferred fields at once).
  790
+
  791
+You can make multiple calls to ``defer()``. Each call adds new fields to the
  792
+deferred set::
  793
+
  794
+    # Defers both the body and lede fields.
  795
+    Entry.objects.defer("body").filter(headline="Lennon").defer("lede")
  796
+
  797
+The order in which fields are added to the deferred set does not matter. Calling ``defer()`` with a field name that has already been deferred is harmless (the field will still be deferred).
  798
+
  799
+You can defer loading of fields in related models (if the related models are
  800
+loaded via ``select_related()``) by using the standard double-underscore
  801
+notation to separate related fields::
  802
+
  803
+    Blog.objects.select_related().defer("entry__lede", "entry__body")
  804
+
  805
+If you want to clear the set of deferred fields, pass ``None`` as a parameter
  806
+to ``defer()``::
  807
+
  808
+    # Load all fields immediately.
  809
+    my_queryset.defer(None)
  810
+
  811
+Some fields in a model won't be deferred, even if you ask for them. You can
  812
+never defer the loading of the primary key. If you are using
  813
+``select_related()`` to retrieve other models at the same time you shouldn't
  814
+defer the loading of the field that connects from the primary model to the
  815
+related one (at the moment, that doesn't raise an error, but it will
  816
+eventually).
  817
+
  818
+.. note::
  819
+
  820
+    The ``defer()`` method (and its cousin, ``only()``, below) are only for
  821
+    advanced use-cases. They provide an optimization for when you have
  822
+    analyzed your queries closely and understand *exactly* what information
  823
+    you need and have measured that the difference between returning the
  824
+    fields you need and the full set of fields for the model will be
  825
+    significant. When you are initially developing your applications, don't
  826
+    bother using ``defer()``; leave it until your query construction has
  827
+    settled down and you understand where the hot-points are.
  828
+
  829
+``only(*fields)``
  830
+~~~~~~~~~~~~~~~~~~
  831
+
  832
+.. versionadded:: 1.1
  833
+
  834
+The ``only()`` method is more or less the opposite of ``defer()``. You
  835
+call it with the fields that should *not* be deferred when retrieving a model.
  836
+If you have a model where almost all the fields need to be deferred, using
  837
+``only()`` to specify the complementary set of fields could result in simpler
  838
+code.
  839
+
  840
+If you have a model with fields ``name``, ``age`` and ``biography``, the
  841
+following two querysets are the same, in terms of deferred fields::
  842
+
  843
+    Person.objects.defer("age", "biography")
  844
+    Person.objects.only("name")
  845
+
  846
+Whenever you call ``only()`` it *replaces* the set of fields to load
  847
+immediately. The method's name is mnemonic: **only** those fields are loaded
  848
+immediately; the remainder are deferred. Thus, successive calls to ``only()``
  849
+result in only the final fields being considered::
  850
+
  851
+    # This will defer all fields except the headline.
  852
+    Entry.objects.only("body", "lede").only("headline")
  853
+
  854
+Since ``defer()`` acts incrementally (adding fields to the deferred list), you
  855
+can combine calls to ``only()`` and ``defer()`` and things will behave
  856
+logically::
  857
+
  858
+    # Final result is that everything except "headline" is deferred.
  859
+    Entry.objects.only("headline", "body").defer("body")
  860
+
  861
+    # Final result loads headline and body immediately (only() replaces any
  862
+    # existing set of fields).
  863
+    Entry.objects.defer("body").only("headline", "body")
  864
+
  865
+
771 866
 QuerySet methods that do not return QuerySets
772 867
 ---------------------------------------------
773 868
 
0  tests/modeltests/defer/__init__.py
No changes.
89  tests/modeltests/defer/models.py
... ...
@@ -0,0 +1,89 @@
  1
+"""
  2
+Tests for defer() and only().
  3
+"""
  4
+
  5
+from django.db import models
  6
+from django.db.models.query_utils import DeferredAttribute
  7
+
  8
+class Secondary(models.Model):
  9
+    first = models.CharField(max_length=50)
  10
+    second = models.CharField(max_length=50)
  11
+
  12
+class Primary(models.Model):
  13
+    name = models.CharField(max_length=50)
  14
+    value = models.CharField(max_length=50)
  15
+    related = models.ForeignKey(Secondary)
  16
+
  17
+def count_delayed_fields(obj, debug=False):
  18
+    """
  19
+    Returns the number of delayed attributes on the given model instance.
  20
+    """
  21
+    count = 0
  22
+    for field in obj._meta.fields:
  23
+        if isinstance(obj.__class__.__dict__.get(field.attname),
  24
+                DeferredAttribute):
  25
+            if debug:
  26
+                print field.name, field.attname
  27
+            count += 1
  28
+    return count
  29
+
  30
+
  31
+__test__ = {"API_TEST": """
  32
+To all outward appearances, instances with deferred fields look the same as
  33
+normal instances when we examine attribute values. Therefore we test for the
  34
+number of deferred fields on returned instances (by poking at the internals),
  35
+as a way to observe what is going on.
  36
+
  37
+>>> s1 = Secondary.objects.create(first="x1", second="y1")
  38
+>>> p1 = Primary.objects.create(name="p1", value="xx", related=s1)
  39
+
  40
+>>> qs = Primary.objects.all()
  41
+
  42
+>>> count_delayed_fields(qs.defer('name')[0])
  43
+1
  44
+>>> count_delayed_fields(qs.only('name')[0])
  45
+2
  46
+>>> count_delayed_fields(qs.defer('related__first')[0])
  47
+0
  48
+>>> obj = qs.select_related().only('related__first')[0]
  49
+>>> count_delayed_fields(obj)
  50
+2
  51
+>>> obj.related_id == s1.pk
  52
+True
  53
+>>> count_delayed_fields(qs.defer('name').extra(select={'a': 1})[0])
  54
+1
  55
+>>> count_delayed_fields(qs.extra(select={'a': 1}).defer('name')[0])
  56
+1
  57
+>>> count_delayed_fields(qs.defer('name').defer('value')[0])
  58
+2
  59
+>>> count_delayed_fields(qs.only('name').only('value')[0])
  60
+2
  61
+>>> count_delayed_fields(qs.only('name').defer('value')[0])
  62
+2
  63
+>>> count_delayed_fields(qs.only('name', 'value').defer('value')[0])
  64
+2
  65
+>>> count_delayed_fields(qs.defer('name').only('value')[0])
  66
+2
  67
+>>> obj = qs.only()[0]
  68
+>>> count_delayed_fields(qs.defer(None)[0])
  69
+0
  70
+>>> count_delayed_fields(qs.only('name').defer(None)[0])
  71
+0
  72
+
  73
+Using values() won't defer anything (you get the full list of dictionaries
  74
+back), but it still works.
  75
+>>> qs.defer('name').values()[0] == {'id': p1.id, 'name': u'p1', 'value': 'xx', 'related_id': s1.id}
  76
+True
  77
+>>> qs.only('name').values()[0] == {'id': p1.id, 'name': u'p1', 'value': 'xx', 'related_id': s1.id}
  78
+True
  79
+
  80
+Using defer() and only() with get() is also valid.
  81
+>>> count_delayed_fields(qs.defer('name').get(pk=p1.pk))
  82
+1
  83
+>>> count_delayed_fields(qs.only('name').get(pk=p1.pk))
  84
+2
  85
+
  86
+# KNOWN NOT TO WORK: >>> count_delayed_fields(qs.only('name').select_related('related')[0])
  87
+# KNOWN NOT TO WORK >>> count_delayed_fields(qs.defer('related').select_related('related')[0])
  88
+
  89
+"""}
6  tests/regressiontests/queries/models.py
@@ -890,6 +890,12 @@ def __unicode__(self):
890 890
 >>> query2.as_sql()[0] == query
891 891
 True
892 892
 
  893
+Check pickling of deferred-loading querysets
  894
+>>> qs = Item.objects.defer('name', 'creator')
  895
+>>> q2 = pickle.loads(pickle.dumps(qs))
  896
+>>> list(qs) == list(q2)
  897
+True
  898
+
893 899
 Bug #7277
894 900
 >>> n1.annotation_set.filter(Q(tag=t5) | Q(tag__children=t5) | Q(tag__children__children=t5))
895 901
 [<Annotation: a1>]

0 notes on commit 29050ef

Please sign in to comment.
Something went wrong with that request. Please try again.