  • 1 commit
  • 3 files changed
  • 0 comments
  • 1 contributor
django/db/models/query.py (144 changed lines)
@@ -20,11 +20,6 @@
 from django.utils import six
 from django.utils import timezone
 
-# Used to control how many objects are worked with at once in some cases (e.g.
-# when deleting objects).
-CHUNK_SIZE = 100
-ITER_CHUNK_SIZE = CHUNK_SIZE
-
 # The maximum number of items to display in a QuerySet.__repr__
 REPR_OUTPUT_SIZE = 20
 
@@ -41,7 +36,6 @@ def __init__(self, model=None, query=None, using=None):
         self._db = using
         self.query = query or sql.Query(self.model)
         self._result_cache = None
-        self._iter = None
         self._sticky_filter = False
         self._for_write = False
         self._prefetch_related_lookups = []
@@ -58,7 +52,7 @@ def __deepcopy__(self, memo):
         """
         obj = self.__class__()
         for k, v in self.__dict__.items():
-            if k in ('_iter', '_result_cache'):
+            if k in ('_result_cache',):
                 obj.__dict__[k] = None
             else:
                 obj.__dict__[k] = copy.deepcopy(v, memo)
@@ -69,10 +63,9 @@ def __getstate__(self):
         Allows the QuerySet to be pickled.
         """
         # Force the cache to be fully populated.
-        len(self)
+        self._fetch_all()
 
         obj_dict = self.__dict__.copy()
-        obj_dict['_iter'] = None
         return obj_dict
 
     def __repr__(self):
@@ -85,92 +78,32 @@ def __len__(self):
         # Since __len__ is called quite frequently (for example, as part of
         # list(qs)), we make some effort here to be as efficient as possible
         # whilst not messing up any existing iterators against the QuerySet.
-        if self._result_cache is None:
-            if self._iter:
-                self._result_cache = list(self._iter)
-            else:
-                self._result_cache = list(self.iterator())
-        elif self._iter:
-            self._result_cache.extend(self._iter)
-        if self._prefetch_related_lookups and not self._prefetch_done:
-            self._prefetch_related_objects()
+        self._fetch_all()
         return len(self._result_cache)
 
     def __iter__(self):
-        if self._prefetch_related_lookups and not self._prefetch_done:
-            # We need all the results in order to be able to do the prefetch
-            # in one go. To minimize code duplication, we use the __len__
-            # code path which also forces this, and also does the prefetch
-            len(self)
-
-        if self._result_cache is None:
-            self._iter = self.iterator()
-            self._result_cache = []
-        if self._iter:
-            return self._result_iter()
-        # Python's list iterator is better than our version when we're just
-        # iterating over the cache.
+        """
+        The queryset iterator protocol uses four nested iterators in the
+        default iterator case:
+            1. The DB backend's cursor iterator.
+            2. sql/compiler.execute_sql()
+               - Returns 100 rows at a time (constants.GET_ITERATOR_CHUNK_SIZE)
+                 using cursor.fetchmany(). This part is responsible for
+                 doing some column masking, and returning the rows in chunks.
+            3. sql/compiler.results_iter()
+               - Returns one row at a time. At this point the rows are still
+                 just tuples. In some cases the return values are converted to
+                 Python values at this time (see resolve_columns(),
+                 resolve_aggregate()).
+            4. self.iterator()
+               - Responsible for turning the rows into model objects.
+        """
+        self._fetch_all()
         return iter(self._result_cache)
 
-    def _result_iter(self):
-        pos = 0
-        while 1:
-            upper = len(self._result_cache)
-            while pos < upper:
-                yield self._result_cache[pos]
-                pos = pos + 1
-            if not self._iter:
-                raise StopIteration
-            if len(self._result_cache) <= pos:
-                self._fill_cache()
-
-    def __bool__(self):
-        if self._prefetch_related_lookups and not self._prefetch_done:
-            # We need all the results in order to be able to do the prefetch
-            # in one go. To minimize code duplication, we use the __len__
-            # code path which also forces this, and also does the prefetch
-            len(self)
-
-        if self._result_cache is not None:
-            return bool(self._result_cache)
-        try:
-            next(iter(self))
-        except StopIteration:
-            return False
-        return True
-
-    def __nonzero__(self):      # Python 2 compatibility
-        return type(self).__bool__(self)
-
-    def __contains__(self, val):
-        # The 'in' operator works without this method, due to __iter__. This
-        # implementation exists only to shortcut the creation of Model
-        # instances, by bailing out early if we find a matching element.
-        pos = 0
-        if self._result_cache is not None:
-            if val in self._result_cache:
-                return True
-            elif self._iter is None:
-                # iterator is exhausted, so we have our answer
-                return False
-            # remember not to check these again:
-            pos = len(self._result_cache)
-        else:
-            # We need to start filling the result cache out. The following
-            # ensures that self._iter is not None and self._result_cache is not
-            # None
-            it = iter(self)
-
-        # Carry on, one result at a time.
-        while True:
-            if len(self._result_cache) <= pos:
-                self._fill_cache(num=1)
-            if self._iter is None:
-                # we ran out of items
-                return False
-            if self._result_cache[pos] == val:
-                return True
-            pos += 1
+    def __nonzero__(self):
+        self._fetch_all()
+        return bool(self._result_cache)
 
     def __getitem__(self, k):
         """
@@ -184,19 +117,6 @@ def __getitem__(self, k):
                 "Negative indexing is not supported."
 
         if self._result_cache is not None:
-            if self._iter is not None:
-                # The result cache has only been partially populated, so we may
-                # need to fill it out a bit more.
-                if isinstance(k, slice):
-                    if k.stop is not None:
-                        # Some people insist on passing in strings here.
-                        bound = int(k.stop)
-                    else:
-                        bound = None
-                else:
-                    bound = k + 1
-                if len(self._result_cache) < bound:
-                    self._fill_cache(bound - len(self._result_cache))
             return self._result_cache[k]
 
         if isinstance(k, slice):
@@ -370,7 +290,7 @@ def count(self):
         If the QuerySet is already fully cached this simply returns the length
         of the cached results set to avoid multiple SELECT COUNT(*) calls.
         """
-        if self._result_cache is not None and not self._iter:
+        if self._result_cache is not None:
             return len(self._result_cache)
 
         return self.query.get_count(using=self.db)
@@ -915,17 +835,11 @@ def _clone(self, klass=None, setup=False, **kwargs):
             c._setup_query()
         return c
 
-    def _fill_cache(self, num=None):
-        """
-        Fills the result cache with 'num' more entries (or until the results
-        iterator is exhausted).
-        """
-        if self._iter:
-            try:
-                for i in range(num or ITER_CHUNK_SIZE):
-                    self._result_cache.append(next(self._iter))
-            except StopIteration:
-                self._iter = None
+    def _fetch_all(self):
+        if self._result_cache is None:
+            self._result_cache = list(self.iterator())
+        if self._prefetch_related_lookups and not self._prefetch_done:
+            self._prefetch_related_objects()
 
     def _next_is_sticky(self):
         """
docs/releases/1.6.txt (21 changed lines)
@@ -481,6 +481,27 @@ parameters. For example::
 
 ``SQLite`` users need to check and update such queries.
 
+QuerySet iteration
+~~~~~~~~~~~~~~~~~~
+
+``QuerySet`` iteration has been changed to immediately convert all
+fetched rows to ``Model`` objects. In Django 1.5 and earlier, rows
+fetched from the database were converted to ``Model`` objects in chunks
+of 100.
+
+Most database backends already fetched all rows at once, so for those
+backends only the point at which rows are converted to ``Model``
+objects changes, not the number of rows fetched. Of the core backends,
+only Oracle supported fetching rows in chunks.
+
+In practice the change can affect queryset usage when the whole
+queryset isn't consumed, for example when looping over only part of
+the queryset or in any usage that ends up calling ``__bool__`` or
+``__contains__``.
+
+It is still possible to convert the fetched rows to ``Model`` objects
+lazily by using the :meth:`~django.db.models.query.QuerySet.iterator()`
+method.
+
 Miscellaneous
 ~~~~~~~~~~~~~
 
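
As the release note above says, ``iterator()`` remains the opt-in for lazy conversion. A short illustration of the two modes, again using a hypothetical ``Entry`` model and a placeholder ``process()`` function:

    # New default: evaluating the queryset converts every fetched row
    # to an Entry instance up front and caches the full list.
    for entry in Entry.objects.all():
        process(entry)

    # iterator() still converts rows lazily and bypasses the result
    # cache; useful for a single pass over a large result set.
    for entry in Entry.objects.all().iterator():
        process(entry)
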
tests/queries/tests.py (44 changed lines)
@@ -9,7 +9,6 @@
 from django.core.exceptions import FieldError
 from django.db import DatabaseError, connection, connections, DEFAULT_DB_ALIAS
 from django.db.models import Count, F, Q
-from django.db.models.query import ITER_CHUNK_SIZE
 from django.db.models.sql.where import WhereNode, EverythingNode, NothingNode
 from django.db.models.sql.datastructures import EmptyResultSet
 from django.test import TestCase, skipUnlessDBFeature
@@ -1211,16 +1210,6 @@ def test_ticket12239(self):
             ordered=False
         )
 
-    def test_ticket7411(self):
-        # Saving to db must work even with partially read result set in another
-        # cursor.
-        for num in range(2 * ITER_CHUNK_SIZE + 1):
-            _ = Number.objects.create(num=num)
-
-        for i, obj in enumerate(Number.objects.all()):
-            obj.save()
-            if i > 10: break
-
     def test_ticket7759(self):
         # Count should work with a partially read result set.
         count = Number.objects.count()
@@ -1700,31 +1689,6 @@ def setUp(self):
         ann1.notes.add(n1)
         ann2 = Annotation.objects.create(name='a2', tag=t4)
 
-    # This next test used to cause really weird PostgreSQL behavior, but it was
-    # only apparent much later when the full test suite ran.
-    #  - Yeah, it leaves global ITER_CHUNK_SIZE to 2 instead of 100...
-    #@unittest.expectedFailure
-    def test_slicing_and_cache_interaction(self):
-        # We can do slicing beyond what is currently in the result cache,
-        # too.
-
-        # We need to mess with the implementation internals a bit here to decrease the
-        # cache fill size so that we don't read all the results at once.
-        from django.db.models import query
-        query.ITER_CHUNK_SIZE = 2
-        qs = Tag.objects.all()
-
-        # Fill the cache with the first chunk.
-        self.assertTrue(bool(qs))
-        self.assertEqual(len(qs._result_cache), 2)
-
-        # Query beyond the end of the cache and check that it is filled out as required.
-        self.assertEqual(repr(qs[4]), '<Tag: t5>')
-        self.assertEqual(len(qs._result_cache), 5)
-
-        # But querying beyond the end of the result set will fail.
-        self.assertRaises(IndexError, lambda: qs[100])
-
     def test_parallel_iterators(self):
         # Test that parallel iterators work.
         qs = Tag.objects.all()
@@ -2533,6 +2497,14 @@ def test_empty_nodes(self):
         w = WhereNode(children=[empty_w, NothingNode()], connector='OR')
         self.assertRaises(EmptyResultSet, w.as_sql, qn, connection)
 
+
+class IteratorExceptionsTest(TestCase):
+    def test_iter_exceptions(self):
+        qs = ExtraInfo.objects.only('author')
+        with self.assertRaises(AttributeError):
+            list(qs)
+
+
 class NullJoinPromotionOrTest(TestCase):
     def setUp(self):
         self.d1 = ModelD.objects.create(name='foo')
