
base fork: django/django
...
head fork: akaariai/django
compare: non_chunked_reads
  • 1 commit
  • 3 files changed
  • 0 commit comments
  • 1 contributor
144 django/db/models/query.py
@@ -20,11 +20,6 @@
from django.utils import six
from django.utils import timezone
-# Used to control how many objects are worked with at once in some cases (e.g.
-# when deleting objects).
-CHUNK_SIZE = 100
-ITER_CHUNK_SIZE = CHUNK_SIZE
-
# The maximum number of items to display in a QuerySet.__repr__
REPR_OUTPUT_SIZE = 20
@@ -41,7 +36,6 @@ def __init__(self, model=None, query=None, using=None):
self._db = using
self.query = query or sql.Query(self.model)
self._result_cache = None
- self._iter = None
self._sticky_filter = False
self._for_write = False
self._prefetch_related_lookups = []
@@ -58,7 +52,7 @@ def __deepcopy__(self, memo):
"""
obj = self.__class__()
for k,v in self.__dict__.items():
- if k in ('_iter','_result_cache'):
+ if k in ('_result_cache',):
obj.__dict__[k] = None
else:
obj.__dict__[k] = copy.deepcopy(v, memo)
@@ -69,10 +63,9 @@ def __getstate__(self):
Allows the QuerySet to be pickled.
"""
# Force the cache to be fully populated.
- len(self)
+ self._fetch_all()
obj_dict = self.__dict__.copy()
- obj_dict['_iter'] = None
return obj_dict
def __repr__(self):
@@ -85,92 +78,32 @@ def __len__(self):
# Since __len__ is called quite frequently (for example, as part of
# list(qs), we make some effort here to be as efficient as possible
# whilst not messing up any existing iterators against the QuerySet.
- if self._result_cache is None:
- if self._iter:
- self._result_cache = list(self._iter)
- else:
- self._result_cache = list(self.iterator())
- elif self._iter:
- self._result_cache.extend(self._iter)
- if self._prefetch_related_lookups and not self._prefetch_done:
- self._prefetch_related_objects()
+ self._fetch_all()
return len(self._result_cache)
def __iter__(self):
- if self._prefetch_related_lookups and not self._prefetch_done:
- # We need all the results in order to be able to do the prefetch
- # in one go. To minimize code duplication, we use the __len__
- # code path which also forces this, and also does the prefetch
- len(self)
-
- if self._result_cache is None:
- self._iter = self.iterator()
- self._result_cache = []
- if self._iter:
- return self._result_iter()
- # Python's list iterator is better than our version when we're just
- # iterating over the cache.
+ """
+ The queryset iterator protocol uses four nested iterators in the
+ default iterator case:
+ 1. The DB backend's cursor iterator.
+ 2. sql/compiler.execute_sql()
+ - Returns 100 rows at a time (constants.GET_ITERATOR_CHUNK_SIZE)
+ using cursor.fetchmany(). This part is responsible for
+ doing some column masking, and returning the rows in chunks.
+ 3. sql/compiler.results_iter()
+ - Returns one row at a time. At this point the rows are still just
+ tuples. In some cases the return values are converted to
+ Python values at this time (see resolve_columns(),
+ resolve_aggregate()).
+ 4. self.iterator()
+ - Responsible for turning the rows into model objects.
+ """
+ self._fetch_all()
return iter(self._result_cache)
- def _result_iter(self):
- pos = 0
- while 1:
- upper = len(self._result_cache)
- while pos < upper:
- yield self._result_cache[pos]
- pos = pos + 1
- if not self._iter:
- raise StopIteration
- if len(self._result_cache) <= pos:
- self._fill_cache()
-
- def __bool__(self):
- if self._prefetch_related_lookups and not self._prefetch_done:
- # We need all the results in order to be able to do the prefetch
- # in one go. To minimize code duplication, we use the __len__
- # code path which also forces this, and also does the prefetch
- len(self)
-
- if self._result_cache is not None:
- return bool(self._result_cache)
- try:
- next(iter(self))
- except StopIteration:
- return False
- return True
-
- def __nonzero__(self): # Python 2 compatibility
- return type(self).__bool__(self)
-
- def __contains__(self, val):
- # The 'in' operator works without this method, due to __iter__. This
- # implementation exists only to shortcut the creation of Model
- # instances, by bailing out early if we find a matching element.
- pos = 0
- if self._result_cache is not None:
- if val in self._result_cache:
- return True
- elif self._iter is None:
- # iterator is exhausted, so we have our answer
- return False
- # remember not to check these again:
- pos = len(self._result_cache)
- else:
- # We need to start filling the result cache out. The following
- # ensures that self._iter is not None and self._result_cache is not
- # None
- it = iter(self)
-
- # Carry on, one result at a time.
- while True:
- if len(self._result_cache) <= pos:
- self._fill_cache(num=1)
- if self._iter is None:
- # we ran out of items
- return False
- if self._result_cache[pos] == val:
- return True
- pos += 1
+ def __nonzero__(self):
+ self._fetch_all()
+ return bool(self._result_cache)
def __getitem__(self, k):
"""
@@ -184,19 +117,6 @@ def __getitem__(self, k):
"Negative indexing is not supported."
if self._result_cache is not None:
- if self._iter is not None:
- # The result cache has only been partially populated, so we may
- # need to fill it out a bit more.
- if isinstance(k, slice):
- if k.stop is not None:
- # Some people insist on passing in strings here.
- bound = int(k.stop)
- else:
- bound = None
- else:
- bound = k + 1
- if len(self._result_cache) < bound:
- self._fill_cache(bound - len(self._result_cache))
return self._result_cache[k]
if isinstance(k, slice):
@@ -370,7 +290,7 @@ def count(self):
If the QuerySet is already fully cached this simply returns the length
of the cached results set to avoid multiple SELECT COUNT(*) calls.
"""
- if self._result_cache is not None and not self._iter:
+ if self._result_cache is not None:
return len(self._result_cache)
return self.query.get_count(using=self.db)
@@ -915,17 +835,11 @@ def _clone(self, klass=None, setup=False, **kwargs):
c._setup_query()
return c
- def _fill_cache(self, num=None):
- """
- Fills the result cache with 'num' more entries (or until the results
- iterator is exhausted).
- """
- if self._iter:
- try:
- for i in range(num or ITER_CHUNK_SIZE):
- self._result_cache.append(next(self._iter))
- except StopIteration:
- self._iter = None
+ def _fetch_all(self):
+ if self._result_cache is None:
+ self._result_cache = list(self.iterator())
+ if self._prefetch_related_lookups and not self._prefetch_done:
+ self._prefetch_related_objects()
def _next_is_sticky(self):
"""
21 docs/releases/1.6.txt
@@ -481,6 +481,27 @@ parameters. For example::
``SQLite`` users need to check and update such queries.
+QuerySet iteration
+~~~~~~~~~~~~~~~~~~
+
+``QuerySet`` iteration has been changed to immediately convert all
+fetched rows to ``Model`` objects. In Django 1.5 and earlier, the rows
+fetched from the database were converted to ``Model`` objects in chunks
+of 100.
+
+Most database backends already fetched all rows at once, so for those
+backends the number of fetched rows is unchanged; only the point at
+which the rows are converted to ``Model`` objects has moved. Of the
+core backends, only Oracle supported fetching rows in chunks.
+
+In practice the change matters when the whole queryset isn't consumed,
+for example partial looping over the queryset, or any usage that ends
+up calling ``__bool__`` or ``__contains__``.
+
+It is still possible to convert the fetched rows to ``Model`` objects
+lazily by using the :meth:`~django.db.models.query.QuerySet.iterator()`
+method.
+
Miscellaneous
~~~~~~~~~~~~~
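
To make the documented change concrete, a hedged before/after illustration. ``Entry`` is a placeholder model used only for this example; it is not part of the patch.

    from myapp.models import Entry  # hypothetical app/model, for illustration

    qs = Entry.objects.all()
    bool(qs)   # 1.6: converts every fetched row to an Entry instance.
               # 1.5: converted only the first chunk of 100 (though most
               # backends had already fetched all rows from the database).
    qs[5]      # Served from the now fully populated result cache.

    # To convert rows lazily, one at a time, opt in explicitly:
    for entry in Entry.objects.all().iterator():
        pass   # no result cache is built; objects are created on demand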
44 tests/queries/tests.py
@@ -9,7 +9,6 @@
from django.core.exceptions import FieldError
from django.db import DatabaseError, connection, connections, DEFAULT_DB_ALIAS
from django.db.models import Count, F, Q
-from django.db.models.query import ITER_CHUNK_SIZE
from django.db.models.sql.where import WhereNode, EverythingNode, NothingNode
from django.db.models.sql.datastructures import EmptyResultSet
from django.test import TestCase, skipUnlessDBFeature
@@ -1211,16 +1210,6 @@ def test_ticket12239(self):
ordered=False
)
- def test_ticket7411(self):
- # Saving to db must work even with partially read result set in another
- # cursor.
- for num in range(2 * ITER_CHUNK_SIZE + 1):
- _ = Number.objects.create(num=num)
-
- for i, obj in enumerate(Number.objects.all()):
- obj.save()
- if i > 10: break
-
def test_ticket7759(self):
# Count should work with a partially read result set.
count = Number.objects.count()
@@ -1700,31 +1689,6 @@ def setUp(self):
ann1.notes.add(n1)
ann2 = Annotation.objects.create(name='a2', tag=t4)
- # This next test used to cause really weird PostgreSQL behavior, but it was
- # only apparent much later when the full test suite ran.
- # - Yeah, it leaves global ITER_CHUNK_SIZE to 2 instead of 100...
- #@unittest.expectedFailure
- def test_slicing_and_cache_interaction(self):
- # We can do slicing beyond what is currently in the result cache,
- # too.
-
- # We need to mess with the implementation internals a bit here to decrease the
- # cache fill size so that we don't read all the results at once.
- from django.db.models import query
- query.ITER_CHUNK_SIZE = 2
- qs = Tag.objects.all()
-
- # Fill the cache with the first chunk.
- self.assertTrue(bool(qs))
- self.assertEqual(len(qs._result_cache), 2)
-
- # Query beyond the end of the cache and check that it is filled out as required.
- self.assertEqual(repr(qs[4]), '<Tag: t5>')
- self.assertEqual(len(qs._result_cache), 5)
-
- # But querying beyond the end of the result set will fail.
- self.assertRaises(IndexError, lambda: qs[100])
-
def test_parallel_iterators(self):
# Test that parallel iterators work.
qs = Tag.objects.all()
@@ -2533,6 +2497,14 @@ def test_empty_nodes(self):
w = WhereNode(children=[empty_w, NothingNode()], connector='OR')
self.assertRaises(EmptyResultSet, w.as_sql, qn, connection)
+
+class IteratorExceptionsTest(TestCase):
+ def test_iter_exceptions(self):
+ qs = ExtraInfo.objects.only('author')
+ with self.assertRaises(AttributeError):
+ list(qs)
+
+
class NullJoinPromotionOrTest(TestCase):
def setUp(self):
self.d1 = ModelD.objects.create(name='foo')
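
The new ``IteratorExceptionsTest`` relies on evaluation now being all-or-nothing: any exception raised while converting rows to model instances surfaces on the first full evaluation (here, ``list(qs)``). A minimal sketch of the memoize-on-first-use pattern that ``_fetch_all()`` introduces, using illustrative names rather than Django internals:

    class CachedSequence:
        """Illustrative stand-in for the QuerySet result cache protocol."""
        def __init__(self, produce):
            self._produce = produce      # plays the role of QuerySet.iterator
            self._result_cache = None

        def _fetch_all(self):
            # Populate the cache exactly once; errors from produce()
            # propagate here, like the AttributeError in the test above.
            if self._result_cache is None:
                self._result_cache = list(self._produce())

        def __iter__(self):
            self._fetch_all()
            return iter(self._result_cache)

        def __len__(self):
            self._fetch_all()
            return len(self._result_cache)

    def boom():
        yield 1
        raise AttributeError("conversion failed")

    seq = CachedSequence(boom)
    # list(seq) raises AttributeError immediately, mirroring
    # IteratorExceptionsTest.test_iter_exceptions.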
