Comparing changes

base fork: django/django
head fork: akaariai/django
compare: non_chunked_reads
  • 1 commit
  • 3 files changed
  • 0 commit comments
  • 1 contributor
144 django/db/models/query.py
@@ -20,11 +20,6 @@
from django.utils import six
from django.utils import timezone
-# Used to control how many objects are worked with at once in some cases (e.g.
-# when deleting objects).
-CHUNK_SIZE = 100
-ITER_CHUNK_SIZE = CHUNK_SIZE
-
# The maximum number of items to display in a QuerySet.__repr__
REPR_OUTPUT_SIZE = 20
@@ -41,7 +36,6 @@ def __init__(self, model=None, query=None, using=None):
self._db = using
self.query = query or sql.Query(self.model)
self._result_cache = None
- self._iter = None
self._sticky_filter = False
self._for_write = False
self._prefetch_related_lookups = []
@@ -58,7 +52,7 @@ def __deepcopy__(self, memo):
"""
obj = self.__class__()
for k,v in self.__dict__.items():
- if k in ('_iter','_result_cache'):
+            if k == '_result_cache':
obj.__dict__[k] = None
else:
obj.__dict__[k] = copy.deepcopy(v, memo)
@@ -69,10 +63,9 @@ def __getstate__(self):
Allows the QuerySet to be pickled.
"""
# Force the cache to be fully populated.
- len(self)
+ self._fetch_all()
obj_dict = self.__dict__.copy()
- obj_dict['_iter'] = None
return obj_dict
def __repr__(self):
@@ -85,92 +78,32 @@ def __len__(self):
# Since __len__ is called quite frequently (for example, as part of
# list(qs), we make some effort here to be as efficient as possible
# whilst not messing up any existing iterators against the QuerySet.
- if self._result_cache is None:
- if self._iter:
- self._result_cache = list(self._iter)
- else:
- self._result_cache = list(self.iterator())
- elif self._iter:
- self._result_cache.extend(self._iter)
- if self._prefetch_related_lookups and not self._prefetch_done:
- self._prefetch_related_objects()
+ self._fetch_all()
return len(self._result_cache)
def __iter__(self):
- if self._prefetch_related_lookups and not self._prefetch_done:
- # We need all the results in order to be able to do the prefetch
- # in one go. To minimize code duplication, we use the __len__
- # code path which also forces this, and also does the prefetch
- len(self)
-
- if self._result_cache is None:
- self._iter = self.iterator()
- self._result_cache = []
- if self._iter:
- return self._result_iter()
- # Python's list iterator is better than our version when we're just
- # iterating over the cache.
+ """
+ The queryset iterator protocol uses four nested iterators in the
+ default iterator case:
+ 1. The DB backend's cursor iterator.
+ 2. sql/compiler.execute_sql()
+           - Returns 100 rows at a time (constants.GET_ITERATOR_CHUNK_SIZE)
+             using cursor.fetchmany(). This part is responsible for
+             doing some column masking and returning the rows in chunks.
+ 3. sql/compiler.results_iter()
+           - Returns one row at a time. At this point the rows are still just
+ tuples. In some cases the return values are converted to
+ Python values at this time (see resolve_columns(),
+ resolve_aggregate()).
+ 4. self.iterator()
+ - Responsible for turning the rows into model objects.
+ """
+ self._fetch_all()
return iter(self._result_cache)
- def _result_iter(self):
- pos = 0
- while 1:
- upper = len(self._result_cache)
- while pos < upper:
- yield self._result_cache[pos]
- pos = pos + 1
- if not self._iter:
- raise StopIteration
- if len(self._result_cache) <= pos:
- self._fill_cache()
-
- def __bool__(self):
- if self._prefetch_related_lookups and not self._prefetch_done:
- # We need all the results in order to be able to do the prefetch
- # in one go. To minimize code duplication, we use the __len__
- # code path which also forces this, and also does the prefetch
- len(self)
-
- if self._result_cache is not None:
- return bool(self._result_cache)
- try:
- next(iter(self))
- except StopIteration:
- return False
- return True
-
- def __nonzero__(self): # Python 2 compatibility
- return type(self).__bool__(self)
-
- def __contains__(self, val):
- # The 'in' operator works without this method, due to __iter__. This
- # implementation exists only to shortcut the creation of Model
- # instances, by bailing out early if we find a matching element.
- pos = 0
- if self._result_cache is not None:
- if val in self._result_cache:
- return True
- elif self._iter is None:
- # iterator is exhausted, so we have our answer
- return False
- # remember not to check these again:
- pos = len(self._result_cache)
- else:
- # We need to start filling the result cache out. The following
- # ensures that self._iter is not None and self._result_cache is not
- # None
- it = iter(self)
-
- # Carry on, one result at a time.
- while True:
- if len(self._result_cache) <= pos:
- self._fill_cache(num=1)
- if self._iter is None:
- # we ran out of items
- return False
- if self._result_cache[pos] == val:
- return True
- pos += 1
+    def __bool__(self):
+        self._fetch_all()
+        return bool(self._result_cache)
+
+    def __nonzero__(self):      # Python 2 compatibility
+        return type(self).__bool__(self)
def __getitem__(self, k):
"""
@@ -184,19 +117,6 @@ def __getitem__(self, k):
"Negative indexing is not supported."
if self._result_cache is not None:
- if self._iter is not None:
- # The result cache has only been partially populated, so we may
- # need to fill it out a bit more.
- if isinstance(k, slice):
- if k.stop is not None:
- # Some people insist on passing in strings here.
- bound = int(k.stop)
- else:
- bound = None
- else:
- bound = k + 1
- if len(self._result_cache) < bound:
- self._fill_cache(bound - len(self._result_cache))
return self._result_cache[k]
if isinstance(k, slice):
@@ -370,7 +290,7 @@ def count(self):
If the QuerySet is already fully cached this simply returns the length
of the cached results set to avoid multiple SELECT COUNT(*) calls.
"""
- if self._result_cache is not None and not self._iter:
+ if self._result_cache is not None:
return len(self._result_cache)
return self.query.get_count(using=self.db)
@@ -915,17 +835,11 @@ def _clone(self, klass=None, setup=False, **kwargs):
c._setup_query()
return c
- def _fill_cache(self, num=None):
- """
- Fills the result cache with 'num' more entries (or until the results
- iterator is exhausted).
- """
- if self._iter:
- try:
- for i in range(num or ITER_CHUNK_SIZE):
- self._result_cache.append(next(self._iter))
- except StopIteration:
- self._iter = None
+ def _fetch_all(self):
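+        # Fetch everything into _result_cache on first use; afterwards
+        # this is a cheap no-op.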
+ if self._result_cache is None:
+ self._result_cache = list(self.iterator())
+ if self._prefetch_related_lookups and not self._prefetch_done:
+ self._prefetch_related_objects()
def _next_is_sticky(self):
"""
21 docs/releases/1.6.txt
@@ -481,6 +481,27 @@ parameters. For example::
``SQLite`` users need to check and update such queries.
+QuerySet iteration
+~~~~~~~~~~~~~~~~~~
+
+``QuerySet`` iteration has been changed to convert all fetched rows to
+``Model`` objects immediately. In Django 1.5 and earlier, rows fetched
+from the database were converted to ``Model`` objects in chunks of 100.
+
+Most database backends already fetched all rows at once, so for those
+backends the number of fetched rows is unchanged; only the point at which
+the rows are converted to ``Model`` objects has moved. Of the core
+backends, only Oracle supported fetching rows in chunks.
+
+In practice, the change matters when the whole queryset isn't consumed:
+for example, when looping over only part of the queryset, or with any
+usage that ends up calling ``__bool__`` or ``__contains__``.
+
+It is still possible to convert the fetched rows to ``Model`` objects
+lazily by using the :meth:`~django.db.models.query.QuerySet.iterator()`
+method.
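+
+For example, with a hypothetical ``Author`` model::
+
+    # Every fetched row is converted to an Author instance right here.
+    authors = list(Author.objects.all())
+
+    # iterator() still converts rows lazily and bypasses the result cache.
+    for author in Author.objects.iterator():
+        print(author)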
+
Miscellaneous
~~~~~~~~~~~~~
44 tests/queries/tests.py
@@ -9,7 +9,6 @@
from django.core.exceptions import FieldError
from django.db import DatabaseError, connection, connections, DEFAULT_DB_ALIAS
from django.db.models import Count, F, Q
-from django.db.models.query import ITER_CHUNK_SIZE
from django.db.models.sql.where import WhereNode, EverythingNode, NothingNode
from django.db.models.sql.datastructures import EmptyResultSet
from django.test import TestCase, skipUnlessDBFeature
@@ -1211,16 +1210,6 @@ def test_ticket12239(self):
ordered=False
)
- def test_ticket7411(self):
- # Saving to db must work even with partially read result set in another
- # cursor.
- for num in range(2 * ITER_CHUNK_SIZE + 1):
- _ = Number.objects.create(num=num)
-
- for i, obj in enumerate(Number.objects.all()):
- obj.save()
- if i > 10: break
-
def test_ticket7759(self):
# Count should work with a partially read result set.
count = Number.objects.count()
@@ -1700,31 +1689,6 @@ def setUp(self):
ann1.notes.add(n1)
ann2 = Annotation.objects.create(name='a2', tag=t4)
- # This next test used to cause really weird PostgreSQL behavior, but it was
- # only apparent much later when the full test suite ran.
- # - Yeah, it leaves global ITER_CHUNK_SIZE to 2 instead of 100...
- #@unittest.expectedFailure
- def test_slicing_and_cache_interaction(self):
- # We can do slicing beyond what is currently in the result cache,
- # too.
-
- # We need to mess with the implementation internals a bit here to decrease the
- # cache fill size so that we don't read all the results at once.
- from django.db.models import query
- query.ITER_CHUNK_SIZE = 2
- qs = Tag.objects.all()
-
- # Fill the cache with the first chunk.
- self.assertTrue(bool(qs))
- self.assertEqual(len(qs._result_cache), 2)
-
- # Query beyond the end of the cache and check that it is filled out as required.
- self.assertEqual(repr(qs[4]), '<Tag: t5>')
- self.assertEqual(len(qs._result_cache), 5)
-
- # But querying beyond the end of the result set will fail.
- self.assertRaises(IndexError, lambda: qs[100])
-
def test_parallel_iterators(self):
# Test that parallel iterators work.
qs = Tag.objects.all()
@@ -2533,6 +2497,14 @@ def test_empty_nodes(self):
w = WhereNode(children=[empty_w, NothingNode()], connector='OR')
self.assertRaises(EmptyResultSet, w.as_sql, qn, connection)
+
+class IteratorExceptionsTest(TestCase):
+ def test_iter_exceptions(self):
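+        # The AttributeError raised while rows are converted to model
+        # objects must escape list(qs); _fetch_all() must not swallow it.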
+ qs = ExtraInfo.objects.only('author')
+ with self.assertRaises(AttributeError):
+ list(qs)
+
+
class NullJoinPromotionOrTest(TestCase):
def setUp(self):
self.d1 = ModelD.objects.create(name='foo')
