
queryset-refactor: Converted the queryset iterator to be a real iterator and
only populate the result cache on demand.

The result cache is populated 100 elements at a time, rather than one at a
time, for efficiency; this is a real win when the result set contains, for
example, 10,000 objects.

This also provides an efficient boolean (__nonzero__) test that doesn't use up
a lot of memory if you don't read all the results.

Refs #2430, #5987.


git-svn-id: http://code.djangoproject.com/svn/django/branches/queryset-refactor@7030 bcc190cf-cafb-0310-a4f2-bffc1f526a37
commit dd2251a653f45406a720203c35e40adac5411b5a (1 parent: 98abf27)
Authored by @malcolmt
Showing with 85 additions and 7 deletions.
  1. +48 −7 django/db/models/query.py
  2. +37 −0 tests/regressiontests/queries/models.py
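As an editorial illustration of the memory win described in the commit
message: with the old list-based cache, even a truth test forced every row
into memory, whereas after this change bool() stops after the first chunk.
A minimal sketch; the Entry model and the 10,000-row table are assumptions
for illustration, not part of this commit:

    # Hypothetical model with ~10,000 rows; names are illustrative only.
    qs = Entry.objects.all()
    if qs:               # now fetches at most one chunk (ITER_CHUNK_SIZE rows)
        print qs[0]      # served from the partially filled result cache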
django/db/models/query.py (+48 −7)
@@ -21,6 +21,7 @@
 # Used to control how many objects are worked with at once in some cases (e.g.
 # when deleting objects).
 CHUNK_SIZE = 100
+ITER_CHUNK_SIZE = CHUNK_SIZE
 
 class _QuerySet(object):
     "Represents a lazy database lookup for a set of objects"
@@ -28,19 +29,40 @@ def __init__(self, model=None, query=None):
         self.model = model
         self.query = query or sql.Query(self.model, connection)
         self._result_cache = None
+        self._iter = None
 
     ########################
     # PYTHON MAGIC METHODS #
     ########################
 
     def __repr__(self):
-        return repr(self._get_data())
+        return repr(list(iter(self)))
 
     def __len__(self):
-        return len(self._get_data())
+        return len(list(iter(self)))
 
     def __iter__(self):
-        return iter(self._get_data())
+        pos = 0
+        if self._result_cache is None:
+            self._iter = self.iterator()
+            self._result_cache = []
+        while 1:
+            upper = len(self._result_cache)
+            while pos < upper:
+                yield self._result_cache[pos]
+                pos = pos + 1
+            if not self._iter:
+                raise StopIteration
+            if len(self._result_cache) <= pos:
+                self._fill_cache()
+
+    def __nonzero__(self):
+        if self._result_cache is None:
+            try:
+                iter(self).next()
+            except StopIteration:
+                return False
+        return True
 
     def __getitem__(self, k):
         "Retrieve an item or slice from the set of results."
@@ -52,6 +74,15 @@ def __getitem__(self, k):
             "Negative indexing is not supported."
 
         if self._result_cache is not None:
+            if self._iter is not None:
+                # The result cache has only been partially populated, so we may
+                # need to fill it out a bit more.
+                if isinstance(k, slice):
+                    bound = k.stop
+                else:
+                    bound = k + 1
+                if len(self._result_cache) < bound:
+                    self._fill_cache(bound - len(self._result_cache))
             return self._result_cache[k]
 
         if isinstance(k, slice):
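To make the bound arithmetic in the hunk above concrete (an illustrative
sketch, not part of the diff): an integer index k requires k + 1 cached rows,
while a slice only needs rows up to its stop value before indexing into the
cache. This only applies while the cache is partially populated, i.e. while
self._iter is still live:

    # Illustrative only; 'qs' is any queryset with a partially filled cache.
    qs[4]      # bound = 4 + 1 = 5, so the cache is topped up to 5 rows
    qs[0:3]    # bound = k.stop = 3; no fetch needed if 3 rows are cached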
@@ -375,10 +406,17 @@ def _clone(self, klass=None, setup=False, **kwargs):
             c._setup_query()
         return c
 
-    def _get_data(self):
-        if self._result_cache is None:
-            self._result_cache = list(self.iterator())
-        return self._result_cache
+    def _fill_cache(self, num=None):
+        """
+        Fills the result cache with 'num' more entries (or until the results
+        iterator is exhausted).
+        """
+        if self._iter:
+            try:
+                for i in range(num or ITER_CHUNK_SIZE):
+                    self._result_cache.append(self._iter.next())
+            except StopIteration:
+                self._iter = None
 
 # Use the backend's QuerySet class if it defines one. Otherwise, use _QuerySet.
 if connection.features.uses_custom_queryset:
@@ -395,6 +433,9 @@ def __init__(self, *args, **kwargs):
         # QuerySet.clone() will also set up the _fields attribute with the
         # names of the model fields to select.
 
+    def __iter__(self):
+        return self.iterator()
+
     def iterator(self):
         self.field_names.extend([f for f in self.query.extra_select.keys()])
         for row in self.query.results_iter():
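Taken together, the new __iter__() and _fill_cache() implement a caching
iterator: every consumer pulls from one shared cache and only tops it up in
ITER_CHUNK_SIZE batches. The sketch below is an editorial restatement of that
pattern, not Django code; the ChunkedLazyList class and its 'source' parameter
are invented for illustration (Python 2 idioms, matching the codebase):

    ITER_CHUNK_SIZE = 100

    class ChunkedLazyList(object):
        # Sketch of the shared-cache iteration pattern above; names invented.
        def __init__(self, source):
            self._iter = iter(source)
            self._cache = []

        def _fill_cache(self, num=None):
            # Append up to 'num' items (default: one chunk) to the shared
            # cache; drop the source iterator once it is exhausted.
            if self._iter:
                try:
                    for i in range(num or ITER_CHUNK_SIZE):
                        self._cache.append(self._iter.next())
                except StopIteration:
                    self._iter = None

        def __iter__(self):
            # Each caller keeps its own position, but all callers share one
            # cache, so parallel iterators see identical results without
            # re-reading the underlying source.
            pos = 0
            while 1:
                while pos < len(self._cache):
                    yield self._cache[pos]
                    pos = pos + 1
                if not self._iter:
                    raise StopIteration
                self._fill_cache()

Two iterators over the same ChunkedLazyList interleave exactly like the i1/i2
doctest in the test file below.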
tests/regressiontests/queries/models.py (+37 −0)
@@ -501,5 +501,42 @@ class Y(models.Model):
 2
 >>> len(Item.objects.dates('created', 'day'))
 2
+
+Test that parallel iterators work.
+
+>>> qs = Tag.objects.all()
+>>> i1, i2 = iter(qs), iter(qs)
+>>> i1.next(), i1.next()
+(<Tag: t1>, <Tag: t2>)
+>>> i2.next(), i2.next(), i2.next()
+(<Tag: t1>, <Tag: t2>, <Tag: t3>)
+>>> i1.next()
+<Tag: t3>
+
+We can do slicing beyond what is currently in the result cache, too.
+
+# We need to mess with the implementation internals a bit here to decrease the
+# cache fill size so that we don't read all the results at once.
+>>> from django.db.models import query
+>>> query.ITER_CHUNK_SIZE = 2
+>>> qs = Tag.objects.all()
+
+# Fill the cache with the first chunk.
+>>> bool(qs)
+True
+>>> len(qs._result_cache)
+2
+
+# Query beyond the end of the cache and check that it is filled out as required.
+>>> qs[4]
+<Tag: t5>
+>>> len(qs._result_cache)
+5
+
+# But querying beyond the end of the result set will fail.
+>>> qs[100]
+Traceback (most recent call last):
+...
+IndexError: ...
 """}
