Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

queryset-refactor: Converted the queryset iterator to be a real iterator and

only populate the result cache on demand. We actually populate the result cache
100 elements at a time, rather than one at a time for efficiency, but this is a
real win when the resultset contains 10,000 objects for example.

This also provides an efficient boolean (__nonzero__) test that doesn't use up
a lot of memory if you don't read all the results.

Refs #2430, #5987.


git-svn-id: http://code.djangoproject.com/svn/django/branches/queryset-refactor@7030 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit dd2251a653f45406a720203c35e40adac5411b5a 1 parent 98abf27
Malcolm Tredinnick authored January 26, 2008
55  django/db/models/query.py
@@ -21,6 +21,7 @@
21 21
 # Used to control how many objects are worked with at once in some cases (e.g.
22 22
 # when deleting objects).
23 23
 CHUNK_SIZE = 100
  24
+ITER_CHUNK_SIZE = CHUNK_SIZE
24 25
 
25 26
 class _QuerySet(object):
26 27
     "Represents a lazy database lookup for a set of objects"
@@ -28,19 +29,40 @@ def __init__(self, model=None, query=None):
28 29
         self.model = model
29 30
         self.query = query or sql.Query(self.model, connection)
30 31
         self._result_cache = None
  32
+        self._iter = None
31 33
 
32 34
     ########################
33 35
     # PYTHON MAGIC METHODS #
34 36
     ########################
35 37
 
36 38
     def __repr__(self):
37  
-        return repr(self._get_data())
  39
+        return repr(list(iter(self)))
38 40
 
39 41
     def __len__(self):
40  
-        return len(self._get_data())
  42
+        return len(list(iter(self)))
41 43
 
42 44
     def __iter__(self):
43  
-        return iter(self._get_data())
  45
+        pos = 0
  46
+        if self._result_cache is None:
  47
+            self._iter = self.iterator()
  48
+            self._result_cache = []
  49
+        while 1:
  50
+            upper = len(self._result_cache)
  51
+            while pos < upper:
  52
+                yield self._result_cache[pos]
  53
+                pos = pos + 1
  54
+            if not self._iter:
  55
+                raise StopIteration
  56
+            if len(self._result_cache) <= pos:
  57
+                self._fill_cache()
  58
+
  59
+    def __nonzero__(self):
  60
+        if self._result_cache is None:
  61
+            try:
  62
+                iter(self).next()
  63
+            except StopIteration:
  64
+                return False
  65
+        return True
44 66
 
45 67
     def __getitem__(self, k):
46 68
         "Retrieve an item or slice from the set of results."
@@ -52,6 +74,15 @@ def __getitem__(self, k):
52 74
                 "Negative indexing is not supported."
53 75
 
54 76
         if self._result_cache is not None:
  77
+            if self._iter is not None:
  78
+                # The result cache has only been partially populated, so we may
  79
+                # need to fill it out a bit more.
  80
+                if isinstance(k, slice):
  81
+                    bound = k.stop
  82
+                else:
  83
+                    bound = k + 1
  84
+                if len(self._result_cache) < bound:
  85
+                    self._fill_cache(bound - len(self._result_cache))
55 86
             return self._result_cache[k]
56 87
 
57 88
         if isinstance(k, slice):
@@ -375,10 +406,17 @@ def _clone(self, klass=None, setup=False, **kwargs):
375 406
             c._setup_query()
376 407
         return c
377 408
 
378  
-    def _get_data(self):
379  
-        if self._result_cache is None:
380  
-            self._result_cache = list(self.iterator())
381  
-        return self._result_cache
  409
+    def _fill_cache(self, num=None):
  410
+        """
  411
+        Fills the result cache with 'num' more entries (or until the results
  412
+        iterator is exhausted).
  413
+        """
  414
+        if self._iter:
  415
+            try:
  416
+                for i in range(num or ITER_CHUNK_SIZE):
  417
+                    self._result_cache.append(self._iter.next())
  418
+            except StopIteration:
  419
+                self._iter = None
382 420
 
383 421
 # Use the backend's QuerySet class if it defines one. Otherwise, use _QuerySet.
384 422
 if connection.features.uses_custom_queryset:
@@ -395,6 +433,9 @@ def __init__(self, *args, **kwargs):
395 433
         # QuerySet.clone() will also set up the _fields attribute with the
396 434
         # names of the model fields to select.
397 435
 
  436
+    def __iter__(self):
  437
+        return self.iterator()
  438
+
398 439
     def iterator(self):
399 440
         self.field_names.extend([f for f in self.query.extra_select.keys()])
400 441
         for row in self.query.results_iter():
37  tests/regressiontests/queries/models.py
@@ -501,5 +501,42 @@ class Y(models.Model):
501 501
 2
502 502
 >>> len(Item.objects.dates('created', 'day'))
503 503
 2
  504
+
  505
+Test that parallel iterators work.
  506
+
  507
+>>> qs = Tag.objects.all()
  508
+>>> i1, i2 = iter(qs), iter(qs)
  509
+>>> i1.next(), i1.next()
  510
+(<Tag: t1>, <Tag: t2>)
  511
+>>> i2.next(), i2.next(), i2.next()
  512
+(<Tag: t1>, <Tag: t2>, <Tag: t3>)
  513
+>>> i1.next()
  514
+<Tag: t3>
  515
+
  516
+We can do slicing beyond what is currently in the result cache, too.
  517
+
  518
+# We need to mess with the implementation internals a bit here to decrease the
  519
+# cache fill size so that we don't read all the results at once.
  520
+>>> from django.db.models import query
  521
+>>> query.ITER_CHUNK_SIZE = 2
  522
+>>> qs = Tag.objects.all()
  523
+
  524
+# Fill the cache with the first chunk.
  525
+>>> bool(qs)
  526
+True
  527
+>>> len(qs._result_cache)
  528
+2
  529
+
  530
+# Query beyond the end of the cache and check that it is filled out as required.
  531
+>>> qs[4]
  532
+<Tag: t5>
  533
+>>> len(qs._result_cache)
  534
+5
  535
+
  536
+# But querying beyond the end of the result set will fail.
  537
+>>> qs[100]
  538
+Traceback (most recent call last):
  539
+...
  540
+IndexError: ...
504 541
 """}
505 542
 

0 notes on commit dd2251a

Please sign in to comment.
Something went wrong with that request. Please try again.