Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fixed #17003 - prefetch_related should support foreign keys/one-to-one

Support for `GenericForeignKey` is also included.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@16939 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit 052a011ee6122482a471795c1994bbcfdb069611 1 parent 672f2db
Luke Plant authored
52  django/contrib/contenttypes/generic.py
@@ -2,7 +2,10 @@
2 2
 Classes allowing "generic" relations through ContentType and object-id fields.
3 3
 """
4 4
 
  5
+from collections import defaultdict
5 6
 from functools import partial
  7
+from operator import attrgetter
  8
+
6 9
 from django.core.exceptions import ObjectDoesNotExist
7 10
 from django.db import connection
8 11
 from django.db.models import signals
@@ -59,6 +62,49 @@ def get_content_type(self, obj=None, id=None, using=None):
59 62
             # This should never happen. I love comments like this, don't you?
60 63
             raise Exception("Impossible arguments to GFK.get_content_type!")
61 64
 
  65
+    def get_prefetch_query_set(self, instances):
  66
+        # For efficiency, group the instances by content type and then do one
  67
+        # query per model
  68
+        fk_dict = defaultdict(list)
  69
+        # We need one instance for each group in order to get the right db:
  70
+        instance_dict = {}
  71
+        ct_attname = self.model._meta.get_field(self.ct_field).get_attname()
  72
+        for instance in instances:
  73
+            # We avoid looking for values if either ct_id or fkey value is None
  74
+            ct_id = getattr(instance, ct_attname)
  75
+            if ct_id is not None:
  76
+                fk_val = getattr(instance, self.fk_field)
  77
+                if fk_val is not None:
  78
+                    fk_dict[ct_id].append(fk_val)
  79
+                    instance_dict[ct_id] = instance
  80
+
  81
+        ret_val = []
  82
+        for ct_id, fkeys in fk_dict.items():
  83
+            instance = instance_dict[ct_id]
  84
+            ct = self.get_content_type(id=ct_id, using=instance._state.db)
  85
+            ret_val.extend(ct.get_all_objects_for_this_type(pk__in=fkeys))
  86
+
  87
+        # For doing the join in Python, we have to match both the FK val and the
  88
+        # content type, so the 'attr' vals we return need to be callables that
  89
+        # will return a (fk, class) pair.
  90
+        def gfk_key(obj):
  91
+            ct_id = getattr(obj, ct_attname)
  92
+            if ct_id is None:
  93
+                return None
  94
+            else:
  95
+                return (getattr(obj, self.fk_field),
  96
+                        self.get_content_type(id=ct_id,
  97
+                                              using=obj._state.db).model_class())
  98
+
  99
+        return (ret_val,
  100
+                lambda obj: (obj._get_pk_val(), obj.__class__),
  101
+                gfk_key,
  102
+                True,
  103
+                self.cache_attr)
  104
+
  105
+    def is_cached(self, instance):
  106
+        return hasattr(instance, self.cache_attr)
  107
+
62 108
     def __get__(self, instance, instance_type=None):
63 109
         if instance is None:
64 110
             return self
@@ -282,7 +328,11 @@ def get_prefetch_query_set(self, instances):
282 328
                     [obj._get_pk_val() for obj in instances]
283 329
                 }
284 330
             qs = super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**query)
285  
-            return (qs, self.object_id_field_name, 'pk')
  331
+            return (qs,
  332
+                    attrgetter(self.object_id_field_name),
  333
+                    lambda obj: obj._get_pk_val(),
  334
+                    False,
  335
+                    self.prefetch_cache_name)
286 336
 
287 337
         def add(self, *objs):
288 338
             for obj in objs:
6  django/contrib/contenttypes/models.py
@@ -113,5 +113,11 @@ def get_object_for_this_type(self, **kwargs):
113 113
         """
114 114
         return self.model_class()._base_manager.using(self._state.db).get(**kwargs)
115 115
 
  116
+    def get_all_objects_for_this_type(self, **kwargs):
  117
+        """
  118
+        Returns all objects of this type for the keyword arguments given.
  119
+        """
  120
+        return self.model_class()._base_manager.using(self._state.db).filter(**kwargs)
  121
+
116 122
     def natural_key(self):
117 123
         return (self.app_label, self.model)
86  django/db/models/fields/related.py
... ...
@@ -1,3 +1,5 @@
@@ -227,6 +229,22 @@ def __init__(self, related):
@@ -234,8 +252,7 @@ def __get__(self, instance, instance_type=None):
@@ -283,14 +300,40 @@ class ReverseSingleRelatedObjectDescriptor(object):
@@ -303,16 +346,9 @@ def __get__(self, instance, instance_type=None):
@@ -425,15 +461,15 @@ def get_query_set(self):
@@ -507,12 +543,6 @@ def get_query_set(self):
@@ -534,7 +564,11 @@ def get_prefetch_query_set(self, instances):
125  django/db/models/query.py
@@ -1612,36 +1612,42 @@ def prefetch_related_objects(result_cache, related_lookups):
1612 1612
                 break
1613 1613
 
1614 1614
             # Descend down tree
1615  
-            try:
1616  
-                rel_obj = getattr(obj_list[0], attr)
1617  
-            except AttributeError:
  1615
+
  1616
+            # We assume that objects retrieved are homogeneous (which is the premise
  1617
+            # of prefetch_related), so what applies to first object applies to all.
  1618
+            first_obj = obj_list[0]
  1619
+            prefetcher, attr_found, is_fetched = get_prefetcher(first_obj, attr)
  1620
+
  1621
+            if not attr_found:
1618 1622
                 raise AttributeError("Cannot find '%s' on %s object, '%s' is an invalid "
1619 1623
                                      "parameter to prefetch_related()" %
1620  
-                                     (attr, obj_list[0].__class__.__name__, lookup))
  1624
+                                     (attr, first_obj.__class__.__name__, lookup))
1621 1625
 
1622  
-            can_prefetch = hasattr(rel_obj, 'get_prefetch_query_set')
1623  
-            if level == len(attrs) - 1 and not can_prefetch:
1624  
-                # Last one, this *must* resolve to a related manager.
1625  
-                raise ValueError("'%s' does not resolve to a supported 'many related"
1626  
-                                 " manager' for model %s - this is an invalid"
1627  
-                                 " parameter to prefetch_related()."
1628  
-                                 % (lookup, model.__name__))
  1626
+            if level == len(attrs) - 1 and prefetcher is None:
  1627
+                # Last one, this *must* resolve to something that supports
  1628
+                # prefetching, otherwise there is no point adding it and the
  1629
+                # developer asking for it has made a mistake.
  1630
+                raise ValueError("'%s' does not resolve to a item that supports "
  1631
+                                 "prefetching - this is an invalid parameter to "
  1632
+                                 "prefetch_related()." % lookup)
1629 1633
 
1630  
-            if can_prefetch:
  1634
+            if prefetcher is not None and not is_fetched:
1631 1635
                 # Check we didn't do this already
1632 1636
                 current_lookup = LOOKUP_SEP.join(attrs[0:level+1])
1633 1637
                 if current_lookup in done_queries:
1634 1638
                     obj_list = done_queries[current_lookup]
1635 1639
                 else:
1636  
-                    relmanager = rel_obj
1637  
-                    obj_list, additional_prl = prefetch_one_level(obj_list, relmanager, attr)
  1640
+                    obj_list, additional_prl = prefetch_one_level(obj_list, prefetcher, attr)
1638 1641
                     for f in additional_prl:
1639 1642
                         new_prl = LOOKUP_SEP.join([current_lookup, f])
1640 1643
                         related_lookups.append(new_prl)
1641 1644
                     done_queries[current_lookup] = obj_list
1642 1645
             else:
1643  
-                # Assume we've got some singly related object. We replace
1644  
-                # the current list of parent objects with that list.
  1646
+                # Either a singly related object that has already been fetched
  1647
+                # (e.g. via select_related), or hopefully some other property
  1648
+                # that doesn't support prefetching but needs to be traversed.
  1649
+
  1650
+                # We replace the current list of parent objects with that list.
1645 1651
                 obj_list = [getattr(obj, attr) for obj in obj_list]
1646 1652
 
1647 1653
                 # Filter out 'None' so that we can continue with nullable
@@ -1649,18 +1655,73 @@ def prefetch_related_objects(result_cache, related_lookups):
1649 1655
                 obj_list = [obj for obj in obj_list if obj is not None]
1650 1656
 
1651 1657
 
1652  
-def prefetch_one_level(instances, relmanager, attname):
  1658
+def get_prefetcher(instance, attr):
  1659
+    """
  1660
+    For the attribute 'attr' on the given instance, finds
  1661
+    an object that has a get_prefetch_query_set().
  1662
+    Return a 3 tuple containing:
  1663
+    (the object with get_prefetch_query_set (or None),
  1664
+     a boolean that is False if the attribute was not found at all,
  1665
+     a boolean that is True if the attribute has already been fetched)
  1666
+    """
  1667
+    prefetcher = None
  1668
+    attr_found = False
  1669
+    is_fetched = False
  1670
+
  1671
+    # For singly related objects, we have to avoid getting the attribute
  1672
+    # from the object, as this will trigger the query. So we first try
  1673
+    # on the class, in order to get the descriptor object.
  1674
+    rel_obj_descriptor = getattr(instance.__class__, attr, None)
  1675
+    if rel_obj_descriptor is None:
  1676
+        try:
  1677
+            rel_obj = getattr(instance, attr)
  1678
+            attr_found = True
  1679
+        except AttributeError:
  1680
+            pass
  1681
+    else:
  1682
+        attr_found = True
  1683
+        if rel_obj_descriptor:
  1684
+            # singly related object, descriptor object has the
  1685
+            # get_prefetch_query_set() method.
  1686
+            if hasattr(rel_obj_descriptor, 'get_prefetch_query_set'):
  1687
+                prefetcher = rel_obj_descriptor
  1688
+                if rel_obj_descriptor.is_cached(instance):
  1689
+                    is_fetched = True
  1690
+            else:
  1691
+                # descriptor doesn't support prefetching, so we go ahead and get
  1692
+                # the attribute on the instance rather than the class to
  1693
+                # support many related managers
  1694
+                rel_obj = getattr(instance, attr)
  1695
+                if hasattr(rel_obj, 'get_prefetch_query_set'):
  1696
+                    prefetcher = rel_obj
  1697
+    return prefetcher, attr_found, is_fetched
  1698
+
  1699
+
  1700
+def prefetch_one_level(instances, prefetcher, attname):
1653 1701
     """
1654 1702
     Helper function for prefetch_related_objects
1655 1703
 
1656  
-    Runs prefetches on all instances using the manager relmanager,
1657  
-    assigning results to queryset against instance.attname.
  1704
+    Runs prefetches on all instances using the prefetcher object,
  1705
+    assigning results to relevant caches in instance.
1658 1706
 
1659 1707
     The prefetched objects are returned, along with any additional
1660 1708
     prefetches that must be done due to prefetch_related lookups
1661 1709
     found from default managers.
1662 1710
     """
1663  
-    rel_qs, rel_obj_attr, instance_attr = relmanager.get_prefetch_query_set(instances)
  1711
+    # prefetcher must have a method get_prefetch_query_set() which takes a list
  1712
+    # of instances, and returns a tuple:
  1713
+
  1714
+    # (queryset of instances of self.model that are related to passed in instances,
  1715
+    #  callable that gets value to be matched for returned instances,
  1716
+    #  callable that gets value to be matched for passed in instances,
  1717
+    #  boolean that is True for singly related objects,
  1718
+    #  cache name to assign to).
  1719
+
  1720
+    # The 'values to be matched' must be hashable as they will be used
  1721
+    # in a dictionary.
  1722
+
  1723
+    rel_qs, rel_obj_attr, instance_attr, single, cache_name =\
  1724
+        prefetcher.get_prefetch_query_set(instances)
1664 1725
     # We have to handle the possibility that the default manager itself added
1665 1726
     # prefetch_related lookups to the QuerySet we just got back. We don't want to
1666 1727
     # trigger the prefetch_related functionality by evaluating the query.
@@ -1676,17 +1737,25 @@ def prefetch_one_level(instances, relmanager, attname):
1676 1737
 
1677 1738
     rel_obj_cache = {}
1678 1739
     for rel_obj in all_related_objects:
1679  
-        rel_attr_val = getattr(rel_obj, rel_obj_attr)
  1740
+        rel_attr_val = rel_obj_attr(rel_obj)
1680 1741
         if rel_attr_val not in rel_obj_cache:
1681 1742
             rel_obj_cache[rel_attr_val] = []
1682 1743
         rel_obj_cache[rel_attr_val].append(rel_obj)
1683 1744
 
1684 1745
     for obj in instances:
1685  
-        qs = getattr(obj, attname).all()
1686  
-        instance_attr_val = getattr(obj, instance_attr)
1687  
-        qs._result_cache = rel_obj_cache.get(instance_attr_val, [])
1688  
-        # We don't want the individual qs doing prefetch_related now, since we
1689  
-        # have merged this into the current work.
1690  
-        qs._prefetch_done = True
1691  
-        obj._prefetched_objects_cache[attname] = qs
  1746
+        instance_attr_val = instance_attr(obj)
  1747
+        vals = rel_obj_cache.get(instance_attr_val, [])
  1748
+        if single:
  1749
+            # Need to assign to single cache on instance
  1750
+            if vals:
  1751
+                setattr(obj, cache_name, vals[0])
  1752
+        else:
  1753
+            # Multi, attribute represents a manager with an .all() method that
  1754
+            # returns a QuerySet
  1755
+            qs = getattr(obj, attname).all()
  1756
+            qs._result_cache = vals
  1757
+            # We don't want the individual qs doing prefetch_related now, since we
  1758
+            # have merged this into the current work.
  1759
+            qs._prefetch_done = True
  1760
+            obj._prefetched_objects_cache[cache_name] = qs
1692 1761
     return all_related_objects, additional_prl
89  docs/ref/models/querysets.txt
@@ -696,14 +696,26 @@ prefetch_related
696 696
 .. versionadded:: 1.4
697 697
 
698 698
 Returns a ``QuerySet`` that will automatically retrieve, in a single batch,
699  
-related many-to-many and many-to-one objects for each of the specified lookups.
700  
-
701  
-This is similar to ``select_related`` for the 'many related objects' case, but
702  
-note that ``prefetch_related`` causes a separate query to be issued for each set
703  
-of related objects that you request, unlike ``select_related`` which modifies
704  
-the original query with joins in order to get the related objects. With
705  
-``prefetch_related``, the additional queries are done as soon as the QuerySet
706  
-begins to be evaluated.
  699
+related objects for each of the specified lookups.
  700
+
  701
+This has a similar purpose to ``select_related``, in that both are designed to
  702
+stop the deluge of database queries that is caused by accessing related objects,
  703
+but the strategy is quite different.
  704
+
  705
+``select_related`` works by creating a SQL join and including the fields of the
  706
+related object in the SELECT statement. For this reason, ``select_related`` gets
  707
+the related objects in the same database query. However, to avoid the much
  708
+larger result set that would result from joining across a 'many' relationship,
  709
+``select_related`` is limited to single-valued relationships - foreign key and
  710
+one-to-one.
  711
+
  712
+``prefetch_related``, on the other hand, does a separate lookup for each
  713
+relationship, and does the 'joining' in Python. This allows it to prefetch
  714
+many-to-many and many-to-one objects, which cannot be done using
  715
+``select_related``, in addition to the foreign key and one-to-one relationships
  716
+that are supported by ``select_related``. It also supports prefetching of
  717
+:class:`~django.contrib.contenttypes.generic.GenericRelation` and
  718
+:class:`~django.contrib.contenttypes.generic.GenericForeignKey`.
707 719
 
708 720
 For example, suppose you have these models::
709 721
 
@@ -733,14 +745,17 @@ All the relevant toppings will be fetched in a single query, and used to make
733 745
 ``QuerySets`` that have a pre-filled cache of the relevant results. These
734 746
 ``QuerySets`` are then used in the ``self.toppings.all()`` calls.
735 747
 
736  
-Please note that use of ``prefetch_related`` will mean that the additional
737  
-queries run will **always** be executed - even if you never use the related
738  
-objects - and it always fully populates the result cache on the primary
739  
-``QuerySet`` (which can sometimes be avoided in other cases).
  748
+The additional queries are executed after the QuerySet has begun to be evaluated
  749
+and the primary query has been executed. Note that the result cache of the
  750
+primary QuerySet and all specified related objects will then be fully loaded
  751
+into memory, which is often avoided in other cases - even after a query has been
  752
+executed in the database, QuerySet normally tries to make use of chunking
  753
+between the database to avoid loading all objects into memory before you need
  754
+them.
740 755
 
741 756
 Also remember that, as always with QuerySets, any subsequent chained methods
742  
-will ignore previously cached results, and retrieve data using a fresh database
743  
-query. So, if you write the following:
  757
+which imply a different database query will ignore previously cached results,
  758
+and retrieve data using a fresh database query. So, if you write the following:
744 759
 
745 760
     >>> pizzas = Pizza.objects.prefetch_related('toppings')
746 761
     >>> [list(pizza.toppings.filter(spicy=True)) for pizza in pizzas]
@@ -749,12 +764,6 @@ query. So, if you write the following:
749 764
 you - in fact it hurts performance, since you have done a database query that
750 765
 you haven't used. So use this feature with caution!
751 766
 
752  
-The lookups that must be supplied to this method can be any attributes on the
753  
-model instances which represent related queries that return multiple
754  
-objects. This includes attributes representing the 'many' side of ``ForeignKey``
755  
-relationships, forward and reverse ``ManyToManyField`` attributes, and also any
756  
-``GenericRelations``.
757  
-
758 767
 You can also use the normal join syntax to do related fields of related
759 768
 fields. Suppose we have an additional model to the example above::
760 769
 
@@ -770,24 +779,40 @@ This will prefetch all pizzas belonging to restaurants, and all toppings
770 779
 belonging to those pizzas. This will result in a total of 3 database queries -
771 780
 one for the restaurants, one for the pizzas, and one for the toppings.
772 781
 
773  
-    >>> Restaurant.objects.select_related('best_pizza').prefetch_related('best_pizza__toppings')
  782
+    >>> Restaurant.objects.prefetch_related('best_pizza__toppings')
774 783
 
775 784
 This will fetch the best pizza and all the toppings for the best pizza for each
776  
-restaurant. This will be done in 2 database queries - one for the restaurants
777  
-and 'best pizzas' combined (achieved through use of ``select_related``), and one
778  
-for the toppings.
  785
+restaurant. This will be done in 3 database queries - one for the restaurants,
  786
+one for the 'best pizzas', and one for the toppings.
  787
+
  788
+Of course, the ``best_pizza`` relationship could also be fetched using
  789
+``select_related`` to reduce the query count to 2:
  790
+
  791
+    >>> Restaurant.objects.select_related('best_pizza').prefetch_related('best_pizza__toppings')
  792
+
  793
+Since the prefetch is executed after the main query (which includes the joins
  794
+needed by ``select_related``), it is able to detect that the ``best_pizza``
  795
+objects have already been fetched, and it will skip fetching them again.
779 796
 
780  
-Chaining ``prefetch_related`` calls will accumulate the fields that should have
781  
-this behavior applied. To clear any ``prefetch_related`` behavior, pass `None`
782  
-as a parameter::
  797
+Chaining ``prefetch_related`` calls will accumulate the lookups that are
  798
+prefetched. To clear any ``prefetch_related`` behavior, pass `None` as a
  799
+parameter::
783 800
 
784 801
    >>> non_prefetched = qs.prefetch_related(None)
785 802
 
786  
-One difference when using ``prefetch_related`` is that, in some circumstances,
787  
-objects created by a query can be shared between the different objects that they
788  
-are related to i.e. a single Python model instance can appear at more than one
789  
-point in the tree of objects that are returned. Normally this behavior will not
790  
-be a problem, and will in fact save both memory and CPU time.
  803
+One difference to note when using ``prefetch_related`` is that objects created
  804
+by a query can be shared between the different objects that they are related to
  805
+i.e. a single Python model instance can appear at more than one point in the
  806
+tree of objects that are returned. This will normally happen with foreign key
  807
+relationships. Typically this behavior will not be a problem, and will in fact
  808
+save both memory and CPU time.
  809
+
  810
+While ``prefetch_related`` supports prefetching ``GenericForeignKey``
  811
+relationships, the number of queries will depend on the data. Since a
  812
+``GenericForeignKey`` can reference data in multiple tables, one query per table
  813
+referenced is needed, rather than one query for all the items. There could be
  814
+additional queries on the ``ContentType`` table if the relevant rows have not
  815
+already been fetched.
791 816
 
792 817
 extra
793 818
 ~~~~~
19  docs/releases/1.4.txt
@@ -66,15 +66,18 @@ information.
66 66
 ``QuerySet.prefetch_related``
67 67
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
68 68
 
69  
-Analagous to :meth:`~django.db.models.query.QuerySet.select_related` but for
70  
-many-to-many relationships,
  69
+Similar to :meth:`~django.db.models.query.QuerySet.select_related` but with a
  70
+different strategy and broader scope,
71 71
 :meth:`~django.db.models.query.QuerySet.prefetch_related` has been added to
72  
-:class:`~django.db.models.query.QuerySet`. This method returns a new ``QuerySet``
73  
-that will prefetch in a single batch each of the specified related lookups as
74  
-soon as it begins to be evaluated (e.g. by iterating over it). This enables you
75  
-to fix many instances of a very common performance problem, in which your code
76  
-ends up doing O(n) database queries (or worse) if objects on your primary
77  
-``QuerySet`` each have many related objects that you also need.
  72
+:class:`~django.db.models.query.QuerySet`. This method returns a new
  73
+``QuerySet`` that will prefetch in a single batch each of the specified related
  74
+lookups as soon as it begins to be evaluated. Unlike ``select_related``, it does
  75
+the joins in Python, not in the database, and supports many-to-many
  76
+relationships, :class:`~django.contrib.contenttypes.generic.GenericForeignKey`
  77
+and more. This enables you to fix many instances of a very common performance
  78
+problem, in which your code ends up doing O(n) database queries (or worse) if
  79
+objects on your primary ``QuerySet`` each have many related objects that you
  80
+also need.
78 81
 
79 82
 HTML5
80 83
 ~~~~~
6  tests/modeltests/prefetch_related/models.py
@@ -104,13 +104,17 @@ class Meta:
104  tests/modeltests/prefetch_related/tests.py
@@ -54,6 +54,13 @@ def test_m2m_reverse(self):
@@ -175,12 +182,12 @@ def test_attribute_error(self):
@@ -222,39 +229,68 @@ def test_m2m_then_m2m(self):
@@ -311,9 +347,14 @@ def test_m2m_to_inheriting_model(self):
@@ -406,6 +447,8 @@ def setUp(self):
@@ -416,3 +459,16 @@ def test_traverse_nullable(self):

0 notes on commit 052a011

Please sign in to comment.
Something went wrong with that request. Please try again.