Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fixed #16715 -- Fixed join promotion logic for nested nullable FKs

The joins for nested nullable foreign keys were often created as INNER
when they should have been OUTER joins. The reason was that only the
first join in the chain was promoted correctly. There were also issues
with select_related etc.

The basic structure for this problem was:
  A -[nullable]-> B -[nonnull]-> C

And the basic problem was that the A->B join was correctly LOUTER,
the B->C join not.

The major change taken in this patch is that now if we promote a join
A->B, we will automatically promote joins B->X for all X in the query.
Also, we now make sure there aren't ever join chains like:
   a LOUTER b INNER c
If the a -> b needs to be LOUTER, then the INNER at the end of the
chain will cancel the LOUTER join and we have a broken query.

Sebastian reported this problem and did also major portions of the
patch.
  • Loading branch information...
commit 01b9c3d5193fe61b82ae8b26242a13fdec22f211 1 parent d7a2e81
Anssi Kääriäinen authored August 21, 2012
12  django/db/models/sql/compiler.py
@@ -470,9 +470,7 @@ def _setup_joins(self, pieces, opts, alias):
470 470
         # Must use left outer joins for nullable fields and their relations.
471 471
         # Ordering or distinct must not affect the returned set, and INNER
472 472
         # JOINS for nullable fields could do this.
473  
-        if joins_to_promote:
474  
-            self.query.promote_alias_chain(joins_to_promote,
475  
-                self.query.alias_map[joins_to_promote[0]].join_type == self.query.LOUTER)
  473
+        self.query.promote_joins(joins_to_promote)
476 474
         return field, col, alias, joins, opts
477 475
 
478 476
     def _final_join_removal(self, col, alias):
@@ -645,8 +643,6 @@ def fill_related_selections(self, opts=None, root_alias=None, cur_depth=1,
645 643
                     alias_chain.append(alias)
646 644
                     for (dupe_opts, dupe_col) in dupe_set:
647 645
                         self.query.update_dupe_avoidance(dupe_opts, dupe_col, alias)
648  
-                if self.query.alias_map[root_alias].join_type == self.query.LOUTER:
649  
-                    self.query.promote_alias_chain(alias_chain, True)
650 646
             else:
651 647
                 alias = root_alias
652 648
 
@@ -663,8 +659,6 @@ def fill_related_selections(self, opts=None, root_alias=None, cur_depth=1,
663 659
             columns, aliases = self.get_default_columns(start_alias=alias,
664 660
                     opts=f.rel.to._meta, as_pairs=True)
665 661
             self.query.related_select_cols.extend(columns)
666  
-            if self.query.alias_map[alias].join_type == self.query.LOUTER:
667  
-                self.query.promote_alias_chain(aliases, True)
668 662
             self.query.related_select_fields.extend(f.rel.to._meta.fields)
669 663
             if restricted:
670 664
                 next = requested.get(f.name, {})
@@ -738,7 +732,9 @@ def fill_related_selections(self, opts=None, root_alias=None, cur_depth=1,
738 732
                 self.query.related_select_fields.extend(model._meta.fields)
739 733
 
740 734
                 next = requested.get(f.related_query_name(), {})
741  
-                new_nullable = f.null or None
  735
+                # Use True here because we are looking at the _reverse_ side of
  736
+                # the relation, which is always nullable.
  737
+                new_nullable = True
742 738
 
743 739
                 self.fill_related_selections(model._meta, table, cur_depth+1,
744 740
                     used, next, restricted, new_nullable)
97  django/db/models/sql/query.py
@@ -505,7 +505,7 @@ def combine(self, rhs, connector):
505 505
                 # Again, some of the tables won't have aliases due to
506 506
                 # the trimming of unnecessary tables.
507 507
                 if self.alias_refcount.get(alias) or rhs.alias_refcount.get(alias):
508  
-                    self.promote_alias(alias, True)
  508
+                    self.promote_joins([alias], True)
509 509
 
510 510
         # Now relabel a copy of the rhs where-clause and add it to the current
511 511
         # one.
@@ -682,32 +682,38 @@ def unref_alias(self, alias, amount=1):
682 682
         """ Decreases the reference count for this alias. """
683 683
         self.alias_refcount[alias] -= amount
684 684
 
685  
-    def promote_alias(self, alias, unconditional=False):
686  
-        """
687  
-        Promotes the join type of an alias to an outer join if it's possible
688  
-        for the join to contain NULL values on the left. If 'unconditional' is
689  
-        False, the join is only promoted if it is nullable, otherwise it is
690  
-        always promoted.
691  
-
692  
-        Returns True if the join was promoted by this call.
693  
-        """
694  
-        if ((unconditional or self.alias_map[alias].nullable) and
695  
-                self.alias_map[alias].join_type != self.LOUTER):
696  
-            data = self.alias_map[alias]
697  
-            data = data._replace(join_type=self.LOUTER)
698  
-            self.alias_map[alias] = data
699  
-            return True
700  
-        return False
701  
-
702  
-    def promote_alias_chain(self, chain, must_promote=False):
703  
-        """
704  
-        Walks along a chain of aliases, promoting the first nullable join and
705  
-        any joins following that. If 'must_promote' is True, all the aliases in
706  
-        the chain are promoted.
707  
-        """
708  
-        for alias in chain:
709  
-            if self.promote_alias(alias, must_promote):
710  
-                must_promote = True
  685
+    def promote_joins(self, aliases, unconditional=False):
  686
+        """
  687
+        Promotes recursively the join type of given aliases and its children to
  688
+        an outer join. If 'unconditional' is False, the join is only promoted if
  689
+        it is nullable or the parent join is an outer join.
  690
+
  691
+        Note about join promotion: When promoting any alias, we make sure all
  692
+        joins which start from that alias are promoted, too. When adding a join
  693
+        in join(), we make sure any join added to already existing LOUTER join
  694
+        is generated as LOUTER. This ensures we don't ever have broken join
  695
+        chains which contain first a LOUTER join, then an INNER JOIN, that is
  696
+        this kind of join should never be generated: a LOUTER b INNER c. The
  697
+        reason for avoiding this type of join chain is that the INNER after
  698
+        the LOUTER will effectively remove any effect the LOUTER had.
  699
+        """
  700
+        aliases = list(aliases)
  701
+        while aliases:
  702
+            alias = aliases.pop(0)
  703
+            parent_alias = self.alias_map[alias].lhs_alias
  704
+            parent_louter = (parent_alias
  705
+                and self.alias_map[parent_alias].join_type == self.LOUTER)
  706
+            already_louter = self.alias_map[alias].join_type == self.LOUTER
  707
+            if ((unconditional or self.alias_map[alias].nullable
  708
+                 or parent_louter) and not already_louter):
  709
+                data = self.alias_map[alias]._replace(join_type=self.LOUTER)
  710
+                self.alias_map[alias] = data
  711
+                # Join type of 'alias' changed, so re-examine all aliases that
  712
+                # refer to this one.
  713
+                aliases.extend(
  714
+                    join for join in self.alias_map.keys()
  715
+                    if (self.alias_map[join].lhs_alias == alias
  716
+                        and join not in aliases))
711 717
 
712 718
     def reset_refcounts(self, to_counts):
713 719
         """
@@ -726,19 +732,10 @@ def promote_unused_aliases(self, initial_refcounts, used_aliases):
726 732
         then and which ones haven't been used and promotes all of those
727 733
         aliases, plus any children of theirs in the alias tree, to outer joins.
728 734
         """
729  
-        # FIXME: There's some (a lot of!) overlap with the similar OR promotion
730  
-        # in add_filter(). It's not quite identical, but is very similar. So
731  
-        # pulling out the common bits is something for later.
732  
-        considered = {}
733 735
         for alias in self.tables:
734  
-            if alias not in used_aliases:
735  
-                continue
736  
-            if (alias not in initial_refcounts or
  736
+            if alias in used_aliases and (alias not in initial_refcounts or
737 737
                     self.alias_refcount[alias] == initial_refcounts[alias]):
738  
-                parent = self.alias_map[alias].lhs_alias
739  
-                must_promote = considered.get(parent, False)
740  
-                promoted = self.promote_alias(alias, must_promote)
741  
-                considered[alias] = must_promote or promoted
  738
+                self.promote_joins([alias])
742 739
 
743 740
     def change_aliases(self, change_map):
744 741
         """
@@ -875,6 +872,9 @@ def join(self, connection, always_create=False, exclusions=(),
875 872
         LOUTER join type. This is used when joining certain types of querysets
876 873
         and Q-objects together.
877 874
 
  875
+        A join is always created as LOUTER if the lhs alias is LOUTER to make
  876
+        sure we do not generate chains like a LOUTER b INNER c.
  877
+
878 878
         If 'nullable' is True, the join can potentially involve NULL values and
879 879
         is a candidate for promotion (to "left outer") when combining querysets.
880 880
         """
@@ -900,8 +900,8 @@ def join(self, connection, always_create=False, exclusions=(),
900 900
                     if self.alias_map[alias].lhs_alias != lhs:
901 901
                         continue
902 902
                     self.ref_alias(alias)
903  
-                    if promote:
904  
-                        self.promote_alias(alias)
  903
+                    if promote or (lhs and self.alias_map[lhs].join_type == self.LOUTER):
  904
+                        self.promote_joins([alias])
905 905
                     return alias
906 906
 
907 907
         # No reuse is possible, so we need a new alias.
@@ -1009,8 +1009,7 @@ def add_aggregate(self, aggregate, model, alias, is_summary):
1009 1009
             # If the aggregate references a model or field that requires a join,
1010 1010
             # those joins must be LEFT OUTER - empty join rows must be returned
1011 1011
             # in order for zeros to be returned for those aggregates.
1012  
-            for column_alias in join_list:
1013  
-                self.promote_alias(column_alias, unconditional=True)
  1012
+            self.promote_joins(join_list, True)
1014 1013
 
1015 1014
             col = (join_list[-1], col)
1016 1015
         else:
@@ -1129,7 +1128,7 @@ def add_filter(self, filter_expr, connector=AND, negate=False, trim=False,
1129 1128
             # If the comparison is against NULL, we may need to use some left
1130 1129
             # outer joins when creating the join chain. This is only done when
1131 1130
             # needed, as it's less efficient at the database level.
1132  
-            self.promote_alias_chain(join_list)
  1131
+            self.promote_joins(join_list)
1133 1132
             join_promote = True
1134 1133
 
1135 1134
         # Process the join list to see if we can remove any inner joins from
@@ -1160,16 +1159,16 @@ def add_filter(self, filter_expr, connector=AND, negate=False, trim=False,
1160 1159
                     # This means that we are dealing with two different query
1161 1160
                     # subtrees, so we don't need to do any join promotion.
1162 1161
                     continue
1163  
-                join_promote = join_promote or self.promote_alias(join, unconditional)
  1162
+                join_promote = join_promote or self.promote_joins([join], unconditional)
1164 1163
                 if table != join:
1165  
-                    table_promote = self.promote_alias(table)
  1164
+                    table_promote = self.promote_joins([table])
1166 1165
                 # We only get here if we have found a table that exists
1167 1166
                 # in the join list, but isn't on the original tables list.
1168 1167
                 # This means we've reached the point where we only have
1169 1168
                 # new tables, so we can break out of this promotion loop.
1170 1169
                 break
1171  
-            self.promote_alias_chain(join_it, join_promote)
1172  
-            self.promote_alias_chain(table_it, table_promote or join_promote)
  1170
+            self.promote_joins(join_it, join_promote)
  1171
+            self.promote_joins(table_it, table_promote or join_promote)
1173 1172
 
1174 1173
         if having_clause or force_having:
1175 1174
             if (alias, col) not in self.group_by:
@@ -1181,7 +1180,7 @@ def add_filter(self, filter_expr, connector=AND, negate=False, trim=False,
1181 1180
                 connector)
1182 1181
 
1183 1182
         if negate:
1184  
-            self.promote_alias_chain(join_list)
  1183
+            self.promote_joins(join_list)
1185 1184
             if lookup_type != 'isnull':
1186 1185
                 if len(join_list) > 1:
1187 1186
                     for alias in join_list:
@@ -1655,7 +1654,7 @@ def add_fields(self, field_names, allow_m2m=True):
1655 1654
                         final_alias = join.lhs_alias
1656 1655
                         col = join.lhs_join_col
1657 1656
                         joins = joins[:-1]
1658  
-                self.promote_alias_chain(joins[1:])
  1657
+                self.promote_joins(joins[1:])
1659 1658
                 self.select.append((final_alias, col))
1660 1659
                 self.select_fields.append(field)
1661 1660
         except MultiJoin:
0  tests/regressiontests/nested_foreign_keys/__init__.py
No changes.
28  tests/regressiontests/nested_foreign_keys/models.py
... ...
@@ -0,0 +1,28 @@
  1
+from django.db import models
  2
+
  3
+
  4
+class Person(models.Model):
  5
+    name = models.CharField(max_length=200)
  6
+
  7
+
  8
+class Movie(models.Model):
  9
+    title = models.CharField(max_length=200)
  10
+    director = models.ForeignKey(Person)
  11
+
  12
+
  13
+class Event(models.Model):
  14
+    pass
  15
+
  16
+
  17
+class Screening(Event):
  18
+    movie = models.ForeignKey(Movie)
  19
+
  20
+class ScreeningNullFK(Event):
  21
+    movie = models.ForeignKey(Movie, null=True)
  22
+
  23
+
  24
+class Package(models.Model):
  25
+    screening = models.ForeignKey(Screening, null=True)
  26
+
  27
+class PackageNullFK(models.Model):
  28
+    screening = models.ForeignKey(ScreeningNullFK, null=True)
166  tests/regressiontests/nested_foreign_keys/tests.py
... ...
@@ -0,0 +1,166 @@
  1
+from __future__ import absolute_import
  2
+from django.test import TestCase
  3
+
  4
+from .models import Person, Movie, Event, Screening, ScreeningNullFK, Package, PackageNullFK
  5
+
  6
+
  7
+# These are tests for #16715. The basic scheme is always the same: 3 models with
  8
+# 2 relations. The first relation may be null, while the second is non-nullable.
  9
+# In some cases, Django would pick the wrong join type for the second relation,
  10
+# resulting in missing objects in the queryset.
  11
+#
  12
+#   Model A
  13
+#   | (Relation A/B : nullable)
  14
+#   Model B
  15
+#   | (Relation B/C : non-nullable)
  16
+#   Model C
  17
+#
  18
+# Because of the possibility of NULL rows resulting from the LEFT OUTER JOIN
  19
+# between Model A and Model B (i.e. instances of A without reference to B),
  20
+# the second join must also be LEFT OUTER JOIN, so that we do not ignore
  21
+# instances of A that do not reference B.
  22
+#
  23
+# Relation A/B can either be an explicit foreign key or an implicit reverse
  24
+# relation such as introduced by one-to-one relations (through multi-table
  25
+# inheritance).
  26
+class NestedForeignKeysTests(TestCase):
  27
+    def setUp(self):
  28
+        self.director = Person.objects.create(name='Terry Gilliam / Terry Jones')
  29
+        self.movie = Movie.objects.create(title='Monty Python and the Holy Grail', director=self.director)
  30
+
  31
+
  32
+    # This test failed in #16715 because in some cases INNER JOIN was selected
  33
+    # for the second foreign key relation instead of LEFT OUTER JOIN.
  34
+    def testInheritance(self):
  35
+        some_event = Event.objects.create()
  36
+        screening = Screening.objects.create(movie=self.movie)
  37
+
  38
+        self.assertEqual(len(Event.objects.all()), 2)
  39
+        self.assertEqual(len(Event.objects.select_related('screening')), 2)
  40
+        # This failed.
  41
+        self.assertEqual(len(Event.objects.select_related('screening__movie')), 2)
  42
+
  43
+        self.assertEqual(len(Event.objects.values()), 2)
  44
+        self.assertEqual(len(Event.objects.values('screening__pk')), 2)
  45
+        self.assertEqual(len(Event.objects.values('screening__movie__pk')), 2)
  46
+        self.assertEqual(len(Event.objects.values('screening__movie__title')), 2)
  47
+        # This failed.
  48
+        self.assertEqual(len(Event.objects.values('screening__movie__pk', 'screening__movie__title')), 2)
  49
+
  50
+        # Simple filter/exclude queries for good measure.
  51
+        self.assertEqual(Event.objects.filter(screening__movie=self.movie).count(), 1)
  52
+        self.assertEqual(Event.objects.exclude(screening__movie=self.movie).count(), 1)
  53
+
  54
+
  55
+    # These all work because the second foreign key in the chain has null=True.
  56
+    def testInheritanceNullFK(self):
  57
+        some_event = Event.objects.create()
  58
+        screening = ScreeningNullFK.objects.create(movie=None)
  59
+        screening_with_movie = ScreeningNullFK.objects.create(movie=self.movie)
  60
+
  61
+        self.assertEqual(len(Event.objects.all()), 3)
  62
+        self.assertEqual(len(Event.objects.select_related('screeningnullfk')), 3)
  63
+        self.assertEqual(len(Event.objects.select_related('screeningnullfk__movie')), 3)
  64
+
  65
+        self.assertEqual(len(Event.objects.values()), 3)
  66
+        self.assertEqual(len(Event.objects.values('screeningnullfk__pk')), 3)
  67
+        self.assertEqual(len(Event.objects.values('screeningnullfk__movie__pk')), 3)
  68
+        self.assertEqual(len(Event.objects.values('screeningnullfk__movie__title')), 3)
  69
+        self.assertEqual(len(Event.objects.values('screeningnullfk__movie__pk', 'screeningnullfk__movie__title')), 3)
  70
+
  71
+        self.assertEqual(Event.objects.filter(screeningnullfk__movie=self.movie).count(), 1)
  72
+        self.assertEqual(Event.objects.exclude(screeningnullfk__movie=self.movie).count(), 2)
  73
+
  74
+
  75
+    # This test failed in #16715 because in some cases INNER JOIN was selected
  76
+    # for the second foreign key relation instead of LEFT OUTER JOIN.
  77
+    def testExplicitForeignKey(self):
  78
+        package = Package.objects.create()
  79
+        screening = Screening.objects.create(movie=self.movie)
  80
+        package_with_screening = Package.objects.create(screening=screening)
  81
+
  82
+        self.assertEqual(len(Package.objects.all()), 2)
  83
+        self.assertEqual(len(Package.objects.select_related('screening')), 2)
  84
+        self.assertEqual(len(Package.objects.select_related('screening__movie')), 2)
  85
+
  86
+        self.assertEqual(len(Package.objects.values()), 2)
  87
+        self.assertEqual(len(Package.objects.values('screening__pk')), 2)
  88
+        self.assertEqual(len(Package.objects.values('screening__movie__pk')), 2)
  89
+        self.assertEqual(len(Package.objects.values('screening__movie__title')), 2)
  90
+        # This failed.
  91
+        self.assertEqual(len(Package.objects.values('screening__movie__pk', 'screening__movie__title')), 2)
  92
+
  93
+        self.assertEqual(Package.objects.filter(screening__movie=self.movie).count(), 1)
  94
+        self.assertEqual(Package.objects.exclude(screening__movie=self.movie).count(), 1)
  95
+
  96
+
  97
+    # These all work because the second foreign key in the chain has null=True.
  98
+    def testExplicitForeignKeyNullFK(self):
  99
+        package = PackageNullFK.objects.create()
  100
+        screening = ScreeningNullFK.objects.create(movie=None)
  101
+        screening_with_movie = ScreeningNullFK.objects.create(movie=self.movie)
  102
+        package_with_screening = PackageNullFK.objects.create(screening=screening)
  103
+        package_with_screening_with_movie = PackageNullFK.objects.create(screening=screening_with_movie)
  104
+
  105
+        self.assertEqual(len(PackageNullFK.objects.all()), 3)
  106
+        self.assertEqual(len(PackageNullFK.objects.select_related('screening')), 3)
  107
+        self.assertEqual(len(PackageNullFK.objects.select_related('screening__movie')), 3)
  108
+
  109
+        self.assertEqual(len(PackageNullFK.objects.values()), 3)
  110
+        self.assertEqual(len(PackageNullFK.objects.values('screening__pk')), 3)
  111
+        self.assertEqual(len(PackageNullFK.objects.values('screening__movie__pk')), 3)
  112
+        self.assertEqual(len(PackageNullFK.objects.values('screening__movie__title')), 3)
  113
+        self.assertEqual(len(PackageNullFK.objects.values('screening__movie__pk', 'screening__movie__title')), 3)
  114
+
  115
+        self.assertEqual(PackageNullFK.objects.filter(screening__movie=self.movie).count(), 1)
  116
+        self.assertEqual(PackageNullFK.objects.exclude(screening__movie=self.movie).count(), 2)
  117
+
  118
+
  119
+# Some additional tests for #16715. The only difference is the depth of the
  120
+# nesting as we now use 4 models instead of 3 (and thus 3 relations). This
  121
+# checks if promotion of join types works for deeper nesting too.
  122
+class DeeplyNestedForeignKeysTests(TestCase):
  123
+    def setUp(self):
  124
+        self.director = Person.objects.create(name='Terry Gilliam / Terry Jones')
  125
+        self.movie = Movie.objects.create(title='Monty Python and the Holy Grail', director=self.director)
  126
+
  127
+
  128
+    def testInheritance(self):
  129
+        some_event = Event.objects.create()
  130
+        screening = Screening.objects.create(movie=self.movie)
  131
+
  132
+        self.assertEqual(len(Event.objects.all()), 2)
  133
+        self.assertEqual(len(Event.objects.select_related('screening__movie__director')), 2)
  134
+
  135
+        self.assertEqual(len(Event.objects.values()), 2)
  136
+        self.assertEqual(len(Event.objects.values('screening__movie__director__pk')), 2)
  137
+        self.assertEqual(len(Event.objects.values('screening__movie__director__name')), 2)
  138
+        self.assertEqual(len(Event.objects.values('screening__movie__director__pk', 'screening__movie__director__name')), 2)
  139
+        self.assertEqual(len(Event.objects.values('screening__movie__pk', 'screening__movie__director__pk')), 2)
  140
+        self.assertEqual(len(Event.objects.values('screening__movie__pk', 'screening__movie__director__name')), 2)
  141
+        self.assertEqual(len(Event.objects.values('screening__movie__title', 'screening__movie__director__pk')), 2)
  142
+        self.assertEqual(len(Event.objects.values('screening__movie__title', 'screening__movie__director__name')), 2)
  143
+
  144
+        self.assertEqual(Event.objects.filter(screening__movie__director=self.director).count(), 1)
  145
+        self.assertEqual(Event.objects.exclude(screening__movie__director=self.director).count(), 1)
  146
+
  147
+
  148
+    def testExplicitForeignKey(self):
  149
+        package = Package.objects.create()
  150
+        screening = Screening.objects.create(movie=self.movie)
  151
+        package_with_screening = Package.objects.create(screening=screening)
  152
+
  153
+        self.assertEqual(len(Package.objects.all()), 2)
  154
+        self.assertEqual(len(Package.objects.select_related('screening__movie__director')), 2)
  155
+
  156
+        self.assertEqual(len(Package.objects.values()), 2)
  157
+        self.assertEqual(len(Package.objects.values('screening__movie__director__pk')), 2)
  158
+        self.assertEqual(len(Package.objects.values('screening__movie__director__name')), 2)
  159
+        self.assertEqual(len(Package.objects.values('screening__movie__director__pk', 'screening__movie__director__name')), 2)
  160
+        self.assertEqual(len(Package.objects.values('screening__movie__pk', 'screening__movie__director__pk')), 2)
  161
+        self.assertEqual(len(Package.objects.values('screening__movie__pk', 'screening__movie__director__name')), 2)
  162
+        self.assertEqual(len(Package.objects.values('screening__movie__title', 'screening__movie__director__pk')), 2)
  163
+        self.assertEqual(len(Package.objects.values('screening__movie__title', 'screening__movie__director__name')), 2)
  164
+
  165
+        self.assertEqual(Package.objects.filter(screening__movie__director=self.director).count(), 1)
  166
+        self.assertEqual(Package.objects.exclude(screening__movie__director=self.director).count(), 1)

0 notes on commit 01b9c3d

Please sign in to comment.
Something went wrong with that request. Please try again.