Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fixed #18854 -- Join promotion in disjunction cases

The added promotion logic is based on promoting any joins used in only
some of the children of an OR clause unless the join existed before the
OR clause addition.
  • Loading branch information...
commit d407164c0499c234ec043f5720b3209311b2f4e9 1 parent 3dcd435
Anssi Kääriäinen authored August 25, 2012
106  django/db/models/sql/query.py
@@ -772,17 +772,37 @@ def reset_refcounts(self, to_counts):
772 772
             unref_amount = cur_refcount - to_counts.get(alias, 0)
773 773
             self.unref_alias(alias, unref_amount)
774 774
 
775  
-    def promote_unused_aliases(self, initial_refcounts, used_aliases):
  775
+    def promote_disjunction(self, aliases_before, alias_usage_counts,
  776
+                            num_childs):
776 777
         """
777  
-        Given a "before" copy of the alias_refcounts dictionary (as
778  
-        'initial_refcounts') and a collection of aliases that may have been
779  
-        changed or created, works out which aliases have been created since
780  
-        then and which ones haven't been used and promotes all of those
781  
-        aliases, plus any children of theirs in the alias tree, to outer joins.
  778
+        This method is to be used for promoting joins in ORed filters.
  779
+
  780
+        The principle for promotion is: any alias which is used (it is in
  781
+        alias_usage_counts), is not used by every child of the ORed filter,
  782
+        and isn't pre-existing needs to be promoted to LOUTER join.
  783
+
  784
+        Some examples (assume all joins used are nullable):
  785
+            - existing filter: a__f1=foo
  786
+            - add filter: b__f1=foo|b__f2=foo
  787
+            In this case we should not promote either of the joins (using INNER
  788
+            doesn't remove results). We correctly avoid join promotion, because
  789
+            a is not used in this branch, and b is used two times.
  790
+
  791
+            - add filter a__f1=foo|b__f2=foo
  792
+            In this case we should promote both a and b, otherwise they will
  793
+            remove results. We will also correctly do that as both aliases are
  794
+            used, and in addition both are used only once while there are two
  795
+            filters.
  796
+
  797
+            - existing: a__f1=bar
  798
+            - add filter: a__f2=foo|b__f2=foo
  799
+            We will not promote a as it is previously used. If the join results
  800
+            in null, the existing filter can't succeed.
  801
+
  802
+        The above (and some more) are tested in queries.DisjunctionPromotionTests
782 803
         """
783  
-        for alias in self.tables:
784  
-            if alias in used_aliases and (alias not in initial_refcounts or
785  
-                    self.alias_refcount[alias] == initial_refcounts[alias]):
  804
+        for alias, use_count in alias_usage_counts.items():
  805
+            if use_count < num_childs and alias not in aliases_before:
786 806
                 self.promote_joins([alias])
787 807
 
788 808
     def change_aliases(self, change_map):
@@ -1150,16 +1170,12 @@ def add_filter(self, filter_expr, connector=AND, negate=False,
1150 1170
                     can_reuse)
1151 1171
             return
1152 1172
 
1153  
-        table_promote = False
1154  
-        join_promote = False
1155  
-
1156 1173
         if (lookup_type == 'isnull' and value is True and not negate and
1157 1174
                 len(join_list) > 1):
1158 1175
             # If the comparison is against NULL, we may need to use some left
1159 1176
             # outer joins when creating the join chain. This is only done when
1160 1177
             # needed, as it's less efficient at the database level.
1161 1178
             self.promote_joins(join_list)
1162  
-            join_promote = True
1163 1179
 
1164 1180
         # Process the join list to see if we can remove any inner joins from
1165 1181
         # the far end (fewer tables in a query is better). Note that join
@@ -1167,39 +1183,6 @@ def add_filter(self, filter_expr, connector=AND, negate=False,
1167 1183
         # information available when reusing joins.
1168 1184
         col, alias, join_list = self.trim_joins(target, join_list, path)
1169 1185
 
1170  
-        if connector == OR:
1171  
-            # Some joins may need to be promoted when adding a new filter to a
1172  
-            # disjunction. We walk the list of new joins and where it diverges
1173  
-            # from any previous joins (ref count is 1 in the table list), we
1174  
-            # make the new additions (and any existing ones not used in the new
1175  
-            # join list) an outer join.
1176  
-            join_it = iter(join_list)
1177  
-            table_it = iter(self.tables)
1178  
-            next(join_it), next(table_it)
1179  
-            unconditional = False
1180  
-            for join in join_it:
1181  
-                table = next(table_it)
1182  
-                # Once we hit an outer join, all subsequent joins must
1183  
-                # also be promoted, regardless of whether they have been
1184  
-                # promoted as a result of this pass through the tables.
1185  
-                unconditional = (unconditional or
1186  
-                    self.alias_map[join].join_type == self.LOUTER)
1187  
-                if join == table and self.alias_refcount[join] > 1:
1188  
-                    # We have more than one reference to this join table.
1189  
-                    # This means that we are dealing with two different query
1190  
-                    # subtrees, so we don't need to do any join promotion.
1191  
-                    continue
1192  
-                join_promote = join_promote or self.promote_joins([join], unconditional)
1193  
-                if table != join:
1194  
-                    table_promote = self.promote_joins([table])
1195  
-                # We only get here if we have found a table that exists
1196  
-                # in the join list, but isn't on the original tables list.
1197  
-                # This means we've reached the point where we only have
1198  
-                # new tables, so we can break out of this promotion loop.
1199  
-                break
1200  
-            self.promote_joins(join_it, join_promote)
1201  
-            self.promote_joins(table_it, table_promote or join_promote)
1202  
-
1203 1186
         if having_clause or force_having:
1204 1187
             if (alias, col) not in self.group_by:
1205 1188
                 self.group_by.append((alias, col))
@@ -1256,33 +1239,36 @@ def add_q(self, q_object, used_aliases=None, force_having=False):
1256 1239
                 subtree = True
1257 1240
             else:
1258 1241
                 subtree = False
1259  
-            connector = AND
  1242
+            connector = q_object.connector
  1243
+            if connector == OR:
  1244
+                alias_usage_counts = dict()
  1245
+                aliases_before = set(self.tables)
1260 1246
             if q_object.connector == OR and not force_having:
1261 1247
                 force_having = self.need_force_having(q_object)
1262 1248
             for child in q_object.children:
1263  
-                if connector == OR:
1264  
-                    refcounts_before = self.alias_refcount.copy()
1265 1249
                 if force_having:
1266 1250
                     self.having.start_subtree(connector)
1267 1251
                 else:
1268 1252
                     self.where.start_subtree(connector)
  1253
+                if connector == OR:
  1254
+                    refcounts_before = self.alias_refcount.copy()
1269 1255
                 if isinstance(child, Node):
1270 1256
                     self.add_q(child, used_aliases, force_having=force_having)
1271 1257
                 else:
1272 1258
                     self.add_filter(child, connector, q_object.negated,
1273 1259
                             can_reuse=used_aliases, force_having=force_having)
  1260
+                if connector == OR:
  1261
+                    used = alias_diff(refcounts_before, self.alias_refcount)
  1262
+                    for alias in used:
  1263
+                        alias_usage_counts[alias] = alias_usage_counts.get(alias, 0) + 1
1274 1264
                 if force_having:
1275 1265
                     self.having.end_subtree()
1276 1266
                 else:
1277 1267
                     self.where.end_subtree()
1278 1268
 
1279  
-                if connector == OR:
1280  
-                    # Aliases that were newly added or not used at all need to
1281  
-                    # be promoted to outer joins if they are nullable relations.
1282  
-                    # (they shouldn't turn the whole conditional into the empty
1283  
-                    # set just because they don't match anything).
1284  
-                    self.promote_unused_aliases(refcounts_before, used_aliases)
1285  
-                connector = q_object.connector
  1269
+            if connector == OR:
  1270
+                self.promote_disjunction(aliases_before, alias_usage_counts,
  1271
+                                         len(q_object.children))
1286 1272
             if q_object.negated:
1287 1273
                 self.where.negate()
1288 1274
             if subtree:
@@ -2005,3 +1991,11 @@ def is_reverse_o2o(field):
2005 1991
     expected to be some sort of relation field or related object.
2006 1992
     """
2007 1993
     return not hasattr(field, 'rel') and field.field.unique
  1994
+
  1995
+def alias_diff(refcounts_before, refcounts_after):
  1996
+    """
  1997
+    Given the before and after copies of refcounts works out which aliases
  1998
+    have been added to the after copy.
  1999
+    """
  2000
+    return set(t for t in refcounts_after
  2001
+               if refcounts_after[t] > refcounts_before.get(t, 0))
18  tests/regressiontests/queries/models.py
@@ -421,3 +421,21 @@ class Responsibility(models.Model):
421 421
 
422 422
     def __str__(self):
423 423
         return self.description
  424
+
  425
+# Models for disjunction join promotion low level testing.
  426
+class FK1(models.Model):
  427
+    f1 = models.TextField()
  428
+    f2 = models.TextField()
  429
+
  430
+class FK2(models.Model):
  431
+    f1 = models.TextField()
  432
+    f2 = models.TextField()
  433
+
  434
+class FK3(models.Model):
  435
+    f1 = models.TextField()
  436
+    f2 = models.TextField()
  437
+
  438
+class BaseA(models.Model):
  439
+    a = models.ForeignKey(FK1, null=True)
  440
+    b = models.ForeignKey(FK2, null=True)
  441
+    c = models.ForeignKey(FK3, null=True)
130  tests/regressiontests/queries/tests.py
@@ -8,8 +8,8 @@
8 8
 from django.conf import settings
9 9
 from django.core.exceptions import FieldError
10 10
 from django.db import DatabaseError, connection, connections, DEFAULT_DB_ALIAS
11  
-from django.db.models import Count
12  
-from django.db.models.query import Q, ITER_CHUNK_SIZE, EmptyQuerySet
  11
+from django.db.models import Count, F, Q
  12
+from django.db.models.query import ITER_CHUNK_SIZE, EmptyQuerySet
13 13
 from django.db.models.sql.where import WhereNode, EverythingNode, NothingNode
14 14
 from django.db.models.sql.datastructures import EmptyResultSet
15 15
 from django.test import TestCase, skipUnlessDBFeature
@@ -24,7 +24,7 @@
24 24
     Node, ObjectA, ObjectB, ObjectC, CategoryItem, SimpleCategory,
25 25
     SpecialCategory, OneToOneCategory, NullableName, ProxyCategory,
26 26
     SingleObject, RelatedObject, ModelA, ModelD, Responsibility, Job,
27  
-    JobResponsibilities)
  27
+    JobResponsibilities, BaseA)
28 28
 
29 29
 
30 30
 class BaseQuerysetTest(TestCase):
@@ -2451,3 +2451,127 @@ def test_revo2o_reuse(self):
2451 2451
     def test_revfk_noreuse(self):
2452 2452
         qs = Author.objects.filter(report__name='r4').filter(report__name='r1')
2453 2453
         self.assertEqual(str(qs.query).count('JOIN'), 2)
  2454
+
  2455
+class DisjunctionPromotionTests(TestCase):
  2456
+    def test_disjunction_promotion1(self):
  2457
+        # Pre-existing join, add two ORed filters to the same join,
  2458
+        # all joins can be INNER JOINS.
  2459
+        qs = BaseA.objects.filter(a__f1='foo')
  2460
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2461
+        qs = qs.filter(Q(b__f1='foo') | Q(b__f2='foo'))
  2462
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 2)
  2463
+        # Reverse the order of AND and OR filters.
  2464
+        qs = BaseA.objects.filter(Q(b__f1='foo') | Q(b__f2='foo'))
  2465
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2466
+        qs = qs.filter(a__f1='foo')
  2467
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 2)
  2468
+
  2469
+    def test_disjunction_promotion2(self):
  2470
+        qs = BaseA.objects.filter(a__f1='foo')
  2471
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2472
+        # Now we have two different joins in an ORed condition, these
  2473
+        # must be OUTER joins. The pre-existing join should remain INNER.
  2474
+        qs = qs.filter(Q(b__f1='foo') | Q(c__f2='foo'))
  2475
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2476
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 2)
  2477
+        # Reverse case.
  2478
+        qs = BaseA.objects.filter(Q(b__f1='foo') | Q(c__f2='foo'))
  2479
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 2)
  2480
+        qs = qs.filter(a__f1='foo')
  2481
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2482
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 2)
  2483
+
  2484
+    def test_disjunction_promotion3(self):
  2485
+        qs = BaseA.objects.filter(a__f2='bar')
  2486
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2487
+        # The ANDed a__f2 filter allows us to keep using INNER JOIN
  2488
+        # even inside the ORed case. If the join to a__ returns nothing,
  2489
+        # the ANDed filter for a__f2 can't be true.
  2490
+        qs = qs.filter(Q(a__f1='foo') | Q(b__f2='foo'))
  2491
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2492
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 1)
  2493
+
  2494
+    @unittest.expectedFailure
  2495
+    def test_disjunction_promotion3_failing(self):
  2496
+        # Now the ORed filter creates LOUTER join, but we do not have
  2497
+        # logic to unpromote it for the AND filter after it. The query
  2498
+        # results will be correct, but we have one LOUTER JOIN too many
  2499
+        # currently.
  2500
+        qs = BaseA.objects.filter(
  2501
+            Q(a__f1='foo') | Q(b__f2='foo')).filter(a__f2='bar')
  2502
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2503
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 1)
  2504
+
  2505
+    def test_disjunction_promotion4(self):
  2506
+        qs = BaseA.objects.filter(Q(a=1) | Q(a=2))
  2507
+        self.assertEqual(str(qs.query).count('JOIN'), 0)
  2508
+        qs = qs.filter(a__f1='foo')
  2509
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2510
+        qs = BaseA.objects.filter(a__f1='foo')
  2511
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2512
+        qs = qs.filter(Q(a=1) | Q(a=2))
  2513
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2514
+
  2515
+    def test_disjunction_promotion5(self):
  2516
+        qs = BaseA.objects.filter(Q(a=1) | Q(a=2))
  2517
+        # Note that the above filters on a force the join to an
  2518
+        # inner join even if it is trimmed.
  2519
+        self.assertEqual(str(qs.query).count('JOIN'), 0)
  2520
+        qs = qs.filter(Q(a__f1='foo') | Q(b__f1='foo'))
  2521
+        # So, now the a__f1 join doesn't need promotion.
  2522
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2523
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 1)
  2524
+
  2525
+    @unittest.expectedFailure
  2526
+    def test_disjunction_promotion5_failing(self):
  2527
+        qs = BaseA.objects.filter(Q(a__f1='foo') | Q(b__f1='foo'))
  2528
+        # Now the join to a is created as LOUTER
  2529
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 0)
  2530
+        # The below filter should force the a to be inner joined. But,
  2531
+        # this is failing as we do not have join unpromotion logic.
  2532
+        qs = BaseA.objects.filter(Q(a=1) | Q(a=2))
  2533
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2534
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 1)
  2535
+
  2536
+    def test_disjunction_promotion6(self):
  2537
+        qs = BaseA.objects.filter(Q(a=1) | Q(a=2))
  2538
+        self.assertEqual(str(qs.query).count('JOIN'), 0)
  2539
+        qs = BaseA.objects.filter(Q(a__f1='foo') & Q(b__f1='foo'))
  2540
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 2)
  2541
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 0)
  2542
+
  2543
+        qs = BaseA.objects.filter(Q(a__f1='foo') & Q(b__f1='foo'))
  2544
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 0)
  2545
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 2)
  2546
+        qs = qs.filter(Q(a=1) | Q(a=2))
  2547
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 2)
  2548
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 0)
  2549
+
  2550
+    def test_disjunction_promotion7(self):
  2551
+        qs = BaseA.objects.filter(Q(a=1) | Q(a=2))
  2552
+        self.assertEqual(str(qs.query).count('JOIN'), 0)
  2553
+        qs = BaseA.objects.filter(Q(a__f1='foo') | (Q(b__f1='foo') & Q(a__f1='bar')))
  2554
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2555
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 1)
  2556
+        qs = BaseA.objects.filter(
  2557
+            (Q(a__f1='foo') | Q(b__f1='foo')) & (Q(a__f1='bar') | Q(c__f1='foo'))
  2558
+        )
  2559
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 3)
  2560
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 0)
  2561
+        qs = BaseA.objects.filter(
  2562
+            (Q(a__f1='foo') | (Q(a__f1='bar')) & (Q(b__f1='bar') | Q(c__f1='foo')))
  2563
+        )
  2564
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 2)
  2565
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2566
+
  2567
+    def test_disjunction_promotion_fexpression(self):
  2568
+        qs = BaseA.objects.filter(Q(a__f1=F('b__f1')) | Q(b__f1='foo'))
  2569
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 1)
  2570
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
  2571
+        qs = BaseA.objects.filter(Q(a__f1=F('c__f1')) | Q(b__f1='foo'))
  2572
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 3)
  2573
+        qs = BaseA.objects.filter(Q(a__f1=F('b__f1')) | Q(a__f2=F('b__f2')) | Q(c__f1='foo'))
  2574
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 3)
  2575
+        qs = BaseA.objects.filter(Q(a__f1=F('c__f1')) | (Q(pk=1) & Q(pk=2)))
  2576
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 2)
  2577
+        self.assertEqual(str(qs.query).count('INNER JOIN'), 0)

0 notes on commit d407164

Please sign in to comment.
Something went wrong with that request. Please try again.