Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

queryset-refactor: Changed the way order_by() and distinct() interact.

When using "select distinct" all ordering columns must be part of the output
(select) columns. We were previously just throwing away ordering columns that
weren't included, but there are some cases where they are needed and it's
difficult to add them in manually. So now the default behaviour is to append
any missing columns.

This can affect the output of distinct() if complicated order_by() constructs
are used, so the documentation has been updated with an explanation of what's
going on there.

Fixed #7070.


git-svn-id: http://code.djangoproject.com/svn/django/branches/queryset-refactor@7455 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit 8a4e1de8b0573ce4330f7690fe5e9d786265e237 1 parent dd0c585
Malcolm Tredinnick authored April 24, 2008
43  django/db/models/sql/query.py
@@ -52,6 +52,7 @@ def __init__(self, model, connection, where=WhereNode):
52 52
         self.default_cols = True
53 53
         self.default_ordering = True
54 54
         self.standard_ordering = True
  55
+        self.ordering_aliases = []
55 56
         self.start_meta = None
56 57
 
57 58
         # SQL-related attributes
@@ -139,6 +140,7 @@ def clone(self, klass=None, **kwargs):
139 140
         obj.default_cols = self.default_cols
140 141
         obj.default_ordering = self.default_ordering
141 142
         obj.standard_ordering = self.standard_ordering
  143
+        obj.ordering_aliases = []
142 144
         obj.start_meta = self.start_meta
143 145
         obj.select = self.select[:]
144 146
         obj.tables = self.tables[:]
@@ -215,16 +217,18 @@ def as_sql(self, with_limits=True):
215 217
         self.pre_sql_setup()
216 218
         out_cols = self.get_columns()
217 219
         ordering = self.get_ordering()
  220
+
218 221
         # This must come after 'select' and 'ordering' -- see docstring of
219 222
         # get_from_clause() for details.
220 223
         from_, f_params = self.get_from_clause()
  224
+
221 225
         where, w_params = self.where.as_sql(qn=self.quote_name_unless_alias)
222 226
         params = list(self.extra_select_params)
223 227
 
224 228
         result = ['SELECT']
225 229
         if self.distinct:
226 230
             result.append('DISTINCT')
227  
-        result.append(', '.join(out_cols))
  231
+        result.append(', '.join(out_cols + self.ordering_aliases))
228 232
 
229 233
         result.append('FROM')
230 234
         result.extend(from_)
@@ -465,15 +469,13 @@ def get_grouping(self):
465 469
 
466 470
     def get_ordering(self):
467 471
         """
468  
-        Returns a tuple representing the SQL elements in the "order by" clause.
  472
+        Returns list representing the SQL elements in the "order by" clause.
  473
+        Also sets the ordering_aliases attribute on this instance to a list of
  474
+        extra aliases needed in the select.
469 475
 
470 476
         Determining the ordering SQL can change the tables we need to include,
471 477
         so this should be run *before* get_from_clause().
472 478
         """
473  
-        # FIXME: It's an SQL-92 requirement that all ordering columns appear as
474  
-        # output columns in the query (in the select statement) or be ordinals.
475  
-        # We don't enforce that here, but we should (by adding to the select
476  
-        # columns), for portability.
477 479
         if self.extra_order_by:
478 480
             ordering = self.extra_order_by
479 481
         elif not self.default_ordering:
@@ -485,6 +487,7 @@ def get_ordering(self):
485 487
         distinct = self.distinct
486 488
         select_aliases = self._select_aliases
487 489
         result = []
  490
+        ordering_aliases = []
488 491
         if self.standard_ordering:
489 492
             asc, desc = ORDER_DIR['ASC']
490 493
         else:
@@ -515,13 +518,16 @@ def get_ordering(self):
515 518
                 for table, col, order in self.find_ordering_name(field,
516 519
                         self.model._meta, default_order=asc):
517 520
                     elt = '%s.%s' % (qn(table), qn2(col))
518  
-                    if not distinct or elt in select_aliases:
519  
-                        result.append('%s %s' % (elt, order))
  521
+                    if distinct and elt not in select_aliases:
  522
+                        ordering_aliases.append(elt)
  523
+                    result.append('%s %s' % (elt, order))
520 524
             else:
521 525
                 col, order = get_order_dir(field, asc)
522 526
                 elt = qn(col)
523  
-                if not distinct or elt in select_aliases:
524  
-                    result.append('%s %s' % (elt, order))
  527
+                if distinct and elt not in select_aliases:
  528
+                    ordering_aliases.append(elt)
  529
+                result.append('%s %s' % (elt, order))
  530
+        self.ordering_aliases = ordering_aliases
525 531
         return result
526 532
 
527 533
     def find_ordering_name(self, name, opts, alias=None, default_order='ASC',
@@ -1379,8 +1385,14 @@ def execute_sql(self, result_type=MULTI):
1379 1385
         if not result_type:
1380 1386
             return cursor
1381 1387
         if result_type == SINGLE:
  1388
+            if self.ordering_aliases:
  1389
+                return cursor.fetchone()[:-len(results.ordering_aliases)]
1382 1390
             return cursor.fetchone()
  1391
+
1383 1392
         # The MULTI case.
  1393
+        if self.ordering_aliases:
  1394
+            return order_modified_iter(cursor, len(self.ordering_aliases),
  1395
+                    self.connection.features.empty_fetchmany_value)
1384 1396
         return iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)),
1385 1397
                 self.connection.features.empty_fetchmany_value)
1386 1398
 
@@ -1408,6 +1420,17 @@ def empty_iter():
1408 1420
     """
1409 1421
     yield iter([]).next()
1410 1422
 
  1423
+def order_modified_iter(cursor, trim, sentinel):
  1424
+    """
  1425
+    Yields blocks of rows from a cursor. We use this iterator in the special
  1426
+    case when extra output columns have been added to support ordering
  1427
+    requirements. We must trim those extra columns before anything else can use
  1428
+    the results, since they're only needed to make the SQL valid.
  1429
+    """
  1430
+    for rows in iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)),
  1431
+            sentinel):
  1432
+        yield [r[:-trim] for r in rows]
  1433
+
1411 1434
 def setup_join_cache(sender):
1412 1435
     """
1413 1436
     The information needed to join between model fields is something that is
34  docs/db-api.txt
@@ -510,6 +510,10 @@ primary key if there is no ``Meta.ordering`` specified. For example::
510 510
 
511 511
 ...since the ``Blog`` model has no default ordering specified.
512 512
 
  513
+Be cautious when ordering by fields in related models if you are also using
  514
+``distinct()``. See the note in the `distinct()`_ section for an explanation
  515
+of how related model ordering can change the expected results.
  516
+
513 517
 It is permissible to specify a multi-valued field to order the results by (for
514 518
 example, a ``ManyToMany`` field). Normally this won't be a sensible thing to
515 519
 do and it's really an advanced usage feature. However, if you know that your
@@ -559,10 +563,28 @@ eliminates duplicate rows from the query results.
559 563
 
560 564
 By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this
561 565
 is rarely a problem, because simple queries such as ``Blog.objects.all()``
562  
-don't introduce the possibility of duplicate result rows.
  566
+don't introduce the possibility of duplicate result rows. However, if your
  567
+query spans multiple tables, it's possible to get duplicate results when a
  568
+``QuerySet`` is evaluated. That's when you'd use ``distinct()``.
563 569
 
564  
-However, if your query spans multiple tables, it's possible to get duplicate
565  
-results when a ``QuerySet`` is evaluated. That's when you'd use ``distinct()``.
  570
+.. note::
  571
+    Any fields used in an ``order_by()`` call are included in the SQL
  572
+    ``SELECT`` columns. This can sometimes lead to unexpected results when
  573
+    used in conjuntion with ``distinct()``. If you order by fields from a
  574
+    related model, those fields will be added to the selected columns and they
  575
+    may make otherwise duplicate rows appear to be distinct. Since the extra
  576
+    columns don't appear in the returned results (they are only there to
  577
+    support ordering), it sometimes looks like non-distinct results are being
  578
+    returned.
  579
+
  580
+    Similarly, if you use a ``values()`` query to restrict the columns
  581
+    selected, the columns used in any ``order_by()`` (or default model
  582
+    ordering) will still be involved and may affect uniqueness of the results.
  583
+
  584
+    The moral here is that if you are using ``distinct()`` be careful about
  585
+    ordering by related models. Similarly, when using ``distinct()`` and
  586
+    ``values()`` together, be careful when ordering by fields not in the
  587
+    ``values()`` call.
566 588
 
567 589
 ``values(*fields)``
568 590
 ~~~~~~~~~~~~~~~~~~~
@@ -627,6 +649,9 @@ A couple of subtleties that are worth mentioning:
627 649
 
628 650
         >>> Entry.objects.values('blog_id')
629 651
         [{'blog_id': 1}, ...]
  652
+    * When using ``values()`` together with ``distinct()``, be aware that
  653
+      ordering can affect the results. See the note in the `distinct()`_
  654
+      section, above, for details.
630 655
 
631 656
 **New in Django development version:** Previously, it was not possible to pass
632 657
 ``blog_id`` to ``values()`` in the above example, only ``blog``.
@@ -841,8 +866,7 @@ You can only refer to ``ForeignKey`` relations in the list of fields passed to
841 866
 list of fields and the ``depth`` parameter in the same ``select_related()``
842 867
 call, since they are conflicting options.
843 868
 
844  
-``extra(select=None, where=None, params=None, tables=None, order_by=None,
845  
-select_params=None)``
  869
+``extra(select=None, where=None, params=None, tables=None, order_by=None, select_params=None)``
846 870
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
847 871
 
848 872
 Sometimes, the Django query syntax by itself can't easily express a complex
4  tests/modeltests/many_to_one/models.py
@@ -250,9 +250,9 @@ class Meta:
250 250
 >>> Reporter.objects.filter(article__reporter=r).distinct()
251 251
 [<Reporter: John Smith>]
252 252
 
253  
-# It's possible to use values() calls across many-to-one relations.
  253
+# It's possible to use values() calls across many-to-one relations. (Note, too, that we clear the ordering here so as not to drag the 'headline' field into the columns being used to determine uniqueness.)
254 254
 >>> d = {'reporter__first_name': u'John', 'reporter__last_name': u'Smith'}
255  
->>> list(Article.objects.filter(reporter=r).distinct().values('reporter__first_name', 'reporter__last_name')) == [d]
  255
+>>> list(Article.objects.filter(reporter=r).distinct().order_by().values('reporter__first_name', 'reporter__last_name')) == [d]
256 256
 True
257 257
 
258 258
 # If you delete a reporter, his articles will be deleted.
10  tests/regressiontests/queries/models.py
@@ -498,9 +498,15 @@ class Meta:
498 498
 >>> Item.objects.filter(Q(creator__name='a3', name='two')|Q(creator__name='a4', name='four'))
499 499
 [<Item: four>]
500 500
 
501  
-Bug #5321
  501
+Bug #5321, #7070
  502
+
  503
+Ordering columns must be included in the output columns. Note that this means
  504
+results that might otherwise be distinct are not (if there are multiple values
  505
+in the ordering cols), as in this example. This isn't a bug; it's a warning to
  506
+be careful with the selection of ordering columns.
  507
+
502 508
 >>> Note.objects.values('misc').distinct().order_by('note', '-misc')
503  
-[{'misc': u'foo'}, {'misc': u'bar'}]
  509
+[{'misc': u'foo'}, {'misc': u'bar'}, {'misc': u'foo'}]
504 510
 
505 511
 Bug #4358
506 512
 If you don't pass any fields to values(), relation fields are returned as

0 notes on commit 8a4e1de

Please sign in to comment.
Something went wrong with that request. Please try again.