Browse files

queryset-refactor: Changed the way order_by() and distinct() interact.

When using "select distinct" all ordering columns must be part of the output
(select) columns. We were previously just throwing away ordering columns that
weren't included, but there are some cases where they are needed and it's
difficult to add them in manually. So now the default behaviour is to append
any missing columns.

This can affect the output of distinct() if complicated order_by() constructs
are used, so the documentation has been updated with an explanation of what's
going on there.

Fixed #7070.

git-svn-id: bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
1 parent dd0c585 commit 8a4e1de8b0573ce4330f7690fe5e9d786265e237 @malcolmt malcolmt committed Apr 24, 2008
Showing with 72 additions and 19 deletions.
  1. +33 −10 django/db/models/sql/
  2. +29 −5 docs/db-api.txt
  3. +2 −2 tests/modeltests/many_to_one/
  4. +8 −2 tests/regressiontests/queries/
@@ -52,6 +52,7 @@ def __init__(self, model, connection, where=WhereNode):
self.default_cols = True
self.default_ordering = True
self.standard_ordering = True
+ self.ordering_aliases = []
self.start_meta = None
# SQL-related attributes
@@ -139,6 +140,7 @@ def clone(self, klass=None, **kwargs):
obj.default_cols = self.default_cols
obj.default_ordering = self.default_ordering
obj.standard_ordering = self.standard_ordering
+ obj.ordering_aliases = []
obj.start_meta = self.start_meta =[:]
obj.tables = self.tables[:]
@@ -215,16 +217,18 @@ def as_sql(self, with_limits=True):
out_cols = self.get_columns()
ordering = self.get_ordering()
# This must come after 'select' and 'ordering' -- see docstring of
# get_from_clause() for details.
from_, f_params = self.get_from_clause()
where, w_params = self.where.as_sql(qn=self.quote_name_unless_alias)
params = list(self.extra_select_params)
result = ['SELECT']
if self.distinct:
- result.append(', '.join(out_cols))
+ result.append(', '.join(out_cols + self.ordering_aliases))
@@ -465,15 +469,13 @@ def get_grouping(self):
def get_ordering(self):
- Returns a tuple representing the SQL elements in the "order by" clause.
+ Returns list representing the SQL elements in the "order by" clause.
+ Also sets the ordering_aliases attribute on this instance to a list of
+ extra aliases needed in the select.
Determining the ordering SQL can change the tables we need to include,
so this should be run *before* get_from_clause().
- # FIXME: It's an SQL-92 requirement that all ordering columns appear as
- # output columns in the query (in the select statement) or be ordinals.
- # We don't enforce that here, but we should (by adding to the select
- # columns), for portability.
if self.extra_order_by:
ordering = self.extra_order_by
elif not self.default_ordering:
@@ -485,6 +487,7 @@ def get_ordering(self):
distinct = self.distinct
select_aliases = self._select_aliases
result = []
+ ordering_aliases = []
if self.standard_ordering:
asc, desc = ORDER_DIR['ASC']
@@ -515,13 +518,16 @@ def get_ordering(self):
for table, col, order in self.find_ordering_name(field,
self.model._meta, default_order=asc):
elt = '%s.%s' % (qn(table), qn2(col))
- if not distinct or elt in select_aliases:
- result.append('%s %s' % (elt, order))
+ if distinct and elt not in select_aliases:
+ ordering_aliases.append(elt)
+ result.append('%s %s' % (elt, order))
col, order = get_order_dir(field, asc)
elt = qn(col)
- if not distinct or elt in select_aliases:
- result.append('%s %s' % (elt, order))
+ if distinct and elt not in select_aliases:
+ ordering_aliases.append(elt)
+ result.append('%s %s' % (elt, order))
+ self.ordering_aliases = ordering_aliases
return result
def find_ordering_name(self, name, opts, alias=None, default_order='ASC',
@@ -1379,8 +1385,14 @@ def execute_sql(self, result_type=MULTI):
if not result_type:
return cursor
if result_type == SINGLE:
+ if self.ordering_aliases:
+ return cursor.fetchone()[:-len(results.ordering_aliases)]
return cursor.fetchone()
# The MULTI case.
+ if self.ordering_aliases:
+ return order_modified_iter(cursor, len(self.ordering_aliases),
+ self.connection.features.empty_fetchmany_value)
return iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)),
@@ -1408,6 +1420,17 @@ def empty_iter():
yield iter([]).next()
+def order_modified_iter(cursor, trim, sentinel):
+ """
+ Yields blocks of rows from a cursor. We use this iterator in the special
+ case when extra output columns have been added to support ordering
+ requirements. We must trim those extra columns before anything else can use
+ the results, since they're only needed to make the SQL valid.
+ """
+ for rows in iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)),
+ sentinel):
+ yield [r[:-trim] for r in rows]
def setup_join_cache(sender):
The information needed to join between model fields is something that is
@@ -510,6 +510,10 @@ primary key if there is no ``Meta.ordering`` specified. For example::
...since the ``Blog`` model has no default ordering specified.
+Be cautious when ordering by fields in related models if you are also using
+``distinct()``. See the note in the `distinct()`_ section for an explanation
+of how related model ordering can change the expected results.
It is permissible to specify a multi-valued field to order the results by (for
example, a ``ManyToMany`` field). Normally this won't be a sensible thing to
do and it's really an advanced usage feature. However, if you know that your
@@ -559,10 +563,28 @@ eliminates duplicate rows from the query results.
By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this
is rarely a problem, because simple queries such as ``Blog.objects.all()``
-don't introduce the possibility of duplicate result rows.
+don't introduce the possibility of duplicate result rows. However, if your
+query spans multiple tables, it's possible to get duplicate results when a
+``QuerySet`` is evaluated. That's when you'd use ``distinct()``.
-However, if your query spans multiple tables, it's possible to get duplicate
-results when a ``QuerySet`` is evaluated. That's when you'd use ``distinct()``.
+.. note::
+ Any fields used in an ``order_by()`` call are included in the SQL
+ ``SELECT`` columns. This can sometimes lead to unexpected results when
+ used in conjuntion with ``distinct()``. If you order by fields from a
+ related model, those fields will be added to the selected columns and they
+ may make otherwise duplicate rows appear to be distinct. Since the extra
+ columns don't appear in the returned results (they are only there to
+ support ordering), it sometimes looks like non-distinct results are being
+ returned.
+ Similarly, if you use a ``values()`` query to restrict the columns
+ selected, the columns used in any ``order_by()`` (or default model
+ ordering) will still be involved and may affect uniqueness of the results.
+ The moral here is that if you are using ``distinct()`` be careful about
+ ordering by related models. Similarly, when using ``distinct()`` and
+ ``values()`` together, be careful when ordering by fields not in the
+ ``values()`` call.
@@ -627,6 +649,9 @@ A couple of subtleties that are worth mentioning:
>>> Entry.objects.values('blog_id')
[{'blog_id': 1}, ...]
+ * When using ``values()`` together with ``distinct()``, be aware that
+ ordering can affect the results. See the note in the `distinct()`_
+ section, above, for details.
**New in Django development version:** Previously, it was not possible to pass
``blog_id`` to ``values()`` in the above example, only ``blog``.
@@ -841,8 +866,7 @@ You can only refer to ``ForeignKey`` relations in the list of fields passed to
list of fields and the ``depth`` parameter in the same ``select_related()``
call, since they are conflicting options.
-``extra(select=None, where=None, params=None, tables=None, order_by=None,
+``extra(select=None, where=None, params=None, tables=None, order_by=None, select_params=None)``
Sometimes, the Django query syntax by itself can't easily express a complex
@@ -250,9 +250,9 @@ class Meta:
>>> Reporter.objects.filter(article__reporter=r).distinct()
[<Reporter: John Smith>]
-# It's possible to use values() calls across many-to-one relations.
+# It's possible to use values() calls across many-to-one relations. (Note, too, that we clear the ordering here so as not to drag the 'headline' field into the columns being used to determine uniqueness.)
>>> d = {'reporter__first_name': u'John', 'reporter__last_name': u'Smith'}
->>> list(Article.objects.filter(reporter=r).distinct().values('reporter__first_name', 'reporter__last_name')) == [d]
+>>> list(Article.objects.filter(reporter=r).distinct().order_by().values('reporter__first_name', 'reporter__last_name')) == [d]
# If you delete a reporter, his articles will be deleted.
@@ -498,9 +498,15 @@ class Meta:
>>> Item.objects.filter(Q(creator__name='a3', name='two')|Q(creator__name='a4', name='four'))
[<Item: four>]
-Bug #5321
+Bug #5321, #7070
+Ordering columns must be included in the output columns. Note that this means
+results that might otherwise be distinct are not (if there are multiple values
+in the ordering cols), as in this example. This isn't a bug; it's a warning to
+be careful with the selection of ordering columns.
>>> Note.objects.values('misc').distinct().order_by('note', '-misc')
-[{'misc': u'foo'}, {'misc': u'bar'}]
+[{'misc': u'foo'}, {'misc': u'bar'}, {'misc': u'foo'}]
Bug #4358
If you don't pass any fields to values(), relation fields are returned as

0 comments on commit 8a4e1de

Please sign in to comment.