Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Added support for modifying the effect of ``DISTINCT`` clauses so they
only consider some fields (PostgreSQL only).

For this, the ``distinct()`` QuerySet method now accepts an optional
list of model field names and generates ``DISTINCT ON`` clauses in
these cases. Thanks to Jeffrey Gelens and Anssi Kääriäinen for their work.

Fixes #6422.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@17244 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit 287565779d3ae4d3229ecbb2ff356c79b920e7d0 1 parent 03eb290
Ramiro Morales authored December 22, 2011
2  AUTHORS
@@ -203,6 +203,7 @@ answer newbie questions, and generally made Django that much better:
203 203
     Marc Garcia <marc.garcia@accopensys.com>
204 204
     Andy Gayton <andy-django@thecablelounge.com>
205 205
     geber@datacollect.com
  206
+    Jeffrey Gelens <jeffrey@gelens.org>
206 207
     Baishampayan Ghose
207 208
     Joshua Ginsberg <jag@flowtheory.net>
208 209
     Dimitris Glezos <dimitris@glezos.com>
@@ -269,6 +270,7 @@ answer newbie questions, and generally made Django that much better:
269 270
     jpellerin@gmail.com
270 271
     junzhang.jn@gmail.com
271 272
     Xia Kai <http://blog.xiaket.org/>
  273
+    Anssi Kääriäinen
272 274
     Antti Kaihola <http://djangopeople.net/akaihola/>
273 275
     Peter van Kampen
274 276
     Bahadır Kandemir <bahadir@pardus.org.tr>
14  django/db/backends/__init__.py
@@ -406,6 +406,9 @@ class BaseDatabaseFeatures(object):
406 406
     supports_stddev = None
407 407
     can_introspect_foreign_keys = None
408 408
 
  409
+    # Support for the DISTINCT ON clause
  410
+    can_distinct_on_fields = False
  411
+
409 412
     def __init__(self, connection):
410 413
         self.connection = connection
411 414
 
@@ -559,6 +562,17 @@ def fulltext_search_sql(self, field_name):
559 562
         """
560 563
         raise NotImplementedError('Full-text search is not implemented for this database backend')
561 564
 
  565
+    def distinct_sql(self, fields):
  566
+        """
  567
+        Returns an SQL DISTINCT clause which removes duplicate rows from the
  568
+        result set. If any fields are given, only the given fields are being
  569
+        checked for duplicates.
  570
+        """
  571
+        if fields:
  572
+            raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
  573
+        else:
  574
+            return 'DISTINCT'
  575
+
562 576
     def last_executed_query(self, cursor, sql, params):
563 577
         """
564 578
         Returns a string of the query last executed by the given cursor, with
1  django/db/backends/postgresql_psycopg2/base.py
@@ -82,6 +82,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
82 82
     has_select_for_update_nowait = True
83 83
     has_bulk_insert = True
84 84
     supports_tablespaces = True
  85
+    can_distinct_on_fields = True
85 86
 
86 87
 class DatabaseWrapper(BaseDatabaseWrapper):
87 88
     vendor = 'postgresql'
6  django/db/backends/postgresql_psycopg2/operations.py
@@ -179,6 +179,12 @@ def max_name_length(self):
179 179
 
180 180
         return 63
181 181
 
  182
+    def distinct_sql(self, fields):
  183
+        if fields:
  184
+            return 'DISTINCT ON (%s)' % ', '.join(fields)
  185
+        else:
  186
+            return 'DISTINCT'
  187
+
182 188
     def last_executed_query(self, cursor, sql, params):
183 189
         # http://initd.org/psycopg/docs/cursor.html#cursor.query
184 190
         # The query attribute is a Psycopg extension to the DB API 2.0.
10  django/db/models/query.py
@@ -323,6 +323,8 @@ def aggregate(self, *args, **kwargs):
323 323
         If args is present the expression is passed as a kwarg using
324 324
         the Aggregate object's default alias.
325 325
         """
  326
+        if self.query.distinct_fields:
  327
+            raise NotImplementedError("aggregate() + distinct(fields) not implemented.")
326 328
         for arg in args:
327 329
             kwargs[arg.default_alias] = arg
328 330
 
@@ -751,12 +753,14 @@ def order_by(self, *field_names):
751 753
         obj.query.add_ordering(*field_names)
752 754
         return obj
753 755
 
754  
-    def distinct(self, true_or_false=True):
  756
+    def distinct(self, *field_names):
755 757
         """
756 758
         Returns a new QuerySet instance that will select only distinct results.
757 759
         """
  760
+        assert self.query.can_filter(), \
  761
+                "Cannot create distinct fields once a slice has been taken."
758 762
         obj = self._clone()
759  
-        obj.query.distinct = true_or_false
  763
+        obj.query.add_distinct_fields(*field_names)
760 764
         return obj
761 765
 
762 766
     def extra(self, select=None, where=None, params=None, tables=None,
@@ -1179,7 +1183,7 @@ def order_by(self, *field_names):
1179 1183
         """
1180 1184
         return self
1181 1185
 
1182  
-    def distinct(self, true_or_false=True):
  1186
+    def distinct(self, fields=None):
1183 1187
         """
1184 1188
         Always returns EmptyQuerySet.
1185 1189
         """
109  django/db/models/sql/compiler.py
@@ -23,6 +23,8 @@ def pre_sql_setup(self):
23 23
         Does any necessary class setup immediately prior to producing SQL. This
24 24
         is for things that can't necessarily be done in __init__ because we
25 25
         might not have all the pieces in place at that time.
  26
+        # TODO: after the query has been executed, the altered state should be
  27
+        # cleaned. We are not using a clone() of the query here.
26 28
         """
27 29
         if not self.query.tables:
28 30
             self.query.join((None, self.query.model._meta.db_table, None, None))
@@ -60,11 +62,19 @@ def as_sql(self, with_limits=True, with_col_aliases=False):
60 62
             return '', ()
61 63
 
62 64
         self.pre_sql_setup()
  65
+        # After executing the query, we must get rid of any joins the query
  66
+        # setup created. So, take note of alias counts before the query ran.
  67
+        # However we do not want to get rid of stuff done in pre_sql_setup(),
  68
+        # as the pre_sql_setup will modify query state in a way that forbids
  69
+        # another run of it.
  70
+        self.refcounts_before = self.query.alias_refcount.copy()
63 71
         out_cols = self.get_columns(with_col_aliases)
64 72
         ordering, ordering_group_by = self.get_ordering()
65 73
 
66  
-        # This must come after 'select' and 'ordering' -- see docstring of
67  
-        # get_from_clause() for details.
  74
+        distinct_fields = self.get_distinct()
  75
+
  76
+        # This must come after 'select', 'ordering' and 'distinct' -- see
  77
+        # docstring of get_from_clause() for details.
68 78
         from_, f_params = self.get_from_clause()
69 79
 
70 80
         qn = self.quote_name_unless_alias
@@ -76,8 +86,10 @@ def as_sql(self, with_limits=True, with_col_aliases=False):
76 86
             params.extend(val[1])
77 87
 
78 88
         result = ['SELECT']
  89
+
79 90
         if self.query.distinct:
80  
-            result.append('DISTINCT')
  91
+            result.append(self.connection.ops.distinct_sql(distinct_fields))
  92
+
81 93
         result.append(', '.join(out_cols + self.query.ordering_aliases))
82 94
 
83 95
         result.append('FROM')
@@ -90,6 +102,9 @@ def as_sql(self, with_limits=True, with_col_aliases=False):
90 102
 
91 103
         grouping, gb_params = self.get_grouping()
92 104
         if grouping:
  105
+            if distinct_fields:
  106
+                raise NotImplementedError(
  107
+                    "annotate() + distinct(fields) not implemented.")
93 108
             if ordering:
94 109
                 # If the backend can't group by PK (i.e., any database
95 110
                 # other than MySQL), then any fields mentioned in the
@@ -129,6 +144,9 @@ def as_sql(self, with_limits=True, with_col_aliases=False):
129 144
                 raise DatabaseError('NOWAIT is not supported on this database backend.')
130 145
             result.append(self.connection.ops.for_update_sql(nowait=nowait))
131 146
 
  147
+        # Finally do cleanup - get rid of the joins we created above.
  148
+        self.query.reset_refcounts(self.refcounts_before)
  149
+
132 150
         return ' '.join(result), tuple(params)
133 151
 
134 152
     def as_nested_sql(self):
@@ -292,6 +310,26 @@ def get_default_columns(self, with_aliases=False, col_aliases=None,
292 310
                     col_aliases.add(field.column)
293 311
         return result, aliases
294 312
 
  313
+    def get_distinct(self):
  314
+        """
  315
+        Returns a quoted list of fields to use in DISTINCT ON part of the query.
  316
+
  317
+        Note that this method can alter the tables in the query, and thus it
  318
+        must be called before get_from_clause().
  319
+        """
  320
+        qn = self.quote_name_unless_alias
  321
+        qn2 = self.connection.ops.quote_name
  322
+        result = []
  323
+        opts = self.query.model._meta
  324
+
  325
+        for name in self.query.distinct_fields:
  326
+            parts = name.split(LOOKUP_SEP)
  327
+            field, col, alias, _, _ = self._setup_joins(parts, opts, None)
  328
+            col, alias = self._final_join_removal(col, alias)
  329
+            result.append("%s.%s" % (qn(alias), qn2(col)))
  330
+        return result
  331
+
  332
+
295 333
     def get_ordering(self):
296 334
         """
297 335
         Returns a tuple containing a list representing the SQL elements in the
@@ -384,21 +422,7 @@ def find_ordering_name(self, name, opts, alias=None, default_order='ASC',
384 422
         """
385 423
         name, order = get_order_dir(name, default_order)
386 424
         pieces = name.split(LOOKUP_SEP)
387  
-        if not alias:
388  
-            alias = self.query.get_initial_alias()
389  
-        field, target, opts, joins, last, extra = self.query.setup_joins(pieces,
390  
-                opts, alias, False)
391  
-        alias = joins[-1]
392  
-        col = target.column
393  
-        if not field.rel:
394  
-            # To avoid inadvertent trimming of a necessary alias, use the
395  
-            # refcount to show that we are referencing a non-relation field on
396  
-            # the model.
397  
-            self.query.ref_alias(alias)
398  
-
399  
-        # Must use left outer joins for nullable fields and their relations.
400  
-        self.query.promote_alias_chain(joins,
401  
-            self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER)
  425
+        field, col, alias, joins, opts = self._setup_joins(pieces, opts, alias)
402 426
 
403 427
         # If we get to this point and the field is a relation to another model,
404 428
         # append the default ordering for that model.
@@ -416,11 +440,47 @@ def find_ordering_name(self, name, opts, alias=None, default_order='ASC',
416 440
                 results.extend(self.find_ordering_name(item, opts, alias,
417 441
                         order, already_seen))
418 442
             return results
  443
+        col, alias = self._final_join_removal(col, alias)
  444
+        return [(alias, col, order)]
  445
+
  446
+    def _setup_joins(self, pieces, opts, alias):
  447
+        """
  448
+        A helper method for get_ordering and get_distinct. This method will
  449
+        call query.setup_joins, handle refcounts and then promote the joins.
  450
+
  451
+        Note that get_ordering and get_distinct must produce same target
  452
+        columns on same input, as the prefixes of get_ordering and get_distinct
  453
+        must match. Executing SQL where this is not true is an error.
  454
+        """
  455
+        if not alias:
  456
+            alias = self.query.get_initial_alias()
  457
+        field, target, opts, joins, _, _ = self.query.setup_joins(pieces,
  458
+                opts, alias, False)
  459
+        alias = joins[-1]
  460
+        col = target.column
  461
+        if not field.rel:
  462
+            # To avoid inadvertent trimming of a necessary alias, use the
  463
+            # refcount to show that we are referencing a non-relation field on
  464
+            # the model.
  465
+            self.query.ref_alias(alias)
419 466
 
  467
+        # Must use left outer joins for nullable fields and their relations.
  468
+        # Ordering or distinct must not affect the returned set, and INNER
  469
+        # JOINS for nullable fields could do this.
  470
+        self.query.promote_alias_chain(joins,
  471
+            self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER)
  472
+        return field, col, alias, joins, opts
  473
+
  474
+    def _final_join_removal(self, col, alias):
  475
+        """
  476
+        A helper method for get_distinct and get_ordering. This method will
  477
+        trim extra not-needed joins from the tail of the join chain.
  478
+
  479
+        This is very similar to what is done in trim_joins, but we will
  480
+        trim LEFT JOINS here. It would be a good idea to consolidate this
  481
+        method and query.trim_joins().
  482
+        """
420 483
         if alias:
421  
-            # We have to do the same "final join" optimisation as in
422  
-            # add_filter, since the final column might not otherwise be part of
423  
-            # the select set (so we can't order on it).
424 484
             while 1:
425 485
                 join = self.query.alias_map[alias]
426 486
                 if col != join[RHS_JOIN_COL]:
@@ -428,7 +488,7 @@ def find_ordering_name(self, name, opts, alias=None, default_order='ASC',
428 488
                 self.query.unref_alias(alias)
429 489
                 alias = join[LHS_ALIAS]
430 490
                 col = join[LHS_JOIN_COL]
431  
-        return [(alias, col, order)]
  491
+        return col, alias
432 492
 
433 493
     def get_from_clause(self):
434 494
         """
@@ -438,8 +498,8 @@ def get_from_clause(self):
438 498
         from-clause via a "select".
439 499
 
440 500
         This should only be called after any SQL construction methods that
441  
-        might change the tables we need. This means the select columns and
442  
-        ordering must be done first.
  501
+        might change the tables we need. This means the select columns,
  502
+        ordering and distinct must be done first.
443 503
         """
444 504
         result = []
445 505
         qn = self.quote_name_unless_alias
@@ -984,6 +1044,7 @@ def as_sql(self, qn=None):
984 1044
         """
985 1045
         if qn is None:
986 1046
             qn = self.quote_name_unless_alias
  1047
+
987 1048
         sql = ('SELECT %s FROM (%s) subquery' % (
988 1049
             ', '.join([
989 1050
                 aggregate.as_sql(qn, self.connection)
30  django/db/models/sql/query.py
@@ -127,6 +127,7 @@ def __init__(self, model, where=WhereNode):
127 127
         self.order_by = []
128 128
         self.low_mark, self.high_mark = 0, None  # Used for offset/limit
129 129
         self.distinct = False
  130
+        self.distinct_fields = []
130 131
         self.select_for_update = False
131 132
         self.select_for_update_nowait = False
132 133
         self.select_related = False
@@ -265,6 +266,7 @@ def clone(self, klass=None, memo=None, **kwargs):
265 266
         obj.order_by = self.order_by[:]
266 267
         obj.low_mark, obj.high_mark = self.low_mark, self.high_mark
267 268
         obj.distinct = self.distinct
  269
+        obj.distinct_fields = self.distinct_fields[:]
268 270
         obj.select_for_update = self.select_for_update
269 271
         obj.select_for_update_nowait = self.select_for_update_nowait
270 272
         obj.select_related = self.select_related
@@ -298,6 +300,7 @@ def clone(self, klass=None, memo=None, **kwargs):
298 300
         else:
299 301
             obj.used_aliases = set()
300 302
         obj.filter_is_sticky = False
  303
+
301 304
         obj.__dict__.update(kwargs)
302 305
         if hasattr(obj, '_setup_query'):
303 306
             obj._setup_query()
@@ -393,7 +396,7 @@ def get_count(self, using):
393 396
         Performs a COUNT() query using the current filter constraints.
394 397
         """
395 398
         obj = self.clone()
396  
-        if len(self.select) > 1 or self.aggregate_select:
  399
+        if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields):
397 400
             # If a select clause exists, then the query has already started to
398 401
             # specify the columns that are to be returned.
399 402
             # In this case, we need to use a subquery to evaluate the count.
@@ -452,6 +455,8 @@ def combine(self, rhs, connector):
452 455
                 "Cannot combine queries once a slice has been taken."
453 456
         assert self.distinct == rhs.distinct, \
454 457
             "Cannot combine a unique query with a non-unique query."
  458
+        assert self.distinct_fields == rhs.distinct_fields, \
  459
+            "Cannot combine queries with different distinct fields."
455 460
 
456 461
         self.remove_inherited_models()
457 462
         # Work out how to relabel the rhs aliases, if necessary.
@@ -674,9 +679,9 @@ def ref_alias(self, alias):
674 679
         """ Increases the reference count for this alias. """
675 680
         self.alias_refcount[alias] += 1
676 681
 
677  
-    def unref_alias(self, alias):
  682
+    def unref_alias(self, alias, amount=1):
678 683
         """ Decreases the reference count for this alias. """
679  
-        self.alias_refcount[alias] -= 1
  684
+        self.alias_refcount[alias] -= amount
680 685
 
681 686
     def promote_alias(self, alias, unconditional=False):
682 687
         """
@@ -705,6 +710,15 @@ def promote_alias_chain(self, chain, must_promote=False):
705 710
             if self.promote_alias(alias, must_promote):
706 711
                 must_promote = True
707 712
 
  713
+    def reset_refcounts(self, to_counts):
  714
+        """
  715
+        This method will reset reference counts for aliases so that they match
  716
+        the value passed in :param to_counts:.
  717
+        """
  718
+        for alias, cur_refcount in self.alias_refcount.copy().items():
  719
+            unref_amount = cur_refcount - to_counts.get(alias, 0)
  720
+            self.unref_alias(alias, unref_amount)
  721
+
708 722
     def promote_unused_aliases(self, initial_refcounts, used_aliases):
709 723
         """
710 724
         Given a "before" copy of the alias_refcounts dictionary (as
@@ -832,7 +846,8 @@ def get_initial_alias(self):
832 846
     def count_active_tables(self):
833 847
         """
834 848
         Returns the number of tables in this query with a non-zero reference
835  
-        count.
  849
+        count. Note that after execution, the reference counts are zeroed, so
  850
+        tables added in compiler will not be seen by this method.
836 851
         """
837 852
         return len([1 for count in self.alias_refcount.itervalues() if count])
838 853
 
@@ -1596,6 +1611,13 @@ def clear_select_fields(self):
1596 1611
         self.select = []
1597 1612
         self.select_fields = []
1598 1613
 
  1614
+    def add_distinct_fields(self, *field_names):
  1615
+        """
  1616
+        Adds and resolves the given fields to the query's "distinct on" clause.
  1617
+        """
  1618
+        self.distinct_fields = field_names
  1619
+        self.distinct = True
  1620
+
1599 1621
     def add_fields(self, field_names, allow_m2m=True):
1600 1622
         """
1601 1623
         Adds the given (model) fields to the select set. The field names are
39  docs/ref/models/querysets.txt
@@ -345,7 +345,7 @@ remain undefined afterward).
345 345
 distinct
346 346
 ~~~~~~~~
347 347
 
348  
-.. method:: distinct()
  348
+.. method:: distinct([*fields])
349 349
 
350 350
 Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This
351 351
 eliminates duplicate rows from the query results.
@@ -374,6 +374,43 @@ query spans multiple tables, it's possible to get duplicate results when a
374 374
     :meth:`values()` together, be careful when ordering by fields not in the
375 375
     :meth:`values()` call.
376 376
 
  377
+.. versionadded:: 1.4
  378
+
  379
+The possibility to pass positional arguments (``*fields``) is new in Django 1.4.
  380
+They are names of fields to which the ``DISTINCT`` should be limited. This
  381
+translates to a ``SELECT DISTINCT ON`` SQL query. A ``DISTINCT ON`` query eliminates
  382
+duplicate rows not by comparing all fields in a row, but by comparing only the given
  383
+fields.
  384
+
  385
+.. note::
  386
+    Note that the ability to specify field names is only available in PostgreSQL.
  387
+
  388
+.. note::
  389
+    When using the ``DISTINCT ON`` functionality it is required that the columns given
  390
+    to :meth:`distinct` match the first :meth:`order_by` columns. For example ``SELECT
  391
+    DISTINCT ON (a)`` gives you the first row for each value in column ``a``. If you
  392
+    don't specify an order, then you'll get some arbitrary row.
  393
+
  394
+Examples::
  395
+
  396
+    >>> Author.objects.distinct()
  397
+    [...]
  398
+
  399
+    >>> Entry.objects.order_by('pub_date').distinct('pub_date')
  400
+    [...]
  401
+
  402
+    >>> Entry.objects.order_by('blog').distinct('blog')
  403
+    [...]
  404
+
  405
+    >>> Entry.objects.order_by('author', 'pub_date').distinct('author', 'pub_date')
  406
+    [...]
  407
+
  408
+    >>> Entry.objects.order_by('blog__name', 'mod_date').distinct('blog__name', 'mod_date')
  409
+    [...]
  410
+
  411
+    >>> Entry.objects.order_by('author', 'pub_date').distinct('author')
  412
+    [...]
  413
+
377 414
 values
378 415
 ~~~~~~
379 416
 
10  docs/releases/1.4-alpha-1.txt
@@ -507,6 +507,16 @@ Django 1.4 also includes several smaller improvements worth noting:
507 507
   ``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other
508 508
   cache backends.
509 509
 
  510
+* Support in the ORM for generating ``SELECT`` queries containing ``DISTINCT ON``
  511
+
  512
+  The ``distinct()`` ``QuerySet`` method now accepts an optional list of model
  513
+  field names. If specified, then the ``DISTINCT`` statement is limited to these
  514
+  fields.  PostgreSQL is the only one of the database backends shipped with
  515
+  Django that supports this new functionality.
  516
+
  517
+  For more details, see the documentation for
  518
+  :meth:`~django.db.models.query.QuerySet.distinct`.
  519
+
510 520
 Backwards incompatible changes in 1.4
511 521
 =====================================
512 522
 
10  docs/releases/1.4.txt
@@ -498,6 +498,16 @@ Django 1.4 also includes several smaller improvements worth noting:
498 498
   ``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other
499 499
   cache backends.
500 500
 
  501
+* Support in the ORM for generating ``SELECT`` queries containing ``DISTINCT ON``
  502
+
  503
+  The ``distinct()`` ``QuerySet`` method now accepts an optional list of model
  504
+  field names. If specified, then the ``DISTINCT`` statement is limited to these
  505
+  fields.  PostgreSQL is the only one of the database backends shipped with
  506
+  Django that supports this new functionality.
  507
+
  508
+  For more details, see the documentation for
  509
+  :meth:`~django.db.models.query.QuerySet.distinct`.
  510
+
501 511
 .. _backwards-incompatible-changes-1.4:
502 512
 
503 513
 Backwards incompatible changes in 1.4
1  tests/modeltests/distinct_on_fields/__init__.py
... ...
@@ -0,0 +1 @@
  1
+#
39  tests/modeltests/distinct_on_fields/models.py
... ...
@@ -0,0 +1,39 @@
  1
+from django.db import models
  2
+
  3
+class Tag(models.Model):
  4
+    name = models.CharField(max_length=10)
  5
+    parent = models.ForeignKey('self', blank=True, null=True,
  6
+            related_name='children')
  7
+
  8
+    class Meta:
  9
+        ordering = ['name']
  10
+
  11
+    def __unicode__(self):
  12
+        return self.name
  13
+
  14
+class Celebrity(models.Model):
  15
+    name = models.CharField("Name", max_length=20)
  16
+    greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
  17
+
  18
+    def __unicode__(self):
  19
+        return self.name
  20
+
  21
+class Fan(models.Model):
  22
+    fan_of = models.ForeignKey(Celebrity)
  23
+
  24
+class Staff(models.Model):
  25
+    id = models.IntegerField(primary_key=True)
  26
+    name = models.CharField(max_length=50)
  27
+    organisation = models.CharField(max_length=100)
  28
+    tags = models.ManyToManyField(Tag, through='StaffTag')
  29
+    coworkers = models.ManyToManyField('self')
  30
+
  31
+    def __unicode__(self):
  32
+        return self.name
  33
+
  34
+class StaffTag(models.Model):
  35
+    staff = models.ForeignKey(Staff)
  36
+    tag = models.ForeignKey(Tag)
  37
+
  38
+    def __unicode__(self):
  39
+        return u"%s -> %s" % (self.tag, self.staff)
116  tests/modeltests/distinct_on_fields/tests.py
... ...
@@ -0,0 +1,116 @@
  1
+from __future__ import absolute_import, with_statement
  2
+
  3
+from django.db.models import Max
  4
+from django.test import TestCase, skipUnlessDBFeature
  5
+
  6
+from .models import Tag, Celebrity, Fan, Staff, StaffTag
  7
+
  8
+class DistinctOnTests(TestCase):
  9
+    def setUp(self):
  10
+        t1 = Tag.objects.create(name='t1')
  11
+        t2 = Tag.objects.create(name='t2', parent=t1)
  12
+        t3 = Tag.objects.create(name='t3', parent=t1)
  13
+        t4 = Tag.objects.create(name='t4', parent=t3)
  14
+        t5 = Tag.objects.create(name='t5', parent=t3)
  15
+
  16
+        p1_o1 = Staff.objects.create(id=1, name="p1", organisation="o1")
  17
+        p2_o1 = Staff.objects.create(id=2, name="p2", organisation="o1")
  18
+        p3_o1 = Staff.objects.create(id=3, name="p3", organisation="o1")
  19
+        p1_o2 = Staff.objects.create(id=4, name="p1", organisation="o2")
  20
+        p1_o1.coworkers.add(p2_o1, p3_o1)
  21
+        StaffTag.objects.create(staff=p1_o1, tag=t1)
  22
+        StaffTag.objects.create(staff=p1_o1, tag=t1)
  23
+
  24
+        celeb1 = Celebrity.objects.create(name="c1")
  25
+        celeb2 = Celebrity.objects.create(name="c2")
  26
+
  27
+        self.fan1 = Fan.objects.create(fan_of=celeb1)
  28
+        self.fan2 = Fan.objects.create(fan_of=celeb1)
  29
+        self.fan3 = Fan.objects.create(fan_of=celeb2)
  30
+
  31
+    @skipUnlessDBFeature('can_distinct_on_fields')
  32
+    def test_basic_distinct_on(self):
  33
+        """QuerySet.distinct('field', ...) works"""
  34
+        # (qset, expected) tuples
  35
+        qsets = (
  36
+            (
  37
+                Staff.objects.distinct().order_by('name'),
  38
+                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
  39
+            ),
  40
+            (
  41
+                Staff.objects.distinct('name').order_by('name'),
  42
+                ['<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
  43
+            ),
  44
+            (
  45
+                Staff.objects.distinct('organisation').order_by('organisation', 'name'),
  46
+                ['<Staff: p1>', '<Staff: p1>'],
  47
+            ),
  48
+            (
  49
+                Staff.objects.distinct('name', 'organisation').order_by('name', 'organisation'),
  50
+                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
  51
+            ),
  52
+            (
  53
+                Celebrity.objects.filter(fan__in=[self.fan1, self.fan2, self.fan3]).\
  54
+                    distinct('name').order_by('name'),
  55
+                ['<Celebrity: c1>', '<Celebrity: c2>'],
  56
+            ),
  57
+            # Does combining querysets work?
  58
+            (
  59
+                (Celebrity.objects.filter(fan__in=[self.fan1, self.fan2]).\
  60
+                    distinct('name').order_by('name')
  61
+                |Celebrity.objects.filter(fan__in=[self.fan3]).\
  62
+                    distinct('name').order_by('name')),
  63
+                ['<Celebrity: c1>', '<Celebrity: c2>'],
  64
+            ),
  65
+            (
  66
+                StaffTag.objects.distinct('staff','tag'),
  67
+                ['<StaffTag: t1 -> p1>'],
  68
+            ),
  69
+            (
  70
+                Tag.objects.order_by('parent__pk', 'pk').distinct('parent'),
  71
+                ['<Tag: t2>', '<Tag: t4>', '<Tag: t1>'],
  72
+            ),
  73
+            (
  74
+                StaffTag.objects.select_related('staff').distinct('staff__name').order_by('staff__name'),
  75
+                ['<StaffTag: t1 -> p1>'],
  76
+            ),
  77
+            # Fetch the alphabetically first coworker for each worker
  78
+            (
  79
+                (Staff.objects.distinct('id').order_by('id', 'coworkers__name').
  80
+                               values_list('id', 'coworkers__name')),
  81
+                ["(1, u'p2')", "(2, u'p1')", "(3, u'p1')", "(4, None)"]
  82
+            ),
  83
+        )
  84
+        for qset, expected in qsets:
  85
+            self.assertQuerysetEqual(qset, expected)
  86
+            self.assertEqual(qset.count(), len(expected))
  87
+
  88
+        # Combining queries with different distinct_fields is not allowed.
  89
+        base_qs = Celebrity.objects.all()
  90
+        self.assertRaisesMessage(
  91
+            AssertionError,
  92
+            "Cannot combine queries with different distinct fields.",
  93
+            lambda: (base_qs.distinct('id') & base_qs.distinct('name'))
  94
+        )
  95
+
  96
+        # Test join unreffing
  97
+        c1 = Celebrity.objects.distinct('greatest_fan__id', 'greatest_fan__fan_of')
  98
+        self.assertIn('OUTER JOIN', str(c1.query))
  99
+        c2 = c1.distinct('pk')
  100
+        self.assertNotIn('OUTER JOIN', str(c2.query))
  101
+
  102
+    @skipUnlessDBFeature('can_distinct_on_fields')
  103
+    def test_distinct_not_implemented_checks(self):
  104
+        # distinct + annotate not allowed
  105
+        with self.assertRaises(NotImplementedError):
  106
+            Celebrity.objects.annotate(Max('id')).distinct('id')[0]
  107
+        with self.assertRaises(NotImplementedError):
  108
+            Celebrity.objects.distinct('id').annotate(Max('id'))[0]
  109
+
  110
+        # However this check is done only when the query executes, so you
  111
+        # can use distinct() to remove the fields before execution.
  112
+        Celebrity.objects.distinct('id').annotate(Max('id')).distinct()[0]
  113
+        # distinct + aggregate not allowed
  114
+        with self.assertRaises(NotImplementedError):
  115
+            Celebrity.objects.distinct('id').aggregate(Max('id'))
  116
+
4  tests/regressiontests/queries/models.py
@@ -209,6 +209,9 @@ class Celebrity(models.Model):
209 209
     name = models.CharField("Name", max_length=20)
210 210
     greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
211 211
 
  212
+    def __unicode__(self):
  213
+        return self.name
  214
+
212 215
 class TvChef(Celebrity):
213 216
     pass
214 217
 
@@ -343,4 +346,3 @@ class OneToOneCategory(models.Model):
343 346
 
344 347
     def __unicode__(self):
345 348
         return "one2one " + self.new_name
346  
-
20  tests/regressiontests/queries/tests.py
@@ -234,18 +234,22 @@ def test_ticket2253(self):
234 234
             ['<Item: four>', '<Item: one>']
235 235
         )
236 236
 
237  
-    # FIXME: This is difficult to fix and very much an edge case, so punt for
238  
-    # now.  This is related to the order_by() tests for ticket #2253, but the
239  
-    # old bug exhibited itself here (q2 was pulling too many tables into the
240  
-    # combined query with the new ordering, but only because we have evaluated
241  
-    # q2 already).
242  
-    @unittest.expectedFailure
243 237
     def test_order_by_tables(self):
244 238
         q1 = Item.objects.order_by('name')
245 239
         q2 = Item.objects.filter(id=self.i1.id)
246 240
         list(q2)
247 241
         self.assertEqual(len((q1 & q2).order_by('name').query.tables), 1)
248 242
 
  243
+    def test_order_by_join_unref(self):
  244
+        """
  245
+        This test is related to the above one, testing that there aren't
  246
+        old JOINs in the query.
  247
+        """
  248
+        qs = Celebrity.objects.order_by('greatest_fan__fan_of')
  249
+        self.assertIn('OUTER JOIN', str(qs.query))
  250
+        qs = qs.order_by('id')
  251
+        self.assertNotIn('OUTER JOIN', str(qs.query))
  252
+
249 253
     def test_tickets_4088_4306(self):
250 254
         self.assertQuerysetEqual(
251 255
             Report.objects.filter(creator=1001),
@@ -1728,7 +1732,7 @@ def test_recursive_fk_reverse(self):
1728 1732
 
1729 1733
 
1730 1734
 class ConditionalTests(BaseQuerysetTest):
1731  
-    """Tests whose execution depend on dfferent environment conditions like
  1735
+    """Tests whose execution depend on different environment conditions like
1732 1736
     Python version or DB backend features"""
1733 1737
 
1734 1738
     def setUp(self):
@@ -1739,6 +1743,7 @@ def setUp(self):
1739 1743
         t4 = Tag.objects.create(name='t4', parent=t3)
1740 1744
         t5 = Tag.objects.create(name='t5', parent=t3)
1741 1745
 
  1746
+
1742 1747
     # In Python 2.6 beta releases, exceptions raised in __len__ are swallowed
1743 1748
     # (Python issue 1242657), so these cases return an empty list, rather than
1744 1749
     # raising an exception. Not a lot we can do about that, unfortunately, due to
@@ -1810,6 +1815,7 @@ def test_ticket14244(self):
1810 1815
             2500
1811 1816
         )
1812 1817
 
  1818
+
1813 1819
 class UnionTests(unittest.TestCase):
1814 1820
     """
1815 1821
     Tests for the union of two querysets. Bug #12252.
6  tests/regressiontests/select_related_regress/tests.py
@@ -40,9 +40,9 @@ def test_regression_7110(self):

0 notes on commit 2875657

Please sign in to comment.
Something went wrong with that request. Please try again.