Permalink
Browse files

Fixed #17144 -- MySQL again groups by PK only

Thanks to Christian Oudard for the report and tests.
  • Loading branch information...
akaariai committed Nov 7, 2012
1 parent b1ac329 commit cafb266954e21dd55ddfa90597bcf02c022bcb7d
Showing with 125 additions and 32 deletions.
  1. +35 −31 django/db/models/sql/compiler.py
  2. +90 −1 tests/regressiontests/aggregation_regress/tests.py
@@ -103,21 +103,12 @@ def as_sql(self, with_limits=True, with_col_aliases=False):
result.append('WHERE %s' % where)
params.extend(w_params)
grouping, gb_params = self.get_grouping()
grouping, gb_params = self.get_grouping(ordering_group_by)
if grouping:
if distinct_fields:
raise NotImplementedError(
"annotate() + distinct(fields) not implemented.")
if ordering:
# If the backend can't group by PK (i.e., any database
# other than MySQL), then any fields mentioned in the
# ordering clause needs to be in the group by clause.
if not self.connection.features.allows_group_by_pk:
for col, col_params in ordering_group_by:
if col not in grouping:
grouping.append(str(col))
gb_params.extend(col_params)
else:
if not ordering:
ordering = self.connection.ops.force_no_ordering()
result.append('GROUP BY %s' % ', '.join(grouping))
params.extend(gb_params)
@@ -378,7 +369,7 @@ def get_ordering(self):
else:
order = asc
result.append('%s %s' % (field, order))
group_by.append((field, []))
group_by.append((str(field), []))
continue
col, order = get_order_dir(field, asc)
if col in self.query.aggregate_select:
@@ -538,39 +529,52 @@ def get_from_clause(self):
first = False
return result, []
def get_grouping(self):
def get_grouping(self, ordering_group_by):
"""
Returns a tuple representing the SQL elements in the "group by" clause.
"""
qn = self.quote_name_unless_alias
result, params = [], []
if self.query.group_by is not None:
if (len(self.query.model._meta.fields) == len(self.query.select) and
self.connection.features.allows_group_by_pk):
select_cols = self.query.select + self.query.related_select_cols
# Just the column, not the fields.
select_cols = [s[0] for s in select_cols]
if (len(self.query.model._meta.fields) == len(self.query.select)
and self.connection.features.allows_group_by_pk):
self.query.group_by = [
(self.query.model._meta.db_table, self.query.model._meta.pk.column)
]
group_by = self.query.group_by or []
extra_selects = []
for extra_select, extra_params in six.itervalues(self.query.extra_select):
extra_selects.append(extra_select)
params.extend(extra_params)
select_cols = [s.col for s in self.query.select]
related_select_cols = [s.col for s in self.query.related_select_cols]
cols = (group_by + select_cols + related_select_cols + extra_selects)
select_cols = []
seen = set()
cols = self.query.group_by + select_cols
for col in cols:
if col in seen:
continue
seen.add(col)
if isinstance(col, (list, tuple)):
result.append('%s.%s' % (qn(col[0]), qn(col[1])))
sql = '%s.%s' % (qn(col[0]), qn(col[1]))
elif hasattr(col, 'as_sql'):
result.append(col.as_sql(qn, self.connection))
sql = col.as_sql(qn, self.connection)
else:
result.append('(%s)' % str(col))
sql = '(%s)' % str(col)
if sql not in seen:
result.append(sql)
seen.add(sql)
# Still, we need to add all stuff in ordering (except if the backend can
# group by just by PK).
if ordering_group_by and not self.connection.features.allows_group_by_pk:
for order, order_params in ordering_group_by:
# Even if we have seen the same SQL string, it might have
# different params, so, we add same SQL in "has params" case.
if order not in seen or params:
result.append(order)
params.extend(order_params)
seen.add(order)
# Unconditionally add the extra_select items.
for extra_select, extra_params in self.query.extra_select.values():
sql = '(%s)' % str(extra_select)
result.append(sql)
params.extend(extra_params)
return result, params
def fill_related_selections(self, opts=None, root_alias=None, cur_depth=1,
@@ -470,7 +470,7 @@ def test_more_more(self):
# Regression for #15709 - Ensure each group_by field only exists once
# per query
qs = Book.objects.values('publisher').annotate(max_pages=Max('pages')).order_by()
grouping, gb_params = qs.query.get_compiler(qs.db).get_grouping()
grouping, gb_params = qs.query.get_compiler(qs.db).get_grouping([])
self.assertEqual(len(grouping), 1)
def test_duplicate_alias(self):
@@ -889,3 +889,92 @@ def test_annotate_joins(self):
self.assertIs(qs.query.alias_map['aggregation_regress_book'].join_type, None)
# Check that the query executes without problems.
self.assertEqual(len(qs.exclude(publisher=-1)), 6)
@skipUnlessDBFeature("allows_group_by_pk")
def test_aggregate_duplicate_columns(self):
# Regression test for #17144
results = Author.objects.annotate(num_contacts=Count('book_contact_set'))
# There should only be one GROUP BY clause, for the `id` column.
# `name` and `age` should not be grouped on.
grouping, gb_params = results.query.get_compiler(using='default').get_grouping([])
self.assertEqual(len(grouping), 1)
assert 'id' in grouping[0]
assert 'name' not in grouping[0]
assert 'age' not in grouping[0]
# The query group_by property should also only show the `id`.
self.assertEqual(results.query.group_by, [('aggregation_regress_author', 'id')])
# Ensure that we get correct results.
self.assertEqual(
[(a.name, a.num_contacts) for a in results.order_by('name')],
[
('Adrian Holovaty', 1),
('Brad Dayley', 1),
('Jacob Kaplan-Moss', 0),
('James Bennett', 1),
('Jeffrey Forcier', 1),
('Paul Bissex', 0),
('Peter Norvig', 2),
('Stuart Russell', 0),
('Wesley J. Chun', 0),
]
)
@skipUnlessDBFeature("allows_group_by_pk")
def test_aggregate_duplicate_columns_only(self):
# Works with only() too.
results = Author.objects.only('id', 'name').annotate(num_contacts=Count('book_contact_set'))
grouping, gb_params = results.query.get_compiler(using='default').get_grouping([])
self.assertEqual(len(grouping), 1)
assert 'id' in grouping[0]
assert 'name' not in grouping[0]
assert 'age' not in grouping[0]
# The query group_by property should also only show the `id`.
self.assertEqual(results.query.group_by, [('aggregation_regress_author', 'id')])
# Ensure that we get correct results.
self.assertEqual(
[(a.name, a.num_contacts) for a in results.order_by('name')],
[
('Adrian Holovaty', 1),
('Brad Dayley', 1),
('Jacob Kaplan-Moss', 0),
('James Bennett', 1),
('Jeffrey Forcier', 1),
('Paul Bissex', 0),
('Peter Norvig', 2),
('Stuart Russell', 0),
('Wesley J. Chun', 0),
]
)
@skipUnlessDBFeature("allows_group_by_pk")
def test_aggregate_duplicate_columns_select_related(self):
# And select_related()
results = Book.objects.select_related('contact').annotate(
num_authors=Count('authors'))
grouping, gb_params = results.query.get_compiler(using='default').get_grouping([])
self.assertEqual(len(grouping), 1)
assert 'id' in grouping[0]
assert 'name' not in grouping[0]
assert 'contact' not in grouping[0]
# The query group_by property should also only show the `id`.
self.assertEqual(results.query.group_by, [('aggregation_regress_book', 'id')])
# Ensure that we get correct results.
self.assertEqual(
[(b.name, b.num_authors) for b in results.order_by('name')],
[
('Artificial Intelligence: A Modern Approach', 2),
('Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp', 1),
('Practical Django Projects', 1),
('Python Web Development with Django', 3),
('Sams Teach Yourself Django in 24 Hours', 1),
('The Definitive Guide to Django: Web Development Done Right', 2)
]
)

0 comments on commit cafb266

Please sign in to comment.