Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed #31331 -- Switched MySQL to group by selected primary keys. #16258

Merged
merged 2 commits into from
Nov 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 0 additions & 1 deletion django/db/backends/base/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ class BaseDatabaseFeatures:
gis_enabled = False
# Oracle can't group by LOB (large object) data types.
allows_group_by_lob = True
allows_group_by_pk = False
allows_group_by_selected_pks = False
allows_group_by_refs = True
empty_fetchmany_value = []
Expand Down
14 changes: 1 addition & 13 deletions django/db/backends/mysql/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

class DatabaseFeatures(BaseDatabaseFeatures):
empty_fetchmany_value = ()
allows_group_by_pk = True
allows_group_by_selected_pks = True
related_fields_match_type = True
# MySQL doesn't support sliced subqueries with IN/ALL/ANY/SOME.
allow_sliced_subqueries_with_in = False
Expand Down Expand Up @@ -109,18 +109,6 @@ def django_test_skips(self):
"update.tests.AdvancedTests.test_update_ordered_by_m2m_annotation",
},
}
if "ONLY_FULL_GROUP_BY" in self.connection.sql_mode:
skips.update(
{
"GROUP BY optimization does not work properly when "
"ONLY_FULL_GROUP_BY mode is enabled on MySQL, see #31331.": {
"aggregation.tests.AggregateTestCase."
"test_aggregation_subquery_annotation_multivalued",
"annotations.tests.NonAggregateAnnotationTestCase."
"test_annotation_aggregate_with_m2o",
},
}
)
if self.connection.mysql_is_mariadb and (
10,
4,
Expand Down
43 changes: 7 additions & 36 deletions django/db/models/sql/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,41 +179,10 @@ def get_group_by(self, select, order_by):
return result

def collapse_group_by(self, expressions, having):
# If the DB can group by primary key, then group by the primary key of
# query's main model. Note that for PostgreSQL the GROUP BY clause must
# include the primary key of every table, but for MySQL it is enough to
# have the main table's primary key.
if self.connection.features.allows_group_by_pk:
# Determine if the main model's primary key is in the query.
pk = None
for expr in expressions:
# Is this a reference to query's base table primary key? If the
# expression isn't a Col-like, then skip the expression.
if (
getattr(expr, "target", None) == self.query.model._meta.pk
and getattr(expr, "alias", None) == self.query.base_table
):
pk = expr
break
# If the main model's primary key is in the query, group by that
# field, HAVING expressions, and expressions associated with tables
# that don't have a primary key included in the grouped columns.
if pk:
pk_aliases = {
expr.alias
for expr in expressions
if hasattr(expr, "target") and expr.target.primary_key
}
expressions = [pk] + [
expr
for expr in expressions
if expr in having
or (
getattr(expr, "alias", None) is not None
and expr.alias not in pk_aliases
)
]
elif self.connection.features.allows_group_by_selected_pks:
# If the database supports group by functional dependence reduction,
# then the expressions can be reduced to the set of selected table
# primary keys as all other columns are functionally dependent on them.
if self.connection.features.allows_group_by_selected_pks:
# Filter out all expressions associated with a table's primary key
# present in the grouped columns. This is done by identifying all
# tables that have their primary key included in the grouped
Expand All @@ -235,7 +204,9 @@ def collapse_group_by(self, expressions, having):
expressions = [
expr
for expr in expressions
if expr in pks or getattr(expr, "alias", None) not in aliases
if expr in pks
or expr in having
felixxm marked this conversation as resolved.
Show resolved Hide resolved
or getattr(expr, "alias", None) not in aliases
]
return expressions

Expand Down
7 changes: 6 additions & 1 deletion docs/releases/4.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,12 @@ Database backend API
This section describes changes that may be needed in third-party database
backends.

* ...
* ``DatabaseFeatures.allows_group_by_pk`` is removed as it only remained to
accommodate a MySQL extension that has been supplanted by proper functional
dependency detection in MySQL 5.7.15. Note that
``DatabaseFeatures.allows_group_by_selected_pks`` is still supported and
should be enabled if your backend supports functional dependency detection in
``GROUP BY`` clauses as specified by the ``SQL:1999`` standard.

Dropped support for MariaDB 10.3
--------------------------------
Expand Down
14 changes: 5 additions & 9 deletions tests/aggregation_regress/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
Variance,
When,
)
from django.test import TestCase, skipUnlessAnyDBFeature, skipUnlessDBFeature
from django.test import TestCase, skipUnlessDBFeature
from django.test.utils import Approximate

from .models import (
Expand Down Expand Up @@ -1420,7 +1420,7 @@ def test_annotate_joins(self):
# The query executes without problems.
self.assertEqual(len(qs.exclude(publisher=-1)), 6)

@skipUnlessAnyDBFeature("allows_group_by_pk", "allows_group_by_selected_pks")
@skipUnlessDBFeature("allows_group_by_selected_pks")
def test_aggregate_duplicate_columns(self):
# Regression test for #17144

Expand Down Expand Up @@ -1448,7 +1448,7 @@ def test_aggregate_duplicate_columns(self):
],
)

@skipUnlessAnyDBFeature("allows_group_by_pk", "allows_group_by_selected_pks")
@skipUnlessDBFeature("allows_group_by_selected_pks")
def test_aggregate_duplicate_columns_only(self):
# Works with only() too.
results = Author.objects.only("id", "name").annotate(
Expand All @@ -1474,18 +1474,14 @@ def test_aggregate_duplicate_columns_only(self):
],
)

@skipUnlessAnyDBFeature("allows_group_by_pk", "allows_group_by_selected_pks")
@skipUnlessDBFeature("allows_group_by_selected_pks")
def test_aggregate_duplicate_columns_select_related(self):
# And select_related()
results = Book.objects.select_related("contact").annotate(
num_authors=Count("authors")
)
_, _, grouping = results.query.get_compiler(using="default").pre_sql_setup()
# In the case of `group_by_selected_pks` we also group by contact.id
# because of the select_related.
self.assertEqual(
len(grouping), 1 if connection.features.allows_group_by_pk else 2
)
self.assertEqual(len(grouping), 2)
self.assertIn("id", grouping[0][0])
self.assertNotIn("name", grouping[0][0])
self.assertNotIn("contact", grouping[0][0])
Expand Down
15 changes: 0 additions & 15 deletions tests/annotations/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,21 +550,6 @@ def test_values_with_pk_annotation(self):
for publisher in publishers.filter(pk=self.p1.pk):
self.assertEqual(publisher["book__rating"], publisher["total"])

@skipUnlessDBFeature("allows_group_by_pk")
def test_rawsql_group_by_collapse(self):
raw = RawSQL("SELECT MIN(id) FROM annotations_book", [])
qs = (
Author.objects.values("id")
.annotate(
min_book_id=raw,
count_friends=Count("friends"),
)
.order_by()
)
_, _, group_by = qs.query.get_compiler(using="default").pre_sql_setup()
self.assertEqual(len(group_by), 1)
self.assertNotEqual(raw, group_by[0])

felixxm marked this conversation as resolved.
Show resolved Hide resolved
def test_defer_annotation(self):
"""
Deferred attributes can be referenced by an annotation,
Expand Down