django · felixxm · Nov 7, 2022 · Nov 5, 2022 · Nov 5, 2022
diff --git a/django/db/backends/base/features.py b/django/db/backends/base/features.py
@@ -8,7 +8,6 @@ class BaseDatabaseFeatures:
     gis_enabled = False
     # Oracle can't group by LOB (large object) data types.
     allows_group_by_lob = True
-    allows_group_by_pk = False
     allows_group_by_selected_pks = False
     allows_group_by_refs = True
     empty_fetchmany_value = []

diff --git a/django/db/backends/mysql/features.py b/django/db/backends/mysql/features.py
@@ -6,7 +6,7 @@
 
 class DatabaseFeatures(BaseDatabaseFeatures):
     empty_fetchmany_value = ()
-    allows_group_by_pk = True
+    allows_group_by_selected_pks = True
     related_fields_match_type = True
     # MySQL doesn't support sliced subqueries with IN/ALL/ANY/SOME.
     allow_sliced_subqueries_with_in = False
@@ -109,18 +109,6 @@ def django_test_skips(self):
                 "update.tests.AdvancedTests.test_update_ordered_by_m2m_annotation",
             },
         }
-        if "ONLY_FULL_GROUP_BY" in self.connection.sql_mode:
-            skips.update(
-                {
-                    "GROUP BY optimization does not work properly when "
-                    "ONLY_FULL_GROUP_BY mode is enabled on MySQL, see #31331.": {
-                        "aggregation.tests.AggregateTestCase."
-                        "test_aggregation_subquery_annotation_multivalued",
-                        "annotations.tests.NonAggregateAnnotationTestCase."
-                        "test_annotation_aggregate_with_m2o",
-                    },
-                }
-            )
         if self.connection.mysql_is_mariadb and (
             10,
             4,

diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py
@@ -179,41 +179,10 @@ def get_group_by(self, select, order_by):
         return result
 
     def collapse_group_by(self, expressions, having):
-        # If the DB can group by primary key, then group by the primary key of
-        # query's main model. Note that for PostgreSQL the GROUP BY clause must
-        # include the primary key of every table, but for MySQL it is enough to
-        # have the main table's primary key.
-        if self.connection.features.allows_group_by_pk:
-            # Determine if the main model's primary key is in the query.
-            pk = None
-            for expr in expressions:
-                # Is this a reference to query's base table primary key? If the
-                # expression isn't a Col-like, then skip the expression.
-                if (
-                    getattr(expr, "target", None) == self.query.model._meta.pk
-                    and getattr(expr, "alias", None) == self.query.base_table
-                ):
-                    pk = expr
-                    break
-            # If the main model's primary key is in the query, group by that
-            # field, HAVING expressions, and expressions associated with tables
-            # that don't have a primary key included in the grouped columns.
-            if pk:
-                pk_aliases = {
-                    expr.alias
-                    for expr in expressions
-                    if hasattr(expr, "target") and expr.target.primary_key
-                }
-                expressions = [pk] + [
-                    expr
-                    for expr in expressions
-                    if expr in having
-                    or (
-                        getattr(expr, "alias", None) is not None
-                        and expr.alias not in pk_aliases
-                    )
-                ]
-        elif self.connection.features.allows_group_by_selected_pks:
+        # If the database supports group by functional dependence reduction,
+        # then the expressions can be reduced to the set of selected table
+        # primary keys as all other columns are functionally dependent on them.
+        if self.connection.features.allows_group_by_selected_pks:
             # Filter out all expressions associated with a table's primary key
             # present in the grouped columns. This is done by identifying all
             # tables that have their primary key included in the grouped
@@ -235,7 +204,9 @@ def collapse_group_by(self, expressions, having):
             expressions = [
                 expr
                 for expr in expressions
-                if expr in pks or getattr(expr, "alias", None) not in aliases
+                if expr in pks
+                or expr in having
+                or getattr(expr, "alias", None) not in aliases
             ]
         return expressions
 

diff --git a/docs/releases/4.2.txt b/docs/releases/4.2.txt
@@ -309,7 +309,12 @@ Database backend API
 This section describes changes that may be needed in third-party database
 backends.
 
-* ...
+* ``DatabaseFeatures.allows_group_by_pk`` is removed as it only remained to
+  accommodate a MySQL extension that has been supplanted by proper functional
+  dependency detection in MySQL 5.7.15. Note that
+  ``DatabaseFeatures.allows_group_by_selected_pks`` is still supported and
+  should be enabled if your backend supports functional dependency detection in
+  ``GROUP BY`` clauses as specified by the ``SQL:1999`` standard.
 
 Dropped support for MariaDB 10.3
 --------------------------------

diff --git a/tests/aggregation_regress/tests.py b/tests/aggregation_regress/tests.py
@@ -23,7 +23,7 @@
     Variance,
     When,
 )
-from django.test import TestCase, skipUnlessAnyDBFeature, skipUnlessDBFeature
+from django.test import TestCase, skipUnlessDBFeature
 from django.test.utils import Approximate
 
 from .models import (
@@ -1420,7 +1420,7 @@ def test_annotate_joins(self):
         # The query executes without problems.
         self.assertEqual(len(qs.exclude(publisher=-1)), 6)
 
-    @skipUnlessAnyDBFeature("allows_group_by_pk", "allows_group_by_selected_pks")
+    @skipUnlessDBFeature("allows_group_by_selected_pks")
     def test_aggregate_duplicate_columns(self):
         # Regression test for #17144
 
@@ -1448,7 +1448,7 @@ def test_aggregate_duplicate_columns(self):
             ],
         )
 
-    @skipUnlessAnyDBFeature("allows_group_by_pk", "allows_group_by_selected_pks")
+    @skipUnlessDBFeature("allows_group_by_selected_pks")
     def test_aggregate_duplicate_columns_only(self):
         # Works with only() too.
         results = Author.objects.only("id", "name").annotate(
@@ -1474,18 +1474,14 @@ def test_aggregate_duplicate_columns_only(self):
             ],
         )
 
-    @skipUnlessAnyDBFeature("allows_group_by_pk", "allows_group_by_selected_pks")
+    @skipUnlessDBFeature("allows_group_by_selected_pks")
     def test_aggregate_duplicate_columns_select_related(self):
         # And select_related()
         results = Book.objects.select_related("contact").annotate(
             num_authors=Count("authors")
         )
         _, _, grouping = results.query.get_compiler(using="default").pre_sql_setup()
-        # In the case of `group_by_selected_pks` we also group by contact.id
-        # because of the select_related.
-        self.assertEqual(
-            len(grouping), 1 if connection.features.allows_group_by_pk else 2
-        )
+        self.assertEqual(len(grouping), 2)
         self.assertIn("id", grouping[0][0])
         self.assertNotIn("name", grouping[0][0])
         self.assertNotIn("contact", grouping[0][0])

diff --git a/tests/annotations/tests.py b/tests/annotations/tests.py
@@ -550,21 +550,6 @@ def test_values_with_pk_annotation(self):
         for publisher in publishers.filter(pk=self.p1.pk):
             self.assertEqual(publisher["book__rating"], publisher["total"])
 
-    @skipUnlessDBFeature("allows_group_by_pk")
-    def test_rawsql_group_by_collapse(self):
-        raw = RawSQL("SELECT MIN(id) FROM annotations_book", [])
-        qs = (
-            Author.objects.values("id")
-            .annotate(
-                min_book_id=raw,
-                count_friends=Count("friends"),
-            )
-            .order_by()
-        )
-        _, _, group_by = qs.query.get_compiler(using="default").pre_sql_setup()
-        self.assertEqual(len(group_by), 1)
-        self.assertNotEqual(raw, group_by[0])
-
     def test_defer_annotation(self):
         """
         Deferred attributes can be referenced by an annotation,