Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-40291][SQL] Improve the message for column not in group by clause error #37742

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions core/src/main/resources/error/error-classes.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@
],
"sqlState" : "22005"
},
"COLUMN_NOT_IN_GROUP_BY_CLAUSE" : {
"message" : [
"expression '<expression>' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get."
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove the '' around <expression> and use toSQLExpr() instead; also, first() should be quoted with backticks.

],
"sqlState" : "42000"
},
"CONCURRENT_QUERY" : {
"message" : [
"Another instance of this query was just started by a concurrent session."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -343,11 +343,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
s"if you don't care which value you get."
)
case e: Attribute if !groupingExprs.exists(_.semanticEquals(e)) =>
failAnalysis(
s"expression '${e.sql}' is neither present in the group by, " +
s"nor is it an aggregate function. " +
"Add to group by or wrap in first() (or first_value) if you don't care " +
"which value you get.")
throw QueryCompilationErrors.columnNotInGroupByClauseError(e)
case s: ScalarSubquery
if s.children.nonEmpty && !groupingExprs.exists(_.semanticEquals(s)) =>
failAnalysis(s"Correlated scalar subquery '${s.sql}' is neither " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2527,4 +2527,11 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
errorClass = "INVALID_COLUMN_OR_FIELD_DATA_TYPE",
messageParameters = Array(toSQLId(name), toSQLType(dt), toSQLType(expected)))
}

def columnNotInGroupByClauseError(expression: Expression): Throwable = {
new AnalysisException(
errorClass = "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
messageParameters = Array(expression.sql)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

expression.sql -> toSQLExpr(expression)

)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ class AnalysisErrorSuite extends AnalysisTest {
errorTest(
"missing group by",
testRelation2.groupBy($"a")($"b"),
"'b'" :: "group by" :: Nil
"'b'" :: "COLUMN_NOT_IN_GROUP_BY_CLAUSE" :: Nil
)

errorTest(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,13 @@ SELECT a, COUNT(b) FILTER (WHERE a != 2) FROM testData GROUP BY b
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdata.a' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "testdata.a"
}
}


-- !query
Expand Down Expand Up @@ -708,7 +714,13 @@ SELECT a + 2, COUNT(b) FILTER (WHERE b IN (1, 2)) FROM testData GROUP BY a + 1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdata.a' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "testdata.a"
}
}


-- !query
Expand Down
24 changes: 21 additions & 3 deletions sql/core/src/test/resources/sql-tests/results/group-by.sql.out
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@ SELECT a, COUNT(b) FROM testData GROUP BY b
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdata.a' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "testdata.a"
}
}


-- !query
Expand Down Expand Up @@ -107,7 +113,13 @@ SELECT a + 2, COUNT(b) FROM testData GROUP BY a + 1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdata.a' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "testdata.a"
}
}


-- !query
Expand Down Expand Up @@ -198,7 +210,13 @@ SELECT k AS a, COUNT(v) FROM testDataHasSameNameWithAlias GROUP BY a
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdatahassamenamewithalias.k' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "testdatahassamenamewithalias.k"
}
}


-- !query
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,13 @@ SELECT c1 FROM (values (1,2), (3,2)) t(c1, c2) GROUP BY GROUPING SETS (())
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 't.c1' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "t.c1"
}
}


-- !query
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,13 @@ CREATE VIEW key_dependent_view AS
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'spark_catalog.default.view_base_table.data' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "spark_catalog.default.view_base_table.data"
}
}


-- !query
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@ SELECT udf(a), udf(COUNT(udf(b))) FROM testData GROUP BY b
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdata.a' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "testdata.a"
}
}


-- !query
Expand Down Expand Up @@ -107,7 +113,13 @@ SELECT udf(a + 2), udf(COUNT(b)) FROM testData GROUP BY a + 1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdata.a' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "testdata.a"
}
}


-- !query
Expand Down Expand Up @@ -182,7 +194,13 @@ SELECT k AS a, udf(COUNT(udf(v))) FROM testDataHasSameNameWithAlias GROUP BY udf
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdatahassamenamewithalias.k' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "testdatahassamenamewithalias.k"
}
}


-- !query
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -897,10 +897,9 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {
val e = intercept[AnalysisException] {
sql("SELECT * FROM v3")
}.getMessage
assert(e.contains(
"expression 'spark_catalog.default.t.c1' is neither present " +
"in the group by, nor is it an aggregate function. Add to group by or wrap in " +
"first() (or first_value) if you don't care which value you get."))
assert(
e.contains("COLUMN_NOT_IN_GROUP_BY_CLAUSE") &&
e.contains("spark_catalog.default.t.c1"))
}
withSQLConf(GROUP_BY_ALIASES.key -> "false") {
val e = intercept[AnalysisException] {
Expand Down