Skip to content

Commit

Permalink
[SPARK-40291][SQL] Improve the message for column not in group by cla…
Browse files Browse the repository at this point in the history
…use error

### What changes were proposed in this pull request?
Improve the message of the error raised when a column is not in the GROUP BY clause.

### Why are the changes needed?
To migrate the "column not in GROUP BY clause" error to the new error class framework.

### Does this PR introduce _any_ user-facing change?
Yes, it adds a new error class, `COLUMN_NOT_IN_GROUP_BY_CLAUSE`, and the error message shown to users changes accordingly.

### How was this patch tested?
Unit tests (updated `AnalysisErrorSuite`, `SQLViewSuite`, and the affected SQL golden files).

Closes #37742 from linhongliu-db/SPARK-40291.

Authored-by: Linhong Liu <linhong.liu@databricks.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
  • Loading branch information
linhongliu-db authored and MaxGekk committed Sep 7, 2022
1 parent 3ff2def commit 333140f
Show file tree
Hide file tree
Showing 10 changed files with 90 additions and 21 deletions.
6 changes: 6 additions & 0 deletions core/src/main/resources/error/error-classes.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@
],
"sqlState" : "22005"
},
"COLUMN_NOT_IN_GROUP_BY_CLAUSE" : {
"message" : [
"The expression <expression> is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in `first()` (or `first_value()`) if you don't care which value you get."
],
"sqlState" : "42000"
},
"CONCURRENT_QUERY" : {
"message" : [
"Another instance of this query was just started by a concurrent session."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -346,11 +346,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
s"if you don't care which value you get."
)
case e: Attribute if !groupingExprs.exists(_.semanticEquals(e)) =>
failAnalysis(
s"expression '${e.sql}' is neither present in the group by, " +
s"nor is it an aggregate function. " +
"Add to group by or wrap in first() (or first_value) if you don't care " +
"which value you get.")
throw QueryCompilationErrors.columnNotInGroupByClauseError(e)
case s: ScalarSubquery
if s.children.nonEmpty && !groupingExprs.exists(_.semanticEquals(s)) =>
failAnalysis(s"Correlated scalar subquery '${s.sql}' is neither " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2554,4 +2554,11 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
errorClass = "INVALID_COLUMN_OR_FIELD_DATA_TYPE",
messageParameters = Array(toSQLId(name), toSQLType(dt), toSQLType(expected)))
}

/**
 * Builds the error for an expression that is referenced in an aggregate query
 * but is neither part of the GROUP BY clause nor an aggregate function.
 *
 * @param expression the offending expression; it is rendered with `toSQLExpr`
 *                   and passed as the only message parameter
 * @return an `AnalysisException` carrying the `COLUMN_NOT_IN_GROUP_BY_CLAUSE`
 *         error class (the caller is expected to throw it)
 */
def columnNotInGroupByClauseError(expression: Expression): Throwable = {
  // Render the expression first so the exception construction stays a single, flat call.
  val renderedExpr = toSQLExpr(expression)
  new AnalysisException(
    errorClass = "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
    messageParameters = Array(renderedExpr))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ class AnalysisErrorSuite extends AnalysisTest {
errorTest(
"missing group by",
testRelation2.groupBy($"a")($"b"),
"'b'" :: "group by" :: Nil
"\"b\"" :: "COLUMN_NOT_IN_GROUP_BY_CLAUSE" :: Nil
)

errorTest(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,13 @@ SELECT a, COUNT(b) FILTER (WHERE a != 2) FROM testData GROUP BY b
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdata.a' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "\"a\""
}
}


-- !query
Expand Down Expand Up @@ -708,7 +714,13 @@ SELECT a + 2, COUNT(b) FILTER (WHERE b IN (1, 2)) FROM testData GROUP BY a + 1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdata.a' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "\"a\""
}
}


-- !query
Expand Down
24 changes: 21 additions & 3 deletions sql/core/src/test/resources/sql-tests/results/group-by.sql.out
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@ SELECT a, COUNT(b) FROM testData GROUP BY b
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdata.a' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "\"a\""
}
}


-- !query
Expand Down Expand Up @@ -107,7 +113,13 @@ SELECT a + 2, COUNT(b) FROM testData GROUP BY a + 1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdata.a' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "\"a\""
}
}


-- !query
Expand Down Expand Up @@ -199,7 +211,13 @@ SELECT k AS a, COUNT(v) FROM testDataHasSameNameWithAlias GROUP BY a
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdatahassamenamewithalias.k' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "\"k\""
}
}


-- !query
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,13 @@ SELECT c1 FROM (values (1,2), (3,2)) t(c1, c2) GROUP BY GROUPING SETS (())
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 't.c1' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "\"c1\""
}
}


-- !query
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,13 @@ CREATE VIEW key_dependent_view AS
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'spark_catalog.default.view_base_table.data' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "\"data\""
}
}


-- !query
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@ SELECT udf(a), udf(COUNT(udf(b))) FROM testData GROUP BY b
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdata.a' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "\"a\""
}
}


-- !query
Expand Down Expand Up @@ -107,7 +113,13 @@ SELECT udf(a + 2), udf(COUNT(b)) FROM testData GROUP BY a + 1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdata.a' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "\"a\""
}
}


-- !query
Expand Down Expand Up @@ -182,7 +194,13 @@ SELECT k AS a, udf(COUNT(udf(v))) FROM testDataHasSameNameWithAlias GROUP BY udf
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdatahassamenamewithalias.k' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
{
"errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
"sqlState" : "42000",
"messageParameters" : {
"expression" : "\"k\""
}
}


-- !query
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -897,11 +897,11 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {
withSQLConf(GROUP_BY_ORDINAL.key -> "false") {
val e = intercept[AnalysisException] {
sql("SELECT * FROM v3")
}.getMessage
assert(e.contains(
"expression 'spark_catalog.default.t.c1' is neither present " +
"in the group by, nor is it an aggregate function. Add to group by or wrap in " +
"first() (or first_value) if you don't care which value you get."))
}
checkError(e,
errorClass = "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
parameters = Map(
"expression" -> "\"c1\""))
}
withSQLConf(GROUP_BY_ALIASES.key -> "false") {
val e = intercept[AnalysisException] {
Expand Down

0 comments on commit 333140f

Please sign in to comment.