diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index c0e86df41299a..af566d9f5505a 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -5886,6 +5886,18 @@ ], "sqlState" : "38000" }, + "QUALIFY_AGGREGATE_NOT_ALLOWED" : { + "message" : [ + "Aggregate functions are not supported in QUALIFY: <aggregateExpr>." + ], + "sqlState" : "42903" + }, + "QUALIFY_REQUIRES_WINDOW_FUNCTION" : { + "message" : [ + "The QUALIFY clause requires at least one window function in the current SELECT list or the QUALIFY condition." + ], + "sqlState" : "42903" + }, "RECURSION_LEVEL_LIMIT_EXCEEDED" : { "message" : [ "Recursion level limit reached but query has not exhausted, try increasing it like 'WITH RECURSIVE t(col) MAX RECURSION LEVEL 200'." diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index f96c222ecd61a..8621eca79a6c8 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -687,6 +687,7 @@ Below is a list of all the keywords in Spark SQL. |PROCEDURES|non-reserved|non-reserved|non-reserved| |PROPERTIES|non-reserved|non-reserved|non-reserved| |PURGE|non-reserved|non-reserved|non-reserved| +|QUALIFY|non-reserved|non-reserved|non-reserved| |QUARTER|non-reserved|non-reserved|non-reserved| |QUERY|non-reserved|non-reserved|non-reserved| |RANGE|non-reserved|non-reserved|reserved| diff --git a/docs/sql-ref-syntax-qry-select-qualify.md b/docs/sql-ref-syntax-qry-select-qualify.md new file mode 100644 index 0000000000000..cae018203a8f4 --- /dev/null +++ b/docs/sql-ref-syntax-qry-select-qualify.md @@ -0,0 +1,99 @@ +--- +layout: global +title: QUALIFY Clause +displayTitle: QUALIFY Clause +license: | + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. 
See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--- + +### Description + +The `QUALIFY` clause filters rows after window functions have been evaluated. +It can refer to window functions in the `SELECT` list by alias, or define window +functions directly in the `QUALIFY` condition. When an alias in the `SELECT` list +has the same name as an input column, the input column takes precedence. + +### Syntax + +```sql +QUALIFY boolean_expression +``` + +### Parameters + +* **boolean_expression** + + Specifies any expression that evaluates to a result type `boolean`. Two or + more expressions may be combined together using the logical + operators ( `AND`, `OR` ). + + **Note** + + The current query's `SELECT` list or the `QUALIFY` condition must contain at least + one window function. Aggregate functions are not allowed in the `QUALIFY` condition. + +### Examples + +```sql +CREATE TABLE dealer (id INT, city STRING, car_model STRING, quantity INT); +INSERT INTO dealer VALUES + (100, 'Fremont', 'Honda Civic', 10), + (100, 'Fremont', 'Honda Accord', 15), + (100, 'Fremont', 'Honda CRV', 7), + (200, 'Dublin', 'Honda Civic', 20), + (200, 'Dublin', 'Honda Accord', 10), + (200, 'Dublin', 'Honda CRV', 3), + (300, 'San Jose', 'Honda Civic', 5), + (300, 'San Jose', 'Honda Accord', 8); + +-- `QUALIFY` clause referring to a window function in the `SELECT` list by alias. 
+SELECT city, car_model, RANK() OVER (PARTITION BY car_model ORDER BY quantity) AS rank +FROM dealer +QUALIFY rank = 1; ++--------+------------+----+ +| city| car_model|rank| ++--------+------------+----+ +|San Jose|Honda Accord| 1| +| Dublin| Honda CRV| 1| +|San Jose| Honda Civic| 1| ++--------+------------+----+ + +-- `QUALIFY` clause with a window function directly in the predicate. +SELECT city, car_model +FROM dealer +QUALIFY RANK() OVER (PARTITION BY car_model ORDER BY quantity) = 1; ++--------+------------+ +| city| car_model| ++--------+------------+ +|San Jose|Honda Accord| +| Dublin| Honda CRV| +|San Jose| Honda Civic| ++--------+------------+ +``` + +### Related Statements + +* [SELECT Main](sql-ref-syntax-qry-select.html) +* [WHERE Clause](sql-ref-syntax-qry-select-where.html) +* [GROUP BY Clause](sql-ref-syntax-qry-select-groupby.html) +* [HAVING Clause](sql-ref-syntax-qry-select-having.html) +* [WINDOW Clause](sql-ref-syntax-qry-select-window.html) +* [ORDER BY Clause](sql-ref-syntax-qry-select-orderby.html) +* [SORT BY Clause](sql-ref-syntax-qry-select-sortby.html) +* [CLUSTER BY Clause](sql-ref-syntax-qry-select-clusterby.html) +* [DISTRIBUTE BY Clause](sql-ref-syntax-qry-select-distribute-by.html) +* [LIMIT Clause](sql-ref-syntax-qry-select-limit.html) +* [OFFSET Clause](sql-ref-syntax-qry-select-offset.html) diff --git a/docs/sql-ref-syntax-qry-select.md b/docs/sql-ref-syntax-qry-select.md index 1d5532898c654..9ad9c431626d6 100644 --- a/docs/sql-ref-syntax-qry-select.md +++ b/docs/sql-ref-syntax-qry-select.md @@ -49,6 +49,8 @@ SELECT [ hints , ... ] [ ALL | DISTINCT ] { [ [ named_expression | regex_column_ [ WHERE boolean_expression ] [ GROUP BY expression [ , ... ] ] [ HAVING boolean_expression ] + [ WINDOW clause ] + [ QUALIFY boolean_expression ] ``` ### Parameters @@ -122,6 +124,14 @@ SELECT [ hints , ... ] [ ALL | DISTINCT ] { [ [ named_expression | regex_column_ filter rows after the grouping is performed. 
If HAVING is specified without GROUP BY, it indicates a GROUP BY without grouping expressions (global aggregate). +* **QUALIFY** + + Filters rows after window functions have been evaluated. The current `SELECT` list or the + `QUALIFY` condition must contain at least one window function, and aggregate functions are + not allowed inside the `QUALIFY` condition. Column references in `QUALIFY` are resolved + against input columns first; `SELECT` aliases are used only when there is no matching + input column. + * **ORDER BY** Specifies an ordering of the rows of the complete result set of the query. The output rows are ordered diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index 7af34270693d2..59a0034f922e4 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -405,6 +405,7 @@ PROCEDURE: 'PROCEDURE'; PROCEDURES: 'PROCEDURES'; PROPERTIES: 'PROPERTIES'; PURGE: 'PURGE'; +QUALIFY: 'QUALIFY'; QUARTER: 'QUARTER'; QUERY: 'QUERY'; RANGE: 'RANGE'; diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 91f24b033aa22..1a0382dbe10c4 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -809,6 +809,7 @@ fromStatementBody aggregationClause? havingClause? windowClause? + qualifyClause? queryOrganization ; @@ -826,7 +827,8 @@ querySpecification whereClause? aggregationClause? havingClause? - windowClause? #regularQuerySpecification + windowClause? + qualifyClause? 
#regularQuerySpecification ; transformClause @@ -897,6 +899,10 @@ havingClause : HAVING booleanExpression ; +qualifyClause + : QUALIFY booleanExpression + ; + hint : HENT_START hintStatements+=hintStatement (COMMA? hintStatements+=hintStatement)* HENT_END ; @@ -2135,6 +2141,7 @@ ansiNonReserved | PROCEDURES | PROPERTIES | PURGE + | QUALIFY | QUARTER | QUERY | RANGE @@ -2554,6 +2561,7 @@ nonReserved | PROCEDURES | PROPERTIES | PURGE + | QUALIFY | QUARTER | QUERY | RANGE diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 1224b8ba18a3d..c09361969a9e4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -488,6 +488,7 @@ class Analyzer( ExtractWindowExpressions :: GlobalAggregates :: ResolveAggregateFunctions :: + ResolveQualify :: TimeWindowing :: SessionWindowing :: ResolveWindowTime :: @@ -1681,6 +1682,9 @@ class Analyzer( Project(child.output, r.copy(resolvedFinal, newChild)) } + // Skip the qualify clause here. It will be handled in ResolveQualify. + case q: UnresolvedQualify => q + // Filter can host both grouping expressions/aggregate functions and missing attributes. // The grouping expressions/aggregate functions resolution takes precedence over missing // attributes. See the classdoc of `ResolveReferences` for details. @@ -2401,6 +2405,8 @@ class Analyzer( // and we can't call `LateralJoin.resolveChildren` to resolve outer references. Here we // create a fake Project node as the outer plan. resolveSubQueries(j, Project(Nil, j.left)) + // Skip the qualify clause here. It will be handled in ResolveQualify. + case q: UnresolvedQualify => q // Only a few unary nodes (Project/Filter/Aggregate) can contain subqueries. 
case q: UnaryNode if q.childrenResolved => resolveSubQueries(q, q) @@ -3997,6 +4003,171 @@ class Analyzer( } } } + + /** + * This rule resolves the QUALIFY clause. It resolves the qualify condition against the child + * plan's output, extracts window expressions into aliases, validates that at least one window + * function is present, and builds a Filter placed after window materialization. + * + * The resolution follows these steps: + * 1. Resolve the qualify condition using the child plan's output (Project or Aggregate). + * 2. Extract canonically-unique WindowExpressions from the condition into aliases. + * 3. Reject standalone aggregate functions in the condition. + * 4. Validate that at least one window function exists in SELECT or in the condition. + * 5. Build: Filter(newCond, Project(newChild.output ++ windowAliases, newChild)) and trim output. + * + * The resulting Project node contains window expressions that will be handled by + * ExtractWindowExpressions in a subsequent analyzer iteration. + */ + object ResolveQualify extends Rule[LogicalPlan] { + /** + * Resolve subqueries in the condition expression using the fake Project pattern. + * It wraps the condition in a throwaway Project over `outer` and applies ResolveSubquery. + */ + private def resolveConditionSubqueries( + cond: Expression, outer: LogicalPlan): Expression = { + if (SubqueryExpression.hasSubquery(cond)) { + val fake = Project(Alias(cond, "fake")() :: Nil, outer) + ResolveSubquery(fake).asInstanceOf[Project].projectList.head.asInstanceOf[Alias].child + } else { + cond + } + } + + /** + * Resolve the qualify condition by recursing through the child plan tree + * (Project/Aggregate/Window/Filter) and adding missing attribute references + * to each intermediate node so they are preserved through window extraction. 
+ */ + private def resolveQualifyCondition( + cond: Expression, + candidateAttrs: AttributeSet, + plan: LogicalPlan): (Expression, LogicalPlan) = plan match { + case agg: Aggregate => + val resolved = resolveExpressionByPlanChildren(cond, agg) + val subqueryResolved = resolveConditionSubqueries(resolved, agg.child) + val extraAggExprs = mutable.ArrayBuffer.empty[NamedExpression] + val newCond = subqueryResolved.transform { + case grouping if grouping.resolved && + agg.groupingExpressions.exists(grouping.semanticEquals) => + val allAggExprs = agg.aggregateExpressions ++ extraAggExprs.toSeq + val index = allAggExprs.indexWhere { + case Alias(child, _) => child.semanticEquals(grouping) + case o => o.semanticEquals(grouping) + } + if (index >= 0) { + allAggExprs(index).toAttribute + } else { + grouping match { + case ne: NamedExpression => + extraAggExprs += ne + ne.toAttribute + case o: Expression => + val alias = Alias(o, o.sql)() + extraAggExprs += alias + alias.toAttribute + } + } + case a: Attribute if a.resolved && !candidateAttrs.contains(a) => + a.failAnalysis( + errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + messageParameters = Map( + "objectName" -> a.sql, + "proposal" -> candidateAttrs.map(_.sql).mkString(", "))) + } + val newAgg = agg.copy(aggregateExpressions = agg.aggregateExpressions ++ extraAggExprs) + (newCond, newAgg) + + case p: Project => + // First resolve against this Project's children output. + val resolved = resolveExpressionByPlanChildren(cond, p) + val subqueryResolved = resolveConditionSubqueries(resolved, p.child) + // Recurse into child to resolve any remaining unresolved references and + // propagate missing attributes through intermediate nodes. 
+ val (newCond, newChild) = resolveQualifyCondition( + subqueryResolved, candidateAttrs, p.child) + val missingAttrs = (newCond.references -- p.outputSet).intersect(newChild.outputSet) + val newProject = p.copy(projectList = p.projectList ++ missingAttrs, child = newChild) + (newCond, newProject) + + case w: Window => + val (newCond, newChild) = resolveQualifyCondition(cond, candidateAttrs, w.child) + (newCond, w.copy(child = newChild)) + + case f: Filter => + val (newCond, newChild) = resolveQualifyCondition(cond, candidateAttrs, f.child) + (newCond, f.copy(child = newChild)) + + case other => + // Base case (any other node, e.g. SubqueryAlias or View): resolve each + // remaining UnresolvedAttribute directly against this node's output; names + // that do not match are left unresolved and the node is returned unchanged. + val resolved = cond.transformUp { + case u: UnresolvedAttribute => + other.output.resolve(u.nameParts, conf.resolver).getOrElse(u) + } + (resolved, other) + } + + def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsWithPruning( + _.containsPattern(UNRESOLVED_QUALIFY)) { + case q @ UnresolvedQualify(cond, child) if child.resolved => + // Save the original output before missing attrs are added. + val originalOutput = child.output + val (resolvedByChildren, newChild) = + resolveQualifyCondition(cond, child.outputSet, child) + // Also try resolving by the current child's output (for SELECT aliases). + val maybeResolved = if (resolvedByChildren.resolved) { + resolvedByChildren + } else { + resolveExpressionByPlanChildren(resolvedByChildren, q) + } + val newPlan = if (maybeResolved.resolved) { + // Extract canonically unique window functions in the qualify condition. 
+ val windowExpressionToAliasMap = new java.util.LinkedHashMap[Expression, Alias]() + val newCond = maybeResolved.transform { + case windowExpression: WindowExpression => + val canonicalized = windowExpression.canonicalized + if (windowExpressionToAliasMap.containsKey(canonicalized)) { + windowExpressionToAliasMap.get(canonicalized).toAttribute + } else { + val alias = Alias(windowExpression, windowExpression.sql)() + windowExpressionToAliasMap.put(canonicalized, alias) + alias.toAttribute + } + case a: AggregateExpression => + throw QueryCompilationErrors.aggregateInQualifyNotAllowedError(a) + } + // Ensure at least one window function in SELECT or QUALIFY condition. + if (windowExpressionToAliasMap.size() == 0 && !hasWindowInPlan(child)) { + throw QueryCompilationErrors.qualifyRequiresWindowFunctionError() + } + if (windowExpressionToAliasMap.size() > 0) { + val projectList = + windowExpressionToAliasMap.values().asScala.toSeq + Filter(newCond, Project(newChild.output ++ projectList, newChild)) + } else { + Filter(newCond, newChild) + } + } else { + UnresolvedQualify(maybeResolved, newChild) + } + // Trim additional attributes to preserve original SELECT output. + if (newPlan.output.map(_.exprId) != originalOutput.map(_.exprId)) { + Project(originalOutput, newPlan) + } else { + newPlan + } + } + + /** Check if the resolved child plan already contains a Window node (from SELECT list). 
*/ + private def hasWindowInPlan(plan: LogicalPlan): Boolean = plan match { + case _: Window => true + case p: Project => hasWindowInPlan(p.child) + case f: Filter => hasWindowInPlan(f.child) + case _ => false + } + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 622b0c319f991..8f9c565e56fb6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -1135,6 +1135,18 @@ case class UnresolvedHaving( final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_HAVING) } +/** + * Represents an unresolved QUALIFY clause. It is resolved by the analyzer into a Filter + * placed after window functions have been materialized. + */ +case class UnresolvedQualify(condition: Expression, child: LogicalPlan) extends UnaryNode { + override lazy val resolved: Boolean = false + override def output: Seq[Attribute] = child.output + override protected def withNewChildInternal(newChild: LogicalPlan): UnresolvedQualify = + copy(child = newChild) + final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_QUALIFY) +} + /** * A place holder expression used in random functions, will be replaced after analyze. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ca26530a85866..a346856c520ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -792,6 +792,7 @@ class AstBuilder extends DataTypeAstBuilder ctx.aggregationClause, ctx.havingClause, ctx.windowClause, + ctx.qualifyClause, plan, isPipeOperatorSelect = false ) @@ -1575,6 +1576,7 @@ class AstBuilder extends DataTypeAstBuilder ctx.aggregationClause, ctx.havingClause, ctx.windowClause, + ctx.qualifyClause, from, isPipeOperatorSelect = false ) @@ -1616,6 +1618,18 @@ class AstBuilder extends DataTypeAstBuilder UnresolvedHaving(predicate, plan) } + /** + * Wrap `plan` in an UnresolvedQualify; a non-Predicate condition is cast to BooleanType. + */ + private def withQualifyClause( + ctx: QualifyClauseContext, plan: LogicalPlan): LogicalPlan = { + val predicate = expression(ctx.booleanExpression) match { + case p: Predicate => p + case e => Cast(e, BooleanType) + } + UnresolvedQualify(predicate, plan) + } + /** * Create a logical plan using a where clause. 
*/ @@ -1663,6 +1677,7 @@ class AstBuilder extends DataTypeAstBuilder aggregationClause, havingClause, windowClause, + qualifyClause = null, isDistinct = false, isPipeOperatorSelect = false) @@ -1698,6 +1713,7 @@ class AstBuilder extends DataTypeAstBuilder aggregationClause: AggregationClauseContext, havingClause: HavingClauseContext, windowClause: WindowClauseContext, + qualifyClause: QualifyClauseContext, relation: LogicalPlan, isPipeOperatorSelect: Boolean): LogicalPlan = withOrigin(ctx) { val isDistinct = selectClause.setQuantifier() != null && @@ -1711,6 +1727,7 @@ class AstBuilder extends DataTypeAstBuilder aggregationClause, havingClause, windowClause, + qualifyClause, isDistinct, isPipeOperatorSelect) @@ -1726,6 +1743,7 @@ class AstBuilder extends DataTypeAstBuilder aggregationClause: AggregationClauseContext, havingClause: HavingClauseContext, windowClause: WindowClauseContext, + qualifyClause: QualifyClauseContext, isDistinct: Boolean, isPipeOperatorSelect: Boolean): LogicalPlan = { // Add lateral views. 
@@ -1786,11 +1804,18 @@ class AstBuilder extends DataTypeAstBuilder createProject() } + // Qualify + val withQualify = if (qualifyClause != null) { + withQualifyClause(qualifyClause, withProject) + } else { + withProject + } + // Distinct val withDistinct = if (isDistinct) { - Distinct(withProject) + Distinct(withQualify) } else { - withProject + withQualify } // Window @@ -7194,6 +7219,7 @@ class AstBuilder extends DataTypeAstBuilder aggregationClause = ctx.aggregationClause, havingClause = null, windowClause = ctx.windowClause, + qualifyClause = null, relation = left, isPipeOperatorSelect = true) }.getOrElse(Option(ctx.EXTEND).map { _ => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala index 4ed918328a16b..7956a9692dc61 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala @@ -63,6 +63,7 @@ object RuleIdCollection { "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOutputRelation" :: "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolvePivot" :: "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveProcedures" :: + "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveQualify" :: "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRandomSeed" :: "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences" :: "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations" :: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala index 1e22c1ce86539..6af98240160bc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala @@ -188,6 +188,7 @@ object TreePattern extends Enumeration { val UNRESOLVED_EVENT_TIME_WATERMARK: Value = Value val UNRESOLVED_HAVING: Value = Value val UNRESOLVED_HINT: Value = Value + val UNRESOLVED_QUALIFY: Value = Value val UNRESOLVED_FUNC: Value = Value val UNRESOLVED_PROCEDURE: Value = Value val UNRESOLVED_RELATION: Value = Value diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 94c76976f6cd1..75b77f71e08be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -867,6 +867,19 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat messageParameters = Map("clauseName" -> clauseName)) } + def qualifyRequiresWindowFunctionError(): Throwable = { + new AnalysisException( + errorClass = "QUALIFY_REQUIRES_WINDOW_FUNCTION", + messageParameters = Map.empty) + } + + def aggregateInQualifyNotAllowedError(aggregateExpr: Expression): Throwable = { + new AnalysisException( + errorClass = "QUALIFY_AGGREGATE_NOT_ALLOWED", + messageParameters = Map("aggregateExpr" -> toSQLExpr(aggregateExpr)), + origin = aggregateExpr.origin) + } + def cannotSpecifyWindowFrameError(prettyName: String): Throwable = { new AnalysisException( errorClass = "_LEGACY_ERROR_TEMP_1035", diff --git a/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala b/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala index 99c82345561d2..a0b4711c2747b 100644 --- a/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala +++ 
b/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala @@ -210,7 +210,7 @@ class SparkConnectDatabaseMetaDataSuite extends ConnectFunSuite with RemoteSpark val metadata = conn.getMetaData // scalastyle:off line.size.limit // CURRENT_PATH is excluded: getSQLKeywords drops SQL:2003 reserved words (see companion). - assert(metadata.getSQLKeywords === "ADD,AFTER,AGGREGATE,ALWAYS,ANALYZE,ANTI,ANY_VALUE,ARCHIVE,ASC,BINDING,BUCKET,BUCKETS,BYTE,CACHE,CASCADE,CATALOG,CATALOGS,CHANGE,CHANGES,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATION,COLLATIONS,COLLECTION,COLUMNS,COMMENT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONTAINS,CONTINUE,COST,CURRENT_DATABASE,CURRENT_SCHEMA,DATA,DATABASE,DATABASES,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAYOFYEAR,DAYS,DBPROPERTIES,DEFAULT_PATH,DEFINED,DEFINER,DELAY,DELIMITED,DESC,DFS,DIRECTORIES,DIRECTORY,DISTRIBUTE,DIV,DO,ELSEIF,ENFORCED,ESCAPED,EVOLUTION,EXCHANGE,EXCLUDE,EXCLUSIVE,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,FIELDS,FILEFORMAT,FIRST,FLOW,FOLLOWING,FORMAT,FORMATTED,FOUND,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,HANDLER,HOURS,IDENTIFIED,IDENTIFIER,IF,IGNORE,ILIKE,IMMEDIATE,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INPATH,INPUT,INPUTFORMAT,INVOKER,ITEMS,ITERATE,JSON,KEY,KEYS,LAST,LAZY,LEAVE,LEVEL,LIMIT,LINES,LIST,LOAD,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MEASURE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTES,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NORELY,NULLS,OFFSET,OPTION,OPTIONS,OUTPUTFORMAT,OVERWRITE,PARTITIONED,PARTITIONS,PATH,PERCENT,PIVOT,PLACING,PRECEDING,PRINCIPALS,PROCEDURES,PROPERTIES,PURGE,QUARTER,QUERY,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,REDUCE,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,ROLE,ROLES,SCHEMA,SCHEMAS,SECONDS,SECURITY,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SETS,SHORT,SHOW,SINGLE,SKEWED,SORT,SORTED,SOURCE,STATISTICS,STORED,STRATIFY,STREA
M,STREAMING,STRING,STRUCT,SUBSTR,SYNC,SYSTEM_PATH,SYSTEM_TIME,SYSTEM_VERSION,TABLES,TARGET,TBLPROPERTIES,TERMINATED,TIMEDIFF,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TOUCH,TRANSACTION,TRANSACTIONS,TRANSFORM,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNLOCK,UNPIVOT,UNSET,UNTIL,USE,VAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHILE,X,YEARS,ZONE") + assert(metadata.getSQLKeywords === "ADD,AFTER,AGGREGATE,ALWAYS,ANALYZE,ANTI,ANY_VALUE,ARCHIVE,ASC,BINDING,BUCKET,BUCKETS,BYTE,CACHE,CASCADE,CATALOG,CATALOGS,CHANGE,CHANGES,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATION,COLLATIONS,COLLECTION,COLUMNS,COMMENT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONTAINS,CONTINUE,COST,CURRENT_DATABASE,CURRENT_SCHEMA,DATA,DATABASE,DATABASES,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAYOFYEAR,DAYS,DBPROPERTIES,DEFAULT_PATH,DEFINED,DEFINER,DELAY,DELIMITED,DESC,DFS,DIRECTORIES,DIRECTORY,DISTRIBUTE,DIV,DO,ELSEIF,ENFORCED,ESCAPED,EVOLUTION,EXCHANGE,EXCLUDE,EXCLUSIVE,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,FIELDS,FILEFORMAT,FIRST,FLOW,FOLLOWING,FORMAT,FORMATTED,FOUND,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,HANDLER,HOURS,IDENTIFIED,IDENTIFIER,IF,IGNORE,ILIKE,IMMEDIATE,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INPATH,INPUT,INPUTFORMAT,INVOKER,ITEMS,ITERATE,JSON,KEY,KEYS,LAST,LAZY,LEAVE,LEVEL,LIMIT,LINES,LIST,LOAD,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MEASURE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTES,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NORELY,NULLS,OFFSET,OPTION,OPTIONS,OUTPUTFORMAT,OVERWRITE,PARTITIONED,PARTITIONS,PATH,PERCENT,PIVOT,PLACING,PRECEDING,PRINCIPALS,PROCEDURES,PROPERTIES,PURGE,QUALIFY,QUARTER,QUERY,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,REDUCE,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,ROLE,ROLES,SCHEMA,SCHEMAS,SECONDS,SECURITY,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SETS,SHORT,SHOW,SINGLE,SKEWED,SORT,SORTED,SOURCE,STATIS
TICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SYNC,SYSTEM_PATH,SYSTEM_TIME,SYSTEM_VERSION,TABLES,TARGET,TBLPROPERTIES,TERMINATED,TIMEDIFF,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TOUCH,TRANSACTION,TRANSACTIONS,TRANSFORM,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNLOCK,UNPIVOT,UNSET,UNTIL,USE,VAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHILE,X,YEARS,ZONE") // scalastyle:on line.size.limit } } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/qualify.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/qualify.sql.out new file mode 100644 index 0000000000000..99b8a2c8b2e40 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/qualify.sql.out @@ -0,0 +1,193 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE OR REPLACE TEMPORARY VIEW dealer AS SELECT * FROM VALUES + (100, 'Fremont', 'Honda Civic', 10), + (100, 'Fremont', 'Honda Accord', 15), + (100, 'Fremont', 'Honda CRV', 7), + (200, 'Dublin', 'Honda Civic', 20), + (200, 'Dublin', 'Honda Accord', 10), + (200, 'Dublin', 'Honda CRV', 3), + (300, 'San Jose', 'Honda Civic', 5), + (300, 'San Jose', 'Honda Accord', 8) +AS dealer(id, city, car_model, quantity) +-- !query analysis +CreateViewCommand `dealer`, SELECT * FROM VALUES + (100, 'Fremont', 'Honda Civic', 10), + (100, 'Fremont', 'Honda Accord', 15), + (100, 'Fremont', 'Honda CRV', 7), + (200, 'Dublin', 'Honda Civic', 20), + (200, 'Dublin', 'Honda Accord', 10), + (200, 'Dublin', 'Honda CRV', 3), + (300, 'San Jose', 'Honda Civic', 5), + (300, 'San Jose', 'Honda Accord', 8) +AS dealer(id, city, car_model, quantity), false, true, LocalTempView, UNSUPPORTED, true + +- Project [id#x, city#x, car_model#x, quantity#x] + +- SubqueryAlias dealer + +- LocalRelation [id#x, city#x, car_model#x, quantity#x] + + +-- !query +SELECT city, car_model, RANK() OVER (PARTITION BY car_model ORDER BY quantity) AS rank +FROM dealer +QUALIFY rank = 1 +ORDER 
BY car_model, city +-- !query analysis +Sort [car_model#x ASC NULLS FIRST, city#x ASC NULLS FIRST], true ++- Filter (rank#x = 1) + +- Project [city#x, car_model#x, rank#x] + +- Project [city#x, car_model#x, quantity#x, rank#x, rank#x] + +- Window [rank(quantity#x) windowspecdefinition(car_model#x, quantity#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank#x], [car_model#x], [quantity#x ASC NULLS FIRST] + +- Project [city#x, car_model#x, quantity#x] + +- SubqueryAlias dealer + +- View (`dealer`, [id#x, city#x, car_model#x, quantity#x]) + +- Project [cast(id#x as int) AS id#x, cast(city#x as string) AS city#x, cast(car_model#x as string) AS car_model#x, cast(quantity#x as int) AS quantity#x] + +- Project [id#x, city#x, car_model#x, quantity#x] + +- SubqueryAlias dealer + +- LocalRelation [id#x, city#x, car_model#x, quantity#x] + + +-- !query +SELECT city, car_model +FROM dealer +QUALIFY RANK() OVER (PARTITION BY car_model ORDER BY quantity) = 1 +ORDER BY car_model, city +-- !query analysis +Sort [car_model#x ASC NULLS FIRST, city#x ASC NULLS FIRST], true ++- Project [city#x, car_model#x] + +- Filter (RANK() OVER (PARTITION BY dealer.car_model ORDER BY dealer.quantity ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x = 1) + +- Project [city#x, car_model#x, quantity#x, RANK() OVER (PARTITION BY dealer.car_model ORDER BY dealer.quantity ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x] + +- Project [city#x, car_model#x, quantity#x, RANK() OVER (PARTITION BY dealer.car_model ORDER BY dealer.quantity ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x, RANK() OVER (PARTITION BY dealer.car_model ORDER BY dealer.quantity ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x] + +- Window [rank(quantity#x) windowspecdefinition(car_model#x, quantity#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS RANK() OVER (PARTITION BY 
dealer.car_model ORDER BY dealer.quantity ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x], [car_model#x], [quantity#x ASC NULLS FIRST] + +- Project [city#x, car_model#x, quantity#x] + +- Project [city#x, car_model#x, quantity#x] + +- SubqueryAlias dealer + +- View (`dealer`, [id#x, city#x, car_model#x, quantity#x]) + +- Project [cast(id#x as int) AS id#x, cast(city#x as string) AS city#x, cast(car_model#x as string) AS car_model#x, cast(quantity#x as int) AS quantity#x] + +- Project [id#x, city#x, car_model#x, quantity#x] + +- SubqueryAlias dealer + +- LocalRelation [id#x, city#x, car_model#x, quantity#x] + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW testData2 AS SELECT * FROM VALUES + (1, 1), + (1, 2), + (2, 1), + (2, 2), + (3, 3) +AS testData2(a, b) +-- !query analysis +CreateViewCommand `testData2`, SELECT * FROM VALUES + (1, 1), + (1, 2), + (2, 1), + (2, 2), + (3, 3) +AS testData2(a, b), false, true, LocalTempView, UNSUPPORTED, true + +- Project [a#x, b#x] + +- SubqueryAlias testData2 + +- LocalRelation [a#x, b#x] + + +-- !query +SELECT a, SUM(b) AS total +FROM testData2 +GROUP BY a +HAVING SUM(b) > 2 +QUALIFY ROW_NUMBER() OVER (ORDER BY a DESC) = 1 +-- !query analysis +Project [a#x, total#xL] ++- Filter (row_number() OVER (ORDER BY testdata2.a DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x = 1) + +- Project [a#x, total#xL, row_number() OVER (ORDER BY testdata2.a DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x] + +- Project [a#x, total#xL, row_number() OVER (ORDER BY testdata2.a DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x, row_number() OVER (ORDER BY testdata2.a DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x] + +- Window [row_number() windowspecdefinition(a#x DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS row_number() OVER (ORDER BY testdata2.a DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT 
ROW)#x], [a#x DESC NULLS LAST] + +- Project [a#x, total#xL] + +- Filter (total#xL > cast(2 as bigint)) + +- Aggregate [a#x], [a#x, sum(b#x) AS total#xL] + +- SubqueryAlias testdata2 + +- View (`testData2`, [a#x, b#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias testData2 + +- LocalRelation [a#x, b#x] + + +-- !query +SELECT a, SUM(b) AS total, ROW_NUMBER() OVER (ORDER BY a) AS rn +FROM testData2 +GROUP BY a +QUALIFY total > 1 +ORDER BY a +-- !query analysis +Sort [a#x ASC NULLS FIRST], true ++- Filter (total#xL > cast(1 as bigint)) + +- Project [a#x, total#xL, rn#x] + +- Project [a#x, total#xL, rn#x, rn#x] + +- Window [row_number() windowspecdefinition(a#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#x], [a#x ASC NULLS FIRST] + +- Aggregate [a#x], [a#x, sum(b#x) AS total#xL] + +- SubqueryAlias testdata2 + +- View (`testData2`, [a#x, b#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias testData2 + +- LocalRelation [a#x, b#x] + + +-- !query +SELECT a, total +FROM ( + SELECT a, SUM(b) AS total + FROM testData2 + GROUP BY a +) t +QUALIFY ROW_NUMBER() OVER (ORDER BY a) = 1 AND total > 1 +-- !query analysis +Project [a#x, total#xL] ++- Filter ((row_number() OVER (ORDER BY t.a ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x = 1) AND (total#xL > cast(1 as bigint))) + +- Project [a#x, total#xL, row_number() OVER (ORDER BY t.a ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x] + +- Project [a#x, total#xL, row_number() OVER (ORDER BY t.a ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x, row_number() OVER (ORDER BY t.a ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x] + +- Window [row_number() windowspecdefinition(a#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS row_number() OVER (ORDER BY 
t.a ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x], [a#x ASC NULLS FIRST] + +- Project [a#x, total#xL] + +- Project [a#x, total#xL] + +- SubqueryAlias t + +- Aggregate [a#x], [a#x, sum(b#x) AS total#xL] + +- SubqueryAlias testdata2 + +- View (`testData2`, [a#x, b#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias testData2 + +- LocalRelation [a#x, b#x] + + +-- !query +SELECT a +FROM testData2 +QUALIFY a = 1 +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "QUALIFY_REQUIRES_WINDOW_FUNCTION", + "sqlState" : "42903" +} + + +-- !query +SELECT a, RANK() OVER (ORDER BY b) AS rank +FROM testData2 +QUALIFY COUNT(1) > 1 +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "QUALIFY_AGGREGATE_NOT_ALLOWED", + "sqlState" : "42903", + "messageParameters" : { + "aggregateExpr" : "\"count(1)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 67, + "stopIndex" : 74, + "fragment" : "COUNT(1)" + } ] +} diff --git a/sql/core/src/test/resources/sql-tests/inputs/qualify.sql b/sql/core/src/test/resources/sql-tests/inputs/qualify.sql new file mode 100644 index 0000000000000..28dc782733968 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/qualify.sql @@ -0,0 +1,56 @@ +CREATE OR REPLACE TEMPORARY VIEW dealer AS SELECT * FROM VALUES + (100, 'Fremont', 'Honda Civic', 10), + (100, 'Fremont', 'Honda Accord', 15), + (100, 'Fremont', 'Honda CRV', 7), + (200, 'Dublin', 'Honda Civic', 20), + (200, 'Dublin', 'Honda Accord', 10), + (200, 'Dublin', 'Honda CRV', 3), + (300, 'San Jose', 'Honda Civic', 5), + (300, 'San Jose', 'Honda Accord', 8) +AS dealer(id, city, car_model, quantity); + +SELECT city, car_model, RANK() OVER (PARTITION BY car_model ORDER BY quantity) AS rank +FROM dealer +QUALIFY rank = 1 +ORDER BY car_model, city; + +SELECT city, car_model +FROM dealer +QUALIFY RANK() OVER (PARTITION BY car_model 
ORDER BY quantity) = 1 +ORDER BY car_model, city; + +CREATE OR REPLACE TEMPORARY VIEW testData2 AS SELECT * FROM VALUES + (1, 1), + (1, 2), + (2, 1), + (2, 2), + (3, 3) +AS testData2(a, b); + +SELECT a, SUM(b) AS total +FROM testData2 +GROUP BY a +HAVING SUM(b) > 2 +QUALIFY ROW_NUMBER() OVER (ORDER BY a DESC) = 1; + +SELECT a, SUM(b) AS total, ROW_NUMBER() OVER (ORDER BY a) AS rn +FROM testData2 +GROUP BY a +QUALIFY total > 1 +ORDER BY a; + +SELECT a, total +FROM ( + SELECT a, SUM(b) AS total + FROM testData2 + GROUP BY a +) t +QUALIFY ROW_NUMBER() OVER (ORDER BY a) = 1 AND total > 1; + +SELECT a +FROM testData2 +QUALIFY a = 1; + +SELECT a, RANK() OVER (ORDER BY b) AS rank +FROM testData2 +QUALIFY COUNT(1) > 1; diff --git a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out index c941df66b64ba..11a103e6cc0e6 100644 --- a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out @@ -282,6 +282,7 @@ PROCEDURE false PROCEDURES false PROPERTIES false PURGE false +QUALIFY false QUARTER false QUERY false RANGE false diff --git a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out index ae13b363d28ec..1a7db9df073f4 100644 --- a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out @@ -282,6 +282,7 @@ PROCEDURE false PROCEDURES false PROPERTIES false PURGE false +QUALIFY false QUARTER false QUERY false RANGE false diff --git a/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out b/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out index ae13b363d28ec..1a7db9df073f4 100644 --- a/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out @@ -282,6 +282,7 @@ PROCEDURE false PROCEDURES false PROPERTIES false PURGE false +QUALIFY false QUARTER false QUERY false RANGE false diff --git a/sql/core/src/test/resources/sql-tests/results/qualify.sql.out b/sql/core/src/test/resources/sql-tests/results/qualify.sql.out new file mode 100644 index 0000000000000..5c34763953a19 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/qualify.sql.out @@ -0,0 +1,134 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE OR REPLACE TEMPORARY VIEW dealer AS SELECT * FROM VALUES + (100, 'Fremont', 'Honda Civic', 10), + (100, 'Fremont', 'Honda Accord', 15), + (100, 'Fremont', 'Honda CRV', 7), + (200, 'Dublin', 'Honda Civic', 20), + (200, 'Dublin', 'Honda Accord', 10), + (200, 'Dublin', 'Honda CRV', 3), + (300, 'San Jose', 'Honda Civic', 5), + (300, 'San Jose', 'Honda Accord', 8) +AS dealer(id, city, car_model, quantity) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT city, car_model, RANK() OVER (PARTITION BY car_model ORDER BY quantity) AS rank +FROM dealer +QUALIFY rank = 1 +ORDER BY car_model, city +-- !query schema +struct<city:string,car_model:string,rank:int> +-- !query output +San Jose Honda Accord 1 +Dublin Honda CRV 1 +San Jose Honda Civic 1 + + +-- !query +SELECT city, car_model +FROM dealer +QUALIFY RANK() OVER (PARTITION BY car_model ORDER BY quantity) = 1 +ORDER BY car_model, city +-- !query schema +struct<city:string,car_model:string> +-- !query output +San Jose Honda Accord +Dublin Honda CRV +San Jose Honda Civic + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW testData2 AS SELECT * FROM VALUES + (1, 1), + (1, 2), + (2, 1), + (2, 2), + (3, 3) +AS testData2(a, b) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT a, SUM(b) AS total +FROM testData2 +GROUP BY a +HAVING SUM(b) > 2 +QUALIFY ROW_NUMBER() OVER (ORDER BY a DESC) = 1 +-- !query schema +struct<a:int,total:bigint> +-- !query output +3 3 + + +-- !query +SELECT a, SUM(b) AS total, ROW_NUMBER() OVER 
(ORDER BY a) AS rn +FROM testData2 +GROUP BY a +QUALIFY total > 1 +ORDER BY a +-- !query schema +struct<a:int,total:bigint,rn:int> +-- !query output +1 3 1 +2 3 2 +3 3 3 + + +-- !query +SELECT a, total +FROM ( + SELECT a, SUM(b) AS total + FROM testData2 + GROUP BY a +) t +QUALIFY ROW_NUMBER() OVER (ORDER BY a) = 1 AND total > 1 +-- !query schema +struct<a:int,total:bigint> +-- !query output +1 3 + + +-- !query +SELECT a +FROM testData2 +QUALIFY a = 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "QUALIFY_REQUIRES_WINDOW_FUNCTION", + "sqlState" : "42903" +} + + +-- !query +SELECT a, RANK() OVER (ORDER BY b) AS rank +FROM testData2 +QUALIFY COUNT(1) > 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "QUALIFY_AGGREGATE_NOT_ALLOWED", + "sqlState" : "42903", + "messageParameters" : { + "aggregateExpr" : "\"count(1)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 67, + "stopIndex" : 74, + "fragment" : "COUNT(1)" + } ] +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 6e9f338557158..6d5d1b17c8760 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -1053,6 +1053,176 @@ class DataFrameWindowFunctionsSuite extends QueryTest "HAVING") } + test("QUALIFY filters window function results") { + withTempView("dealer") { + Seq( + (100, "Fremont", "Honda Civic", 10), + (100, "Fremont", "Honda Accord", 15), + (100, "Fremont", "Honda CRV", 7), + (200, "Dublin", "Honda Civic", 20), + (200, "Dublin", "Honda Accord", 10), + (200, "Dublin", "Honda CRV", 3), + (300, "San Jose", "Honda Civic", 5), + (300, "San Jose", "Honda Accord", 8) + ).toDF("id", "city", "car_model", "quantity").createOrReplaceTempView("dealer") 
+ + val expectedWithRank = Seq( + Row("San Jose", "Honda Accord", 1), + Row("Dublin", "Honda CRV", 1), + Row("San Jose", "Honda Civic", 1)) + checkAnswer( + sql( + """ + |SELECT city, car_model, RANK() OVER (PARTITION BY car_model ORDER BY quantity) AS rank + |FROM dealer + |QUALIFY rank = 1 + """.stripMargin), + expectedWithRank) + + checkAnswer( + sql( + """ + |SELECT city, car_model + |FROM dealer + |QUALIFY RANK() OVER (PARTITION BY car_model ORDER BY quantity) = 1 + """.stripMargin), + expectedWithRank.map(row => Row(row.getString(0), row.getString(1)))) + } + } + + test("QUALIFY filters window function results after HAVING") { + withTempView("testData2") { + testData2.createOrReplaceTempView("testData2") + + checkAnswer( + sql( + """ + |SELECT a, SUM(b) AS total + |FROM testData2 + |GROUP BY a + |HAVING SUM(b) > 2 + |QUALIFY ROW_NUMBER() OVER (ORDER BY a DESC) = 1 + """.stripMargin), + Row(3, 3)) + } + } + + test("QUALIFY requires a current-query window function") { + withTempView("testData2") { + testData2.createOrReplaceTempView("testData2") + + checkError( + exception = intercept[AnalysisException] { + sql("SELECT a FROM testData2 QUALIFY a = 1").queryExecution.analyzed + }, + condition = "QUALIFY_REQUIRES_WINDOW_FUNCTION", + parameters = Map.empty) + } + } + + test("QUALIFY does not allow aggregate functions in its predicate") { + withTempView("testData2") { + testData2.createOrReplaceTempView("testData2") + + checkError( + exception = intercept[AnalysisException] { + sql("SELECT a, RANK() OVER (ORDER BY b) AS rank FROM testData2 QUALIFY COUNT(1) > 1") + .queryExecution.analyzed + }, + condition = "QUALIFY_AGGREGATE_NOT_ALLOWED", + parameters = Map("aggregateExpr" -> "\"count(1)\""), + context = ExpectedContext("COUNT(1)", 66, 73)) + } + } + + test("QUALIFY allows aggregate aliases in its predicate") { + withTempView("testData2") { + testData2.createOrReplaceTempView("testData2") + + checkAnswer( + sql( + """ + |SELECT a, SUM(b) AS total, ROW_NUMBER() 
OVER (ORDER BY a) AS rn + |FROM testData2 + |GROUP BY a + |QUALIFY total > 1 + """.stripMargin), + Seq(Row(1, 3, 1), Row(2, 3, 2), Row(3, 3, 3))) + } + } + + test("QUALIFY with correlated subquery in condition") { + withTempView("t1", "t2") { + Seq((1, 10), (2, 20), (3, 30)).toDF("k", "v").createOrReplaceTempView("t1") + Seq((1, 100), (2, 200)).toDF("k", "v").createOrReplaceTempView("t2") + + checkAnswer( + sql( + """ + |SELECT k, v, ROW_NUMBER() OVER (ORDER BY k) AS rn + |FROM t1 + |QUALIFY rn = 1 AND EXISTS (SELECT 1 FROM t2 WHERE t2.k = t1.k) + """.stripMargin), + Row(1, 10, 1)) + } + } + + test("QUALIFY rejects non-grouping column references with GROUP BY") { + withTempView("testData2") { + testData2.createOrReplaceTempView("testData2") + + val e = intercept[AnalysisException] { + sql( + """ + |SELECT a, SUM(b) AS total, ROW_NUMBER() OVER (ORDER BY a) AS rn + |FROM testData2 + |GROUP BY a + |QUALIFY b > 1 + """.stripMargin).queryExecution.analyzed + } + assert(e.getCondition == "UNRESOLVED_COLUMN.WITH_SUGGESTION") + assert(e.getMessageParameters.get("objectName").contains("b")) + } + } + + test("QUALIFY can reference columns not in SELECT list") { + withTempView("t") { + Seq((1, 10, "x"), (2, 20, "y"), (3, 30, "z")) + .toDF("a", "b", "c").createOrReplaceTempView("t") + + checkAnswer( + sql( + """ + |SELECT ROW_NUMBER() OVER (ORDER BY b) AS rn + |FROM t + |QUALIFY a = 1 + """.stripMargin), + Row(1)) + } + } + + test("QUALIFY with window in condition and non-selected column reference") { + withTempView("t") { + Seq((1, 10, "x"), (2, 20, "y"), (3, 30, "z")) + .toDF("a", "b", "c").createOrReplaceTempView("t") + + checkAnswer( + sql( + """ + |SELECT a + |FROM t + |QUALIFY ROW_NUMBER() OVER (ORDER BY b) = 1 AND c > 'w' + """.stripMargin), + Row(1)) + } + } + + test("QUALIFY is non-reserved in non-ANSI mode") { + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkAnswer(sql("SELECT qualify FROM VALUES (1) AS t(qualify)"), Row(1)) + } + } + test("window 
functions in multiple selects") { val df = Seq( ("S1", "P1", 100), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 3d064c904f19d..f1acab77fbc2d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -22,8 +22,13 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.{SparkConf, SparkThrowable} import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAlias, UnresolvedAttribute, UnresolvedFunction, UnresolvedGenerator, UnresolvedHaving, UnresolvedRelation, UnresolvedStar} -import org.apache.spark.sql.catalyst.expressions.{Ascending, AttributeReference, Concat, GreaterThan, Literal, NullsFirst, SortOrder, UnresolvedWindowExpression, UnspecifiedFrame, WindowSpecDefinition, WindowSpecReference} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAlias, + UnresolvedAttribute, UnresolvedFunction, UnresolvedGenerator, UnresolvedHaving, + UnresolvedQualify, UnresolvedRelation, UnresolvedStar} +import org.apache.spark.sql.catalyst.expressions.{Ascending, AttributeReference, + Concat, EqualTo, GreaterThan, Literal, NullsFirst, SortOrder, + UnresolvedWindowExpression, UnspecifiedFrame, WindowSpecDefinition, + WindowSpecReference} import org.apache.spark.sql.catalyst.parser.{AbstractParser, ParseException} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreePattern._ @@ -787,6 +792,25 @@ class SparkSqlParserSuite extends AnalysisTest with SharedSparkSession { stop = 264)) } + test("QUALIFY clause") { + // QUALIFY with alias reference - SELECT list has window function + val plan1 = parser.parsePlan( + 
"SELECT a, RANK() OVER (ORDER BY b) AS rank " + + "FROM testData2 QUALIFY rank = 1") + assert(plan1.isInstanceOf[UnresolvedQualify]) + val q1 = plan1.asInstanceOf[UnresolvedQualify] + assert(q1.child.isInstanceOf[Project]) + + // QUALIFY with window function in condition + val plan2 = parser.parsePlan( + "SELECT a FROM testData2 " + + "QUALIFY RANK() OVER (ORDER BY b) = 1") + assert(plan2.isInstanceOf[UnresolvedQualify]) + val q2 = plan2.asInstanceOf[UnresolvedQualify] + assert(q2.condition.isInstanceOf[EqualTo]) + assert(q2.child.isInstanceOf[Project]) + } + test("CLEAR CACHE") { assertEqual("CLEAR CACHE", ClearCacheCommand) } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala index a9446750a53a6..c9696a1b2fe68 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala @@ -214,7 +214,7 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer { val sessionHandle = client.openSession(user, "") val infoValue = client.getInfo(sessionHandle, GetInfoType.CLI_ODBC_KEYWORDS) // scalastyle:off line.size.limit - assert(infoValue.getStringValue == 
"ADD,AFTER,AGGREGATE,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,ASENSITIVE,AT,ATOMIC,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALL,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHANGES,CHAR,CHARACTER,CHECK,CLEAR,CLOSE,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLATIONS,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONDITION,CONSTRAINT,CONTAINS,CONTINUE,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATABASE,CURRENT_DATE,CURRENT_PATH,CURRENT_SCHEMA,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFAULT_PATH,DEFINED,DEFINER,DELAY,DELETE,DELIMITED,DESC,DESCRIBE,DETERMINISTIC,DFS,DIRECTORIES,DIRECTORY,DISTINCT,DISTRIBUTE,DIV,DO,DOUBLE,DROP,ELSE,ELSEIF,END,ENFORCED,ESCAPE,ESCAPED,EVOLUTION,EXCEPT,EXCHANGE,EXCLUDE,EXCLUSIVE,EXECUTE,EXISTS,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FLOW,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FOUND,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,GLOBAL,GRANT,GROUP,GROUPING,HANDLER,HAVING,HOUR,HOURS,IDENTIFIED,IDENTIFIER,IDENTITY,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INNER,INPATH,INPUT,INPUTFORMAT,INSENSITIVE,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,INVOKER,IS,ITEMS,ITERATE,JOIN,JSON,KEY,KEYS,LANGUAGE,LAST,LATERAL,LAZY,LEADING,LEAVE,LEFT,LEVEL,LIKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MAX,MEASURE,MERGE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MODIFIES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NATURAL,NEXT,NO,NONE,NORELY,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPEN,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,
PARTITIONS,PATH,PERCENT,PIVOT,PLACING,POSITION,PRECEDING,PRIMARY,PRINCIPALS,PROCEDURE,PROCEDURES,PROPERTIES,PURGE,QUARTER,QUERY,RANGE,READ,READS,REAL,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,RECURSIVE,REDUCE,REFERENCES,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,RETURN,RETURNS,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SECURITY,SELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,SPECIFIC,SQL,SQLEXCEPTION,SQLSTATE,START,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM_PATH,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UNTIL,UPDATE,USE,USER,USING,VALUE,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHEN,WHERE,WHILE,WINDOW,WITH,WITHIN,WITHOUT,X,YEAR,YEARS,ZONE") + assert(infoValue.getStringValue == 
"ADD,AFTER,AGGREGATE,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,ASENSITIVE,AT,ATOMIC,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALL,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHANGES,CHAR,CHARACTER,CHECK,CLEAR,CLOSE,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLATIONS,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONDITION,CONSTRAINT,CONTAINS,CONTINUE,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATABASE,CURRENT_DATE,CURRENT_PATH,CURRENT_SCHEMA,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFAULT_PATH,DEFINED,DEFINER,DELAY,DELETE,DELIMITED,DESC,DESCRIBE,DETERMINISTIC,DFS,DIRECTORIES,DIRECTORY,DISTINCT,DISTRIBUTE,DIV,DO,DOUBLE,DROP,ELSE,ELSEIF,END,ENFORCED,ESCAPE,ESCAPED,EVOLUTION,EXCEPT,EXCHANGE,EXCLUDE,EXCLUSIVE,EXECUTE,EXISTS,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FLOW,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FOUND,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,GLOBAL,GRANT,GROUP,GROUPING,HANDLER,HAVING,HOUR,HOURS,IDENTIFIED,IDENTIFIER,IDENTITY,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INNER,INPATH,INPUT,INPUTFORMAT,INSENSITIVE,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,INVOKER,IS,ITEMS,ITERATE,JOIN,JSON,KEY,KEYS,LANGUAGE,LAST,LATERAL,LAZY,LEADING,LEAVE,LEFT,LEVEL,LIKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MAX,MEASURE,MERGE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MODIFIES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NATURAL,NEXT,NO,NONE,NORELY,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPEN,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,
PARTITIONS,PATH,PERCENT,PIVOT,PLACING,POSITION,PRECEDING,PRIMARY,PRINCIPALS,PROCEDURE,PROCEDURES,PROPERTIES,PURGE,QUALIFY,QUARTER,QUERY,RANGE,READ,READS,REAL,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,RECURSIVE,REDUCE,REFERENCES,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,RETURN,RETURNS,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SECURITY,SELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,SPECIFIC,SQL,SQLEXCEPTION,SQLSTATE,START,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM_PATH,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UNTIL,UPDATE,USE,USER,USING,VALUE,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHEN,WHERE,WHILE,WINDOW,WITH,WITHIN,WITHOUT,X,YEAR,YEARS,ZONE") // scalastyle:on line.size.limit } }