Skip to content

Commit

Permalink
[SPARK-37389][SQL] Check unclosed bracketed comments
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
The SQL below has unclosed bracketed comment.
```
/*abc*/
select 1 as a
/*

2 as b
/*abc*/
, 3 as c

/**/
;
```
But Spark outputs:
<!DOCTYPE html>

a |
-- |
1 |

PostgreSQL also supports the feature, but output:
```
SQL 错误 [42601]: Unterminated block comment started at position 22 in SQL /*abc*/
select 1 as a
/*

2 as b
/*abc*/
, 3 as c

/**/
;. Expected */ sequence
```

### Why are the changes needed?
The execute plan is not expected, if we don't check unclosed bracketed comments.

### Does this PR introduce _any_ user-facing change?
'Yes'. The behavior of bracketed comments will more correctly.

### How was this patch tested?
New tests.

Closes #34668 from beliefer/SPARK-37389.

Authored-by: Jiaan Geng <beliefer@163.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
beliefer authored and cloud-fan committed Nov 24, 2021
1 parent c1029dc commit ff249c3
Show file tree
Hide file tree
Showing 6 changed files with 218 additions and 2 deletions.
Expand Up @@ -36,6 +36,11 @@ grammar SqlBase;
}

@lexer::members {
/**
* When true, parser should throw ParseExcetion for unclosed bracketed comment.
*/
public boolean has_unclosed_bracketed_comment = false;
/**
* Verify whether current token is a valid decimal token (which contains dot).
* Returns true if the character that follows the token is not a digit or letter or underscore.
Expand Down Expand Up @@ -73,6 +78,16 @@ grammar SqlBase;
return false;
}
}

/**
* This method will be called when the character stream ends and try to find out the
* unclosed bracketed comment.
* If the method be called, it means the end of the entire character stream match,
* and we set the flag and fail later.
*/
public void markUnclosedComment() {
has_unclosed_bracketed_comment = true;
}
}

singleStatement
Expand Down Expand Up @@ -1926,7 +1941,7 @@ SIMPLE_COMMENT
;

BRACKETED_COMMENT
: '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN)
: '/*' {!isHint()}? ( BRACKETED_COMMENT | . )*? ('*/' | {markUnclosedComment();} EOF) -> channel(HIDDEN)
;

WS
Expand Down
Expand Up @@ -101,6 +101,7 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with
val tokenStream = new CommonTokenStream(lexer)
val parser = new SqlBaseParser(tokenStream)
parser.addParseListener(PostProcessor)
parser.addParseListener(UnclosedCommentProcessor(command, tokenStream))
parser.removeErrorListeners()
parser.addErrorListener(ParseErrorListener)
parser.legacy_setops_precedence_enabled = conf.setOpsPrecedenceEnforced
Expand Down Expand Up @@ -313,3 +314,58 @@ case object PostProcessor extends SqlBaseBaseListener {
parent.addChild(new TerminalNodeImpl(f(newToken)))
}
}

/**
* The post-processor checks the unclosed bracketed comment.
*/
case class UnclosedCommentProcessor(
command: String, tokenStream: CommonTokenStream) extends SqlBaseBaseListener {

override def exitSingleDataType(ctx: SqlBaseParser.SingleDataTypeContext): Unit = {
checkUnclosedComment(tokenStream, command)
}

override def exitSingleExpression(ctx: SqlBaseParser.SingleExpressionContext): Unit = {
checkUnclosedComment(tokenStream, command)
}

override def exitSingleTableIdentifier(ctx: SqlBaseParser.SingleTableIdentifierContext): Unit = {
checkUnclosedComment(tokenStream, command)
}

override def exitSingleFunctionIdentifier(
ctx: SqlBaseParser.SingleFunctionIdentifierContext): Unit = {
checkUnclosedComment(tokenStream, command)
}

override def exitSingleMultipartIdentifier(
ctx: SqlBaseParser.SingleMultipartIdentifierContext): Unit = {
checkUnclosedComment(tokenStream, command)
}

override def exitSingleTableSchema(ctx: SqlBaseParser.SingleTableSchemaContext): Unit = {
checkUnclosedComment(tokenStream, command)
}

override def exitQuery(ctx: SqlBaseParser.QueryContext): Unit = {
checkUnclosedComment(tokenStream, command)
}

override def exitSingleStatement(ctx: SqlBaseParser.SingleStatementContext): Unit = {
checkUnclosedComment(tokenStream, command)
}

/** check `has_unclosed_bracketed_comment` to find out the unclosed bracketed comment. */
private def checkUnclosedComment(tokenStream: CommonTokenStream, command: String) = {
assert(tokenStream.getTokenSource.isInstanceOf[SqlBaseLexer])
val lexer = tokenStream.getTokenSource.asInstanceOf[SqlBaseLexer]
if (lexer.has_unclosed_bracketed_comment) {
// The last token is 'EOF' and the penultimate is unclosed bracketed comment
val failedToken = tokenStream.get(tokenStream.size() - 2)
assert(failedToken.getType() == SqlBaseParser.BRACKETED_COMMENT)
val position = Origin(Option(failedToken.getLine), Option(failedToken.getCharPositionInLine))
throw QueryParsingErrors.unclosedBracketedCommentError(command, position)
}
}

}
Expand Up @@ -425,4 +425,8 @@ object QueryParsingErrors {
new ParseException(
s"Specifying a database in CREATE TEMPORARY FUNCTION is not allowed: '$databaseName'", ctx)
}

def unclosedBracketedCommentError(command: String, position: Origin): Throwable = {
new ParseException(Some(command), "Unclosed bracketed comment", position, position)
}
}
Expand Up @@ -160,6 +160,56 @@ class PlanParserSuite extends AnalysisTest {
""".stripMargin, plan)
}

test("nested bracketed comment case seven") {
val plan = OneRowRelation().select(Literal(1).as("a"))
assertEqual(
"""
|/*abc*/
|select 1 as a
|/*
|
|2 as b
|/*abc */
|, 3 as c
|
|/**/
|*/
""".stripMargin, plan)
}

test("unclosed bracketed comment one") {
val query = """
|/*abc*/
|select 1 as a
|/*
|
|2 as b
|/*abc */
|, 3 as c
|
|/**/
|""".stripMargin
val e = intercept[ParseException](parsePlan(query))
assert(e.getMessage.contains(s"Unclosed bracketed comment"))
}

test("unclosed bracketed comment two") {
val query = """
|/*abc*/
|select 1 as a
|/*
|
|2 as b
|/*abc */
|, 3 as c
|
|/**/
|select 4 as d
|""".stripMargin
val e = intercept[ParseException](parsePlan(query))
assert(e.getMessage.contains(s"Unclosed bracketed comment"))
}

test("case insensitive") {
val plan = table("a").select(star())
assertEqual("sELEct * FroM a", plan)
Expand Down
29 changes: 29 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/comments.sql
Expand Up @@ -88,3 +88,32 @@ Other information of first level.
/*/**/*/
SELECT 'selected content' AS tenth;
--QUERY-DELIMITER-END

-- the first case of unclosed bracketed comment
--QUERY-DELIMITER-START
/*abc*/
select 1 as a
/*
2 as b
/*abc*/
, 3 as c

/**/
;
--QUERY-DELIMITER-END

-- the second case of unclosed bracketed comment
--QUERY-DELIMITER-START
/*abc*/
select 1 as a
/*
2 as b
/*abc*/
, 3 as c

/**/
select 4 as d
;
--QUERY-DELIMITER-END
64 changes: 63 additions & 1 deletion sql/core/src/test/resources/sql-tests/results/comments.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 10
-- Number of queries: 12


-- !query
Expand Down Expand Up @@ -119,3 +119,65 @@ SELECT 'selected content' AS tenth
struct<tenth:string>
-- !query output
selected content


-- !query
/*abc*/
select 1 as a
/*

2 as b
/*abc*/
, 3 as c

/**/
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

Unclosed bracketed comment(line 3, pos 0)

== SQL ==
/*abc*/
select 1 as a
/*
^^^

2 as b
/*abc*/
, 3 as c

/**/


-- !query
/*abc*/
select 1 as a
/*

2 as b
/*abc*/
, 3 as c

/**/
select 4 as d
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

Unclosed bracketed comment(line 3, pos 0)

== SQL ==
/*abc*/
select 1 as a
/*
^^^

2 as b
/*abc*/
, 3 as c

/**/
select 4 as d

0 comments on commit ff249c3

Please sign in to comment.