diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3
index 41b04c5dc6c1..769f6d8bc3d9 100644
--- a/dev/deps/spark-deps-hadoop-3-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -10,7 +10,6 @@ aliyun-java-sdk-ram/3.1.0//aliyun-java-sdk-ram-3.1.0.jar
aliyun-sdk-oss/3.13.2//aliyun-sdk-oss-3.13.2.jar
analyticsaccelerator-s3/1.3.0//analyticsaccelerator-s3-1.3.0.jar
antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar
-antlr4-runtime/4.13.1//antlr4-runtime-4.13.1.jar
aopalliance-repackaged/3.0.6//aopalliance-repackaged-3.0.6.jar
arpack/3.0.4//arpack-3.0.4.jar
arpack_combined_all/0.1//arpack_combined_all-0.1.jar
diff --git a/pom.xml b/pom.xml
index 85e9f07ad3ee..3e2da5baebe0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -92,6 +92,7 @@
mllib-local
tools
streaming
+ sql/parser
sql/api
sql/catalyst
sql/core
@@ -2482,6 +2483,7 @@
org.antlr
antlr4-runtime
${antlr4.version}
+ provided
com.ibm.icu
@@ -2819,11 +2821,6 @@
true
-
- org.antlr
- antlr4-maven-plugin
- ${antlr4.version}
-
org.apache.maven.plugins
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 94e8f0d9b9a9..235658a6dc44 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -47,9 +47,9 @@ object BuildCommons {
private val buildLocation = file(".").getAbsoluteFile.getParentFile
- val sqlProjects@Seq(sqlApi, catalyst, sql, hive, hiveThriftServer, tokenProviderKafka010, sqlKafka010, avro, protobuf) =
+ val sqlProjects@Seq(sqlApi, catalyst, sql, hive, hiveThriftServer, tokenProviderKafka010, sqlKafka010, avro, protobuf, sqlParser) =
Seq("sql-api", "catalyst", "sql", "hive", "hive-thriftserver", "token-provider-kafka-0-10",
- "sql-kafka-0-10", "avro", "protobuf").map(ProjectRef(buildLocation, _))
+ "sql-kafka-0-10", "avro", "protobuf", "sql-parser").map(ProjectRef(buildLocation, _))
val streamingProjects@Seq(streaming, streamingKafka010) =
Seq("streaming", "streaming-kafka-0-10").map(ProjectRef(buildLocation, _))
@@ -403,7 +403,7 @@ object SparkBuild extends PomBuild {
Seq(
spark, hive, hiveThriftServer, repl, networkCommon, networkShuffle, networkYarn,
unsafe, tags, tokenProviderKafka010, sqlKafka010, pipelines, connectCommon, connect,
- connectJdbc, connectClient, variant, connectShims, profiler, commonUtilsJava
+ connectJdbc, connectClient, variant, connectShims, profiler, commonUtilsJava, sqlParser
).contains(x)
}
@@ -437,7 +437,10 @@ object SparkBuild extends PomBuild {
/* Enable unidoc only for the root spark project */
enable(Unidoc.settings)(spark)
- /* Sql-api ANTLR generation settings */
+ /* Spark SQL Parser ANTLR generation settings */
+ enable(SqlParser.settings)(sqlParser)
+
+ /* Spark SQL API settings */
enable(SqlApi.settings)(sqlApi)
/* Spark SQL Core settings */
@@ -1274,7 +1277,7 @@ object OldDeps {
)
}
-object SqlApi {
+object SqlParser {
import com.simplytyped.Antlr4Plugin
import com.simplytyped.Antlr4Plugin.autoImport._
@@ -1283,7 +1286,31 @@ object SqlApi {
(Antlr4 / antlr4PackageName) := Some("org.apache.spark.sql.catalyst.parser"),
(Antlr4 / antlr4GenListener) := true,
(Antlr4 / antlr4GenVisitor) := true,
- (Antlr4 / antlr4TreatWarningsAsErrors) := true
+ (Antlr4 / antlr4TreatWarningsAsErrors) := true,
+
+ excludeDependencies ++= Seq(
+ ExclusionRule("com.google.guava", "guava"),
+ ExclusionRule("org.jpmml", "pmml-model")
+ ),
+
+ (assembly / logLevel) := Level.Info,
+ // Exclude `scala-library` from assembly.
+ (assembly / assemblyPackageScala / assembleArtifact) := false,
+ assembly / assemblyShadeRules := Seq(
+ ShadeRule.rename("org.antlr.v4.runtime.**" -> "org.sparkproject.antlr.v4.runtime.@1").inAll
+ ),
+ (assembly / assemblyMergeStrategy) := {
+ case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") => MergeStrategy.discard
+ case _ => MergeStrategy.first
+ },
+ (assembly / assemblyJarName) := s"${moduleName.value}_${scalaBinaryVersion.value}-${version.value}.jar"
+ )
+}
+
+object SqlApi {
+ lazy val settings = Seq(
+ // Assemble sql-parser first so its shaded antlr4-runtime classes are on the classpath
+ Compile / compile := ((Compile / compile) dependsOn LocalProject("sql-parser") / assembly).value,
)
}
diff --git a/sql/api/pom.xml b/sql/api/pom.xml
index 614ed61de7ed..9a7c926fcc73 100644
--- a/sql/api/pom.xml
+++ b/sql/api/pom.xml
@@ -64,6 +64,11 @@
${project.version}
compile
+
+ org.apache.spark
+ spark-sql-parser_${scala.binary.version}
+ ${project.version}
+
org.apache.commons
commons-lang3
@@ -72,10 +77,6 @@
org.json4s
json4s-jackson_${scala.binary.version}
-
- org.antlr
- antlr4-runtime
-
org.apache.arrow
arrow-vector
@@ -85,26 +86,4 @@
arrow-memory-netty
-
- target/scala-${scala.binary.version}/classes
- target/scala-${scala.binary.version}/test-classes
-
-
- org.antlr
- antlr4-maven-plugin
-
-
-
- antlr4
-
-
-
-
- true
- ../api/src/main/antlr4
- true
-
-
-
-
\ No newline at end of file
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
index 212c80a3cb43..b6f9a6e42064 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
@@ -20,8 +20,8 @@ import java.util.Locale
import scala.jdk.CollectionConverters._
-import org.antlr.v4.runtime.{ParserRuleContext, Token}
-import org.antlr.v4.runtime.tree.ParseTree
+import org.sparkproject.antlr.v4.runtime.{ParserRuleContext, Token}
+import org.sparkproject.antlr.v4.runtime.tree.ParseTree
import org.apache.spark.SparkException
import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala
index 461d79ec22cf..f2b3861afb69 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala
@@ -17,8 +17,8 @@
package org.apache.spark.sql.catalyst.parser
-import org.antlr.v4.runtime.{DefaultErrorStrategy, InputMismatchException, IntStream, NoViableAltException, Parser, ParserRuleContext, RecognitionException, Recognizer, Token}
-import org.antlr.v4.runtime.misc.ParseCancellationException
+import org.sparkproject.antlr.v4.runtime.{DefaultErrorStrategy, InputMismatchException, IntStream, NoViableAltException, Parser, ParserRuleContext, RecognitionException, Recognizer, Token}
+import org.sparkproject.antlr.v4.runtime.misc.ParseCancellationException
/**
* A [[SparkRecognitionException]] extends the [[RecognitionException]] with more information
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParmsAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParmsAstBuilder.scala
index f32c1d6f3836..9fa23f640d5c 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParmsAstBuilder.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParmsAstBuilder.scala
@@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.parser
import scala.collection.mutable
-import org.antlr.v4.runtime.tree.{ParseTree, RuleNode, TerminalNode}
+import org.sparkproject.antlr.v4.runtime.tree.{ParseTree, RuleNode, TerminalNode}
import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
import org.apache.spark.sql.catalyst.util.SparkParserUtils.withOrigin
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala
index 32270df0a988..99b8293ad40d 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala
@@ -20,11 +20,11 @@ import java.util.concurrent.atomic.AtomicReference
import scala.jdk.CollectionConverters._
-import org.antlr.v4.runtime._
-import org.antlr.v4.runtime.atn.{ATN, ParserATNSimulator, PredictionContextCache, PredictionMode}
-import org.antlr.v4.runtime.dfa.DFA
-import org.antlr.v4.runtime.misc.{Interval, ParseCancellationException}
-import org.antlr.v4.runtime.tree.TerminalNodeImpl
+import org.sparkproject.antlr.v4.runtime._
+import org.sparkproject.antlr.v4.runtime.atn.{ATN, ParserATNSimulator, PredictionContextCache, PredictionMode}
+import org.sparkproject.antlr.v4.runtime.dfa.DFA
+import org.sparkproject.antlr.v4.runtime.misc.{Interval, Pair, ParseCancellationException}
+import org.sparkproject.antlr.v4.runtime.tree.TerminalNodeImpl
import org.apache.spark.{QueryContext, SparkException, SparkThrowable, SparkThrowableHelper}
import org.apache.spark.internal.{Logging, LogKeys}
@@ -343,7 +343,7 @@ case object PostProcessor extends SqlBaseParserBaseListener {
parent.removeLastChild()
val token = ctx.getChild(0).getPayload.asInstanceOf[Token]
val newToken = new CommonToken(
- new org.antlr.v4.runtime.misc.Pair(token.getTokenSource, token.getInputStream),
+ new Pair(token.getTokenSource, token.getInputStream),
SqlBaseParser.IDENTIFIER,
token.getChannel,
token.getStartIndex + stripMargins,
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala
index ee296a6fb664..6c3276d73312 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala
@@ -18,9 +18,9 @@ package org.apache.spark.sql.catalyst.util
import java.lang.{Long => JLong, StringBuilder => JStringBuilder}
-import org.antlr.v4.runtime.{ParserRuleContext, Token}
-import org.antlr.v4.runtime.misc.Interval
-import org.antlr.v4.runtime.tree.TerminalNode
+import org.sparkproject.antlr.v4.runtime.{ParserRuleContext, Token}
+import org.sparkproject.antlr.v4.runtime.misc.Interval
+import org.sparkproject.antlr.v4.runtime.tree.TerminalNode
import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin}
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
index 553161ea2db0..3328bb1daa1c 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.errors
import java.util.Locale
-import org.antlr.v4.runtime.ParserRuleContext
+import org.sparkproject.antlr.v4.runtime.ParserRuleContext
import org.apache.spark.SparkException
import org.apache.spark.sql.catalyst.parser.ParseException
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala
index 216136d8a7c8..d8d4c69eafa4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala
@@ -16,7 +16,7 @@
*/
package org.apache.spark.sql.catalyst.parser
-import org.antlr.v4.runtime.ParserRuleContext
+import org.sparkproject.antlr.v4.runtime.ParserRuleContext
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.expressions.Expression
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index d1d4a6b8c980..9572bda92734 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -23,8 +23,8 @@ import java.util.concurrent.TimeUnit
import scala.collection.mutable.{ArrayBuffer, HashMap, ListBuffer, Set}
import scala.jdk.CollectionConverters._
-import org.antlr.v4.runtime.{ParserRuleContext, RuleContext, Token}
-import org.antlr.v4.runtime.tree.{ParseTree, RuleNode, TerminalNode}
+import org.sparkproject.antlr.v4.runtime.{ParserRuleContext, RuleContext, Token}
+import org.sparkproject.antlr.v4.runtime.tree.{ParseTree, RuleNode, TerminalNode}
import org.apache.spark.{SparkArithmeticException, SparkException, SparkIllegalArgumentException, SparkThrowable, SparkThrowableHelper}
import org.apache.spark.internal.Logging
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
index 336db1382f89..06c53d5ab3a7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
@@ -22,9 +22,9 @@ import java.util.Locale
import scala.collection.{immutable, mutable}
import scala.util.matching.Regex
-import org.antlr.v4.runtime.{ParserRuleContext, Token}
-import org.antlr.v4.runtime.misc.Interval
-import org.antlr.v4.runtime.tree.{ParseTree, TerminalNodeImpl}
+import org.sparkproject.antlr.v4.runtime.{ParserRuleContext, Token}
+import org.sparkproject.antlr.v4.runtime.misc.Interval
+import org.sparkproject.antlr.v4.runtime.tree.{ParseTree, TerminalNodeImpl}
import org.apache.spark.SparkException
import org.apache.spark.sql.catalyst.analysis.UnresolvedIdentifier
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParamsParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParamsParser.scala
index 9beead0e6487..eac75422e27d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParamsParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParamsParser.scala
@@ -16,7 +16,7 @@
*/
package org.apache.spark.sql.catalyst.parser
-import org.antlr.v4.runtime.{CharStreams, CommonTokenStream}
+import org.sparkproject.antlr.v4.runtime.{CharStreams, CommonTokenStream}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.AnalysisException
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala
index 5a2eefcb0e93..4674238175db 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala
@@ -30,11 +30,11 @@ trait SQLKeywordUtils extends SparkFunSuite with SQLHelper {
val sqlSyntaxDefs = {
val sqlBaseParserPath =
- getWorkspaceFilePath("sql", "api", "src", "main", "antlr4", "org",
+ getWorkspaceFilePath("sql", "parser", "src", "main", "antlr4", "org",
"apache", "spark", "sql", "catalyst", "parser", "SqlBaseParser.g4").toFile
val sqlBaseLexerPath =
- getWorkspaceFilePath("sql", "api", "src", "main", "antlr4", "org",
+ getWorkspaceFilePath("sql", "parser", "src", "main", "antlr4", "org",
"apache", "spark", "sql", "catalyst", "parser", "SqlBaseLexer.g4").toFile
(Files.readString(sqlBaseParserPath.toPath) +
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala
index 96054f551ee4..4ab7535d032e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala
@@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.parser
import scala.jdk.CollectionConverters._
-import org.antlr.v4.runtime.{CharStreams, CommonTokenStream, ParserRuleContext}
+import org.sparkproject.antlr.v4.runtime.{CharStreams, CommonTokenStream, ParserRuleContext}
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 550c23e3e830..0cf3cdc6cb97 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -22,8 +22,8 @@ import java.util.{Locale, TimeZone}
import scala.jdk.CollectionConverters._
-import org.antlr.v4.runtime.ParserRuleContext
-import org.antlr.v4.runtime.tree.TerminalNode
+import org.sparkproject.antlr.v4.runtime.ParserRuleContext
+import org.sparkproject.antlr.v4.runtime.tree.TerminalNode
import org.apache.spark.SparkException
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
diff --git a/sql/parser/pom.xml b/sql/parser/pom.xml
new file mode 100644
index 000000000000..cae6287c9aba
--- /dev/null
+++ b/sql/parser/pom.xml
@@ -0,0 +1,92 @@
+
+
+
+
+ 4.0.0
+
+ org.apache.spark
+ spark-parent_2.13
+ 4.2.0-SNAPSHOT
+ ../../pom.xml
+
+
+ spark-sql-parser_2.13
+ jar
+ Spark Project SQL Parser
+ https://spark.apache.org/
+
+ sql-parser
+
+
+
+
+ org.scala-lang
+ scala-reflect
+
+
+ org.antlr
+ antlr4-runtime
+ ${antlr4.version}
+ compile
+
+
+
+ target/scala-${scala.binary.version}/classes
+ target/scala-${scala.binary.version}/test-classes
+
+
+ org.antlr
+ antlr4-maven-plugin
+ ${antlr4.version}
+
+
+
+ antlr4
+
+
+
+
+ true
+ ../parser/src/main/antlr4
+ true
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+
+
+
+ org.antlr:antlr4-runtime
+
+
+
+
+ org.antlr.v4.runtime
+ ${spark.shade.packageName}.antlr.v4.runtime
+
+ org.antlr.v4.runtime.**
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/parser/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
similarity index 100%
rename from sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
rename to sql/parser/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/parser/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
similarity index 100%
rename from sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
rename to sql/parser/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4