From 2b8e9f505051e20968b187f23fbaee0c8d3e7b90 Mon Sep 17 00:00:00 2001
From: Norio
Date: Thu, 23 Apr 2026 00:00:00 +0000
Subject: [PATCH] [SPARK-57128][SQL][TESTS] SQLQueryTestHelper --SET parser must preserve commas in config values

What changes were proposed in this pull request?

`SQLQueryTestHelper.getSparkSettings` splits `--SET` directive values on
every comma, which conflicts with Spark configs whose values themselves
contain commas (e.g. `spark.sql.optimizer.excludedRules` accepts a
comma-separated rule list). The current parser crashes with
`StringIndexOutOfBoundsException` when it encounters such a value.

Change the split to only occur at commas that are immediately followed by
what looks like a new `key=` (word characters or dots ending in `=`). This
preserves the documented multi-setting form `--SET k1=v1,k2=v2` while
allowing values to contain commas.

Adds `SQLQueryTestHelperSuite` with focused unit tests.

Why are the changes needed?

The parser cannot currently express settings whose values contain commas,
forcing users to scope down their SET to a single value. This was hit when
trying to specify a multi-rule `excludedRules` value in Apache Gluten's
spark41 SQL test workaround (apache/incubator-gluten#12165).

Does this PR introduce any user-facing change?

No. Test-framework-only change. Existing tests that rely on the documented
multi-setting form continue to parse as before.

How was this patch tested?

New `SQLQueryTestHelperSuite` with 6 cases covering: single setting,
multi-setting in one `--SET`, multiple `--SET` lines, comma-containing
value, mixed, and non-SET comments. All pass.
---
 .../apache/spark/sql/SQLQueryTestHelper.scala |  7 +-
 .../spark/sql/SQLQueryTestHelperSuite.scala   | 64 +++++++++++++++++++
 2 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelperSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
index 8028970193acd..e6f36cf9f7084 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
@@ -474,7 +474,12 @@ trait SQLQueryTestHelper extends SQLConfHelper with Logging {
 
   protected def getSparkSettings(comments: Array[String]): Array[(String, String)] = {
     val settingLines = comments.filter(_.startsWith("--SET ")).map(_.substring(6))
-    settingLines.flatMap(_.split(",").map { kv =>
+    // Split only on commas that start a new `key=` (optional blanks allowed around the key),
+    // so commas inside a config value survive, e.g.
+    //   --SET spark.sql.optimizer.excludedRules=Rule1,Rule2
+    // while both spellings of the documented multi-setting form keep working:
+    //   --SET key1=v1,key2=v2   and   --SET key1=v1, key2=v2
+    settingLines.flatMap(_.split(",(?=\\s*[\\w.]+\\s*=)").map { kv =>
       val (conf, value) = kv.span(_ != '=')
       conf.trim -> value.substring(1).trim
     })
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelperSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelperSuite.scala
new file mode 100644
index 0000000000000..f6642cd9a5c65
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelperSuite.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql
+
+import org.apache.spark.SparkFunSuite
+
+/** Unit tests for the `--SET` directive parsing done by `getSparkSettings`. */
+class SQLQueryTestHelperSuite extends SparkFunSuite with SQLQueryTestHelper {
+
+  // A config value that itself contains a comma: two optimizer rule names in one list.
+  private val ruleList = Seq(
+    "org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation",
+    "org.apache.spark.sql.catalyst.optimizer.ConstantFolding").mkString(",")
+
+  // Runs the parser over the given comment lines and exposes the pairs as a Seq.
+  private def parse(comments: String*): Seq[(String, String)] =
+    getSparkSettings(comments.toArray).toSeq
+
+  test("getSparkSettings: single key=value") {
+    assert(parse("--SET spark.sql.foo=1") === Seq("spark.sql.foo" -> "1"))
+  }
+
+  test("getSparkSettings: multiple key=value pairs in one --SET (documented form)") {
+    val expected = Seq("spark.sql.foo" -> "1", "spark.sql.bar" -> "2")
+    assert(parse("--SET spark.sql.foo=1,spark.sql.bar=2") === expected)
+  }
+
+  test("getSparkSettings: multiple --SET statements") {
+    val expected = Seq("spark.sql.foo" -> "1", "spark.sql.bar" -> "2")
+    assert(parse("--SET spark.sql.foo=1", "--SET spark.sql.bar=2") === expected)
+  }
+
+  test("getSparkSettings: value containing commas (e.g. excludedRules list)") {
+    val expected = Seq("spark.sql.optimizer.excludedRules" -> ruleList)
+    assert(parse(s"--SET spark.sql.optimizer.excludedRules=$ruleList") === expected)
+  }
+
+  test("getSparkSettings: mixed -- multiple settings where one value contains commas") {
+    val expected = Seq(
+      "spark.sql.optimizer.excludedRules" -> ruleList,
+      "spark.sql.foo" -> "1")
+    val directive = s"--SET spark.sql.optimizer.excludedRules=$ruleList,spark.sql.foo=1"
+    assert(parse(directive) === expected)
+  }
+
+  test("getSparkSettings: ignores non --SET comments") {
+    val comments = Seq("-- a comment", "--SET spark.sql.foo=1", "-- another")
+    assert(parse(comments: _*) === Seq("spark.sql.foo" -> "1"))
+  }
+}