From 400856800948aa259d0e595abb61951fcc5ce2b6 Mon Sep 17 00:00:00 2001
From: lgbo-ustc
Date: Thu, 28 May 2026 19:42:59 +0800
Subject: [PATCH] [CH] Fix str_to_map nullable input null-map access

SparkFunctionStrToMap iterates rows with index i, but the nullable
string path checked null_map[n], where n is offsets.size(). For any
non-empty nullable input this reads one element past the null map and
can produce an incorrect null decision, undefined behavior, or a crash.

Use the current row index when reading the nullable null map.

Add a ClickHouse backend regression test that evaluates str_to_map over
a nullable string expression containing both null and non-null rows,
then compares the result with vanilla Spark.
---
 .../execution/GlutenFunctionValidateSuite.scala | 14 ++++++++++++++
 .../Functions/SparkFunctionStrToMap.cpp | 2 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
index 6a0196e2b18..66ea6c98669 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
@@ -489,6 +489,20 @@ class GlutenFunctionValidateSuite extends GlutenClickHouseWholeStageTransformerS
     runQueryAndCompare(sql1)(checkGlutenPlan[ProjectExecTransformer])
   }
 
+  test("test str2map with nullable string input") {
+    val sql =
+      """
+        |select id, str_to_map(str, ',', ':')
+        |from (
+        |  select id,
+        |    if(id = 1, cast(null as string), concat('k:', cast(id as string))) as str
+        |  from range(4)
+        |)
+        |order by id
+        |""".stripMargin
+    runQueryAndCompare(sql)(checkGlutenPlan[ProjectExecTransformer])
+  }
+
   test("test parse_url") {
     val sql1 =
       """
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionStrToMap.cpp b/cpp-ch/local-engine/Functions/SparkFunctionStrToMap.cpp
index 8936ea0d04b..e9c1d80c066 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionStrToMap.cpp
+++ b/cpp-ch/local-engine/Functions/SparkFunctionStrToMap.cpp
@@ -240,7 +240,7 @@ class SparkFunctionStrToMap : public DB::IFunction
 
         for (size_t i = 0, n = offsets.size(); i < n; ++i)
         {
-            if (null_map && (*null_map)[n] != 0)
+            if (null_map && (*null_map)[i] != 0)
                 col_map->insertDefault();
             else
             {