From f33ca390721c62da69868259dc67c2e76556e3c3 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 20 Aug 2021 21:38:02 +0800 Subject: [PATCH 1/2] [SPARK-36552][SQL] Fix different behavior for writing char/varchar to hive and datasource table --- .../spark/sql/hive/client/HiveClientImpl.scala | 7 +++++-- .../spark/sql/HiveCharVarcharTestSuite.scala | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index b1c83af228d18..edb1c125f559f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -999,8 +999,11 @@ private[hive] object HiveClientImpl extends Logging { // For Hive Serde, we still need to to restore the raw type for char and varchar type. // When reading data in parquet, orc, or avro file format with string type for char, // the tailing spaces may lost if we are not going to pad it. - val typeString = CharVarcharUtils.getRawTypeString(c.metadata) - .getOrElse(c.dataType.catalogString) + val typeString = if (SQLConf.get.charVarcharAsString) { + c.dataType.catalogString + } else { + CharVarcharUtils.getRawTypeString(c.metadata).getOrElse(c.dataType.catalogString) + } new FieldSchema(c.name, typeString, c.getComment().orNull) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala index 48406d99a5cd3..182047a8c64db 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala @@ -59,6 +59,20 @@ class HiveCharVarcharTestSuite extends CharVarcharTestSuite with TestHiveSinglet checkAnswer(sql("SELECT v from t where c = 'Spark' and v = 'kyuubi'"), Row("kyuubi")) } } + + test("SPARK-36552: Fix different behavior of writing char/varchar to hive and datasource table") { + Seq("true", "false").foreach { v => + withSQLConf( + "spark.sql.hive.convertMetastoreParquet" -> v, + "spark.sql.legacy.charVarcharAsString" -> "true") { + withTable("t") { + sql(s"CREATE TABLE t (c varchar(2)) USING $format") + sql("INSERT INTO t SELECT 'kyuubi'") + checkAnswer(sql("SELECT c from t"), Row("kyuubi")) + } + } + } + } } class HiveCharVarcharDDLTestSuite extends CharVarcharDDLTestBase with TestHiveSingleton { From c4d25aa0997a6c84bdc84e6907bc4f7693ba780a Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sat, 21 Aug 2021 21:14:47 +0800 Subject: [PATCH 2/2] ci