Skip to content

Commit dcc71b3

Browse files
Nick-0723 and ulysses-you
authored and committed
[KYUUBI #2207] Support newly added Spark data types: TimestampNTZType
### _Why are the changes needed?_ Support newly added data types: TimestampNTZType, introduced since Spark 3.3.0. ### _How was this patch tested?_ - [X] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [X] [Run test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests) locally before making a pull request Closes #2224 from Nick-0723/TimestampNTZType. Closes #2207 56dfb2d [Nick Song] replace to getSimpleName 10b12f3 [Nick Song] fix 90218ca [Nick Song] fix 348d7d0 [Nick Song] fix 54ef78d [Nick Song] support DayTimeIntervalType Authored-by: Nick Song <chun2184@163.com> Signed-off-by: ulysses-you <ulyssesyou@apache.org>
1 parent e16c728 commit dcc71b3

File tree

4 files changed

+40
-2
lines changed

4 files changed

+40
-2
lines changed

externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/RowSet.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import org.apache.hive.service.rpc.thrift._
2929
import org.apache.spark.sql.Row
3030
import org.apache.spark.sql.types._
3131

32+
import org.apache.kyuubi.engine.spark.schema.SchemaHelper.TIMESTAMP_NTZ
3233
import org.apache.kyuubi.util.RowSetUtils._
3334

3435
object RowSet {
@@ -241,6 +242,9 @@ object RowSet {
241242
case (t: Timestamp, TimestampType) =>
242243
formatTimestamp(t)
243244

245+
case (t: LocalDateTime, ntz) if ntz.getClass.getName.equals(TIMESTAMP_NTZ) =>
246+
formatLocalDateTime(t)
247+
244248
case (i: Instant, TimestampType) =>
245249
formatInstant(i, Option(timeZone))
246250

externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/SchemaHelper.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ import org.apache.spark.sql.types._
2626

2727
object SchemaHelper {
2828

29+
/**
30+
* Spark 3.3.0 DataType TimestampNTZType's class name.
31+
*/
32+
final val TIMESTAMP_NTZ = "TimestampNTZType$"
33+
2934
def toTTypeId(typ: DataType): TTypeId = typ match {
3035
case NullType => TTypeId.NULL_TYPE
3136
case BooleanType => TTypeId.BOOLEAN_TYPE
@@ -39,6 +44,7 @@ object SchemaHelper {
3944
case _: DecimalType => TTypeId.DECIMAL_TYPE
4045
case DateType => TTypeId.DATE_TYPE
4146
case TimestampType => TTypeId.TIMESTAMP_TYPE
47+
case ntz if ntz.getClass.getSimpleName.equals(TIMESTAMP_NTZ) => TTypeId.TIMESTAMP_TYPE
4248
case BinaryType => TTypeId.BINARY_TYPE
4349
case CalendarIntervalType => TTypeId.STRING_TYPE
4450
case dt if dt.getClass.getSimpleName.equals("DayTimeIntervalType") =>
@@ -104,6 +110,7 @@ object SchemaHelper {
104110
case _: DecimalType => java.sql.Types.DECIMAL
105111
case DateType => java.sql.Types.DATE
106112
case TimestampType => java.sql.Types.TIMESTAMP
113+
case ntz if ntz.getClass.getSimpleName.equals(TIMESTAMP_NTZ) => java.sql.Types.TIMESTAMP
107114
case BinaryType => java.sql.Types.BINARY
108115
case _: ArrayType => java.sql.Types.ARRAY
109116
case _: MapType => java.sql.Types.JAVA_OBJECT
@@ -118,6 +125,7 @@ object SchemaHelper {
118125
* For array, map, string, and binaries, the column size is variable, return null as unknown.
119126
*/
120127
def getColumnSize(sparkType: DataType): Option[Int] = sparkType match {
128+
case ntz if ntz.getClass.getSimpleName.equals(TIMESTAMP_NTZ) => Some(ntz.defaultSize)
121129
case dt @ (BooleanType | _: NumericType | DateType | TimestampType |
122130
CalendarIntervalType | NullType) =>
123131
Some(dt.defaultSize)
@@ -145,6 +153,7 @@ object SchemaHelper {
145153
case DoubleType => Some(15)
146154
case d: DecimalType => Some(d.scale)
147155
case TimestampType => Some(6)
156+
case ntz if ntz.getClass.getSimpleName.equals(TIMESTAMP_NTZ) => Some(6)
148157
case _ => None
149158
}
150159

externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkOperationSuite.scala

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
3131
import org.apache.spark.sql.types._
3232

3333
import org.apache.kyuubi.engine.spark.WithSparkSQLEngine
34+
import org.apache.kyuubi.engine.spark.schema.SchemaHelper.TIMESTAMP_NTZ
3435
import org.apache.kyuubi.engine.spark.shim.SparkCatalogShim
3536
import org.apache.kyuubi.operation.{HiveMetadataTests, SparkQueryTests}
3637
import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._
@@ -76,6 +77,12 @@ class SparkOperationSuite extends WithSparkSQLEngine with HiveMetadataTests with
7677
.add("c16", "binary", nullable = false, "16")
7778
.add("c17", "struct<X: string>", nullable = true, "17")
7879

80+
// since spark3.3.0
81+
if (SPARK_ENGINE_MAJOR_MINOR_VERSION._1 > 3 ||
82+
(SPARK_ENGINE_MAJOR_MINOR_VERSION._1 == 3 && SPARK_ENGINE_MAJOR_MINOR_VERSION._2 >= 3)) {
83+
schema.add("c18", "timestamp_ntz", nullable = true, "18")
84+
}
85+
7986
val ddl =
8087
s"""
8188
|CREATE TABLE IF NOT EXISTS $defaultSchema.$tableName (
@@ -110,7 +117,8 @@ class SparkOperationSuite extends WithSparkSQLEngine with HiveMetadataTests with
110117
TIMESTAMP,
111118
STRUCT,
112119
BINARY,
113-
STRUCT)
120+
STRUCT,
121+
TIMESTAMP)
114122

115123
var pos = 0
116124

@@ -137,6 +145,8 @@ class SparkOperationSuite extends WithSparkSQLEngine with HiveMetadataTests with
137145
case FloatType => assert(decimalDigits === 7)
138146
case DoubleType => assert(decimalDigits === 15)
139147
case TimestampType => assert(decimalDigits === 6)
148+
case ntz if ntz.getClass.getSimpleName.equals(TIMESTAMP_NTZ) =>
149+
assert(decimalDigits === 6)
140150
case _ => assert(decimalDigits === 0) // nulls
141151
}
142152

@@ -154,7 +164,7 @@ class SparkOperationSuite extends WithSparkSQLEngine with HiveMetadataTests with
154164
pos += 1
155165
}
156166

157-
assert(pos === 18, "all columns should have been verified")
167+
assert(pos === schema.length, "all columns should have been verified")
158168
}
159169

160170
val rowSet = metaData.getColumns(null, "*", "not_exist", "not_exist")

kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkQueryTests.scala

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,21 @@ trait SparkQueryTests extends HiveJDBCTestHelper {
174174
}
175175
}
176176

177+
test("execute statement - select timestamp_ntz") {
178+
assume(SPARK_ENGINE_MAJOR_MINOR_VERSION._1 >= 3
179+
&& SPARK_ENGINE_MAJOR_MINOR_VERSION._2 > 2)
180+
withJdbcStatement() { statement =>
181+
val resultSet = statement.executeQuery(
182+
"SELECT make_timestamp_ntz(2022, 03, 24, 18, 08, 31.800) AS col")
183+
assert(resultSet.next())
184+
assert(resultSet.getTimestamp("col") === Timestamp.valueOf("2022-03-24 18:08:31.800"))
185+
val metaData = resultSet.getMetaData
186+
assert(metaData.getColumnType(1) === java.sql.Types.TIMESTAMP)
187+
assert(metaData.getPrecision(1) === 29)
188+
assert(metaData.getScale(1) === 9)
189+
}
190+
}
191+
177192
test("execute statement - select daytime interval") {
178193
withJdbcStatement() { statement =>
179194
Map(

0 commit comments

Comments
 (0)