Commit

fix ut

cfmcgrady committed Feb 15, 2023
1 parent 87c6f9e commit b8e4b28
Showing 5 changed files with 8 additions and 21 deletions.
docs/deployment/settings.md (2 changes: 1 addition & 1 deletion)
@@ -440,7 +440,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co
| kyuubi.operation.plan.only.output.style | plain | Configures the planOnly output style. The value can be 'plain' or 'json', and the default value is 'plain'. This configuration supports only the output styles of the Spark engine | string | 1.7.0 |
| kyuubi.operation.progress.enabled | false | Whether to enable the operation progress. When true, the operation progress will be returned in `GetOperationStatus`. | boolean | 1.6.0 |
| kyuubi.operation.query.timeout | <undefined> | Timeout for query executions at server-side. It takes effect together with the client-side timeout (`java.sql.Statement.setQueryTimeout`); a running query will be cancelled automatically when it times out. It's off by default, which means only the client side has full control over whether the query should time out. If set, the client-side timeout is capped at this value. To cancel queries right away without waiting for tasks to finish, consider enabling kyuubi.operation.interrupt.on.cancel together. | duration | 1.2.0 |
-| kyuubi.operation.result.arrow.timestampAsString | true | When true, arrow-based rowsets will convert columns of type timestamp to strings for transmission. | boolean | 1.7.0 |
+| kyuubi.operation.result.arrow.timestampAsString | true | When true, arrow-based rowsets will convert columns of type timestamp to strings for transmission. Note that when it is false, the timestamp column behaves differently from the thrift result format, but transfer performance is better. | boolean | 1.7.0 |
| kyuubi.operation.result.format | thrift | Specify the result format, available configs are: <ul> <li>THRIFT: the result will convert to TRow at the engine driver side. </li> <li>ARROW: the result will be encoded as Arrow at the executor side before collecting by the driver, and deserialized at the client side. note that it only takes effect for kyuubi-hive-jdbc clients now.</li></ul> | string | 1.7.0 |
| kyuubi.operation.result.max.rows | 0 | Max rows of Spark query results. Rows exceeding the limit would be ignored. Set this value to 0 to disable the max rows limit. | int | 1.6.0 |
| kyuubi.operation.scheduler.pool | <undefined> | The scheduler pool of the job. Note that this config should be used after changing the Spark config spark.scheduler.mode=FAIR. | string | 1.1.1 |
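As a worked example (a sketch, not authoritative: it assumes a stock deployment where these keys are set in `$KYUUBI_HOME/conf/kyuubi-defaults.conf`), the two Arrow-related settings above combine like this:

```properties
# Encode results as Arrow batches on the executors (kyuubi-hive-jdbc clients only)
kyuubi.operation.result.format=arrow
# Default true: render timestamp columns as strings for thrift-compatible output;
# false trades that compatibility for faster transfer
kyuubi.operation.result.arrow.timestampAsString=true
```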
SparkOperation.scala
@@ -24,7 +24,7 @@ import org.apache.hive.service.rpc.thrift.{TGetResultSetMetadataResp, TProgressU
import org.apache.spark.kyuubi.{SparkProgressMonitor, SQLOperationListener}
import org.apache.spark.kyuubi.SparkUtilsHelper.redact
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
-import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.execution.SQLExecution
import org.apache.spark.sql.types.StructType

import org.apache.kyuubi.{KyuubiSQLException, Utils}
@@ -136,7 +136,7 @@ abstract class SparkOperation(session: Session)
spark.sparkContext.setLocalProperty

protected def withLocalProperties[T](f: => T): T = {
-    SQLConf.withExistingConf(spark.sessionState.conf) {
+    SQLExecution.withSQLConfPropagated(spark) {
val originalSession = SparkSession.getActiveSession
try {
SparkSession.setActiveSession(spark)
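Replacing `SQLConf.withExistingConf` with `SQLExecution.withSQLConfPropagated` changes where the session's SQL configs are visible: the former only installs the conf as the driver thread's `SQLConf`, while the latter copies `spark.*` entries into `SparkContext` local properties so that tasks on the executors (for example, the Arrow serialization path that consults `spark.sql.session.timeZone`) observe the same settings. A paraphrased sketch of Spark's helper, for orientation only (see `org.apache.spark.sql.execution.SQLExecution` for the real code):

```scala
import org.apache.spark.sql.SparkSession

// Sketch of SQLExecution.withSQLConfPropagated: mirror the session's SQL
// configs into SparkContext local properties while `body` runs, then restore.
def withSQLConfPropagatedSketch[T](spark: SparkSession)(body: => T): T = {
  val sc = spark.sparkContext
  val saved = spark.sessionState.conf.getAllConfs.collect {
    case (key, value) if key.startsWith("spark") =>
      val previous = sc.getLocalProperty(key) // may be null
      sc.setLocalProperty(key, value) // visible to tasks via TaskContext
      key -> previous
  }
  try body
  finally saved.foreach { case (key, previous) => sc.setLocalProperty(key, previous) }
}
```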
RowSet.scala
@@ -25,7 +25,6 @@ import scala.collection.JavaConverters._
import org.apache.hive.service.rpc.thrift._
import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.HiveResult
-import org.apache.spark.sql.execution.HiveResult.TimeFormatters
import org.apache.spark.sql.types._

import org.apache.kyuubi.util.RowSetUtils._
@@ -34,11 +33,9 @@ object RowSet {

def toHiveString(
valueAndType: (Any, DataType),
-      nested: Boolean = false,
-      timeFormatters: TimeFormatters = HiveResult.getTimeFormatters): String = {
+      nested: Boolean = false): String = {
    // compatible w/ Spark 3.1 and above
-    // val timeFormatters = HiveResult.getTimeFormatters
-    HiveResult.toHiveString(valueAndType, nested, timeFormatters)
+    HiveResult.toHiveString(valueAndType, nested, HiveResult.getTimeFormatters)
}

def toTRowSet(
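With the explicit `timeFormatters` parameter dropped, each call now builds formatters via `HiveResult.getTimeFormatters`, which reads the current thread's `SQLConf` (including the session time zone); that only yields the session's values when the configs have been propagated as above. A hypothetical call from code in the same package, for illustration:

```scala
import java.sql.Timestamp

import org.apache.spark.sql.types.TimestampType

// Hypothetical example: the rendering follows the session time zone carried
// by the thread-local SQLConf; no formatter needs to be passed in.
val rendered: String = RowSet.toHiveString(
  (Timestamp.valueOf("2023-02-15 12:34:56"), TimestampType))
```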
SparkDatasetHelper.scala
@@ -17,12 +17,8 @@

package org.apache.spark.sql.kyuubi

-import java.time.ZoneId
-
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Dataset, Row}
-import org.apache.spark.sql.catalyst.util.{DateFormatter, TimestampFormatter}
-import org.apache.spark.sql.execution.HiveResult.TimeFormatters
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._

@@ -36,7 +32,6 @@ object SparkDatasetHelper {
def convertTopLevelComplexTypeToHiveString(
df: DataFrame,
timestampAsString: Boolean): DataFrame = {
-    val timeZone = ZoneId.of(df.sparkSession.sessionState.conf.sessionLocalTimeZone)

val quotedCol = (name: String) => col(quoteIfNeeded(name))

@@ -51,7 +46,7 @@
case StructType(Array(StructField(_, mt: MapType, _, _))) =>
RowSet.toHiveString((row.toSeq.head, mt), nested = true)
case StructType(Array(StructField(_, tt: TimestampType, _, _))) =>
-        RowSet.toHiveString((row.toSeq.head, tt), nested = true, getTimeFormatters(timeZone))
+        RowSet.toHiveString((row.toSeq.head, tt), nested = true)
case _ =>
throw new UnsupportedOperationException
}
@@ -80,10 +75,4 @@
s"`${part.replace("`", "``")}`"
}
}

-  private def getTimeFormatters(timeZone: ZoneId): TimeFormatters = {
-    val dateFormatter = DateFormatter()
-    val timestampFormatter = TimestampFormatter.getFractionFormatter(timeZone)
-    TimeFormatters(dateFormatter, timestampFormatter)
-  }
}
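With the `ZoneId` plumbing removed, `SparkDatasetHelper` delegates all timestamp rendering to `RowSet.toHiveString`. A hypothetical usage of the helper (the local `SparkSession` here is purely illustrative):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.kyuubi.SparkDatasetHelper

// Hypothetical driver-side usage before handing rows to the Arrow collector.
val spark = SparkSession.builder().appName("demo").master("local[1]").getOrCreate()
val df = spark.sql("SELECT map('k', 1) AS m, current_timestamp() AS ts")

// Top-level complex columns, and timestamp columns when timestampAsString is
// true, come back rendered as Hive-style strings.
val stringified = SparkDatasetHelper.convertTopLevelComplexTypeToHiveString(
  df,
  timestampAsString = true)
stringified.printSchema()
```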
KyuubiConf.scala
@@ -1679,7 +1679,8 @@ object KyuubiConf {
val ARROW_BASED_ROWSET_TIMESTAMP_AS_STRING: ConfigEntry[Boolean] =
buildConf("kyuubi.operation.result.arrow.timestampAsString")
      .doc("When true, arrow-based rowsets will convert columns of type timestamp to strings for" +
-        " transmission.")
+        " transmission. Note that when it is false, the timestamp column behaves differently" +
+        " from the thrift result format, but transfer performance is better.")
.version("1.7.0")
.booleanConf
.createWithDefault(true)
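Elsewhere in the server, an entry declared through this `buildConf` DSL is read back with the typed getter; a minimal sketch (the freshly constructed `KyuubiConf` is illustrative, real code reads the session or server conf):

```scala
import org.apache.kyuubi.config.KyuubiConf

// Typed read of the flag declared above; yields true unless overridden.
val conf = KyuubiConf()
val timestampAsString: Boolean =
  conf.get(KyuubiConf.ARROW_BASED_ROWSET_TIMESTAMP_AS_STRING)
```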
