[SPARK-27521][SQL] Move data source v2 to catalyst module
## What changes were proposed in this pull request?

Currently we are in the odd state where some data source v2 interfaces (the catalog-related ones) live in sql/catalyst, while other data source v2 interfaces (Table, ScanBuilder, DataReader, etc.) live in sql/core.

I don't see a reason to keep the data source v2 API split across two modules. If we have to pick one, I think sql/catalyst is the one to go with.

The catalyst module already contains user-facing classes such as DataType and Row. Moreover, we have to update `Analyzer` and `SessionCatalog` to support the new catalog plugin, and since those classes live in catalyst, the catalog plugin API needs to be there as well.
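
As a quick illustration (not from this PR): both classes below are defined in sql/catalyst yet are everyday user-facing API.

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

// Row and the DataType hierarchy already live in the catalyst module.
val schema = StructType(Seq(
  StructField("id", IntegerType),
  StructField("name", StringType)))
val row = Row(1, "a")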

This PR solves the problem we ran into in #24246.

## How was this patch tested?

Existing tests.

Closes #24416 from cloud-fan/move.

Authored-by: Wenchen Fan <wenchen@databricks.com>
Signed-off-by: gatorsmile <gatorsmile@gmail.com>
cloud-fan authored and gatorsmile committed Jun 5, 2019
1 parent 3f102a8 · commit 8b6232b
Showing 60 changed files with 65 additions and 28 deletions.
39 changes: 39 additions & 0 deletions project/MimaExcludes.scala
@@ -291,6 +291,45 @@ object MimaExcludes {
      case _ => true
    },

+    // [SPARK-27521][SQL] Move data source v2 to catalyst module
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ColumnarBatch"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ArrowColumnVector"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ColumnarRow"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ColumnarArray"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ColumnarMap"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ColumnVector"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.GreaterThanOrEqual"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.StringEndsWith"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.LessThanOrEqual$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.In$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.Not"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.IsNotNull"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.LessThan"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.LessThanOrEqual"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.EqualNullSafe$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.GreaterThan$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.In"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.And"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.StringStartsWith$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.EqualNullSafe"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.StringEndsWith$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.GreaterThanOrEqual$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.Not$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.IsNull$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.LessThan$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.IsNotNull$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.Or"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.EqualTo$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.GreaterThan"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.StringContains"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.Filter"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.IsNull"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.EqualTo"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.And$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.Or$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.StringStartsWith"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.StringContains$"),
+
// [SPARK-26216][SQL] Do not use case class as public API (UserDefinedFunction)
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.expressions.UserDefinedFunction$"),
ProblemFilters.exclude[AbstractClassProblem]("org.apache.spark.sql.expressions.UserDefinedFunction"),
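
A note on the exclusions added above for SPARK-27521: MiMa compares each artifact against the previous release, so classes that merely moved from the sql/core jar to the sql/catalyst jar are reported as missing and must be suppressed; the entries ending in `$` cover the Scala companion objects. A minimal sketch of the mechanism, using MiMa's ProblemFilters API (the val name is illustrative):

import com.typesafe.tools.mima.core._

// Suppress the "missing class" report for one class that moved modules;
// the trailing-$ variant would match its companion object.
val exampleExcludes = Seq(
  ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.Filter"))
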
4 changes: 4 additions & 0 deletions sql/catalyst/pom.xml
@@ -114,6 +114,10 @@
      <version>2.7.3</version>
      <type>jar</type>
    </dependency>
+    <dependency>
+      <groupId>org.apache.arrow</groupId>
+      <artifactId>arrow-vector</artifactId>
+    </dependency>
</dependencies>
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
@@ -18,7 +18,6 @@
package org.apache.spark.sql.sources.v2;

import org.apache.spark.annotation.Evolving;
-import org.apache.spark.sql.sources.DataSourceRegister;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.util.CaseInsensitiveStringMap;

@@ -56,13 +55,7 @@ public interface TableProvider {
   * @throws UnsupportedOperationException
   */
  default Table getTable(CaseInsensitiveStringMap options, StructType schema) {
-    String name;
-    if (this instanceof DataSourceRegister) {
-      name = ((DataSourceRegister) this).shortName();
-    } else {
-      name = this.getClass().getName();
-    }
    throw new UnsupportedOperationException(
-      name + " source does not support user-specified schema");
+      this.getClass().getSimpleName() + " source does not support user-specified schema");
  }
}
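
For illustration (not part of this commit): a source that does accept a user-specified schema overrides the default above. A minimal Scala sketch; the provider name is hypothetical and the Table methods are assumed from this era's API, so exact signatures may differ.

import java.util.Collections

import org.apache.spark.sql.sources.v2.{Table, TableCapability, TableProvider}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap

class ExampleSchemaProvider extends TableProvider {
  // Without a user-specified schema, fall back to a fixed default.
  override def getTable(options: CaseInsensitiveStringMap): Table =
    getTable(options, new StructType().add("value", "string"))

  // Overrides the default above, which throws UnsupportedOperationException.
  override def getTable(options: CaseInsensitiveStringMap, userSchema: StructType): Table =
    new Table {
      override def name(): String = "example"
      override def schema(): StructType = userSchema
      override def capabilities(): java.util.Set[TableCapability] =
        Collections.emptySet[TableCapability]()
    }
}
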
@@ -23,7 +23,7 @@
import org.apache.arrow.vector.holders.NullableVarCharHolder;

import org.apache.spark.annotation.Evolving;
-import org.apache.spark.sql.execution.arrow.ArrowUtils;
+import org.apache.spark.sql.util.ArrowUtils;
import org.apache.spark.sql.types.*;
import org.apache.spark.unsafe.types.UTF8String;

@@ -15,7 +15,7 @@
 * limitations under the License.
 */

-package org.apache.spark.sql.execution.arrow
+package org.apache.spark.sql.util

import scala.collection.JavaConverters._

@@ -15,9 +15,9 @@
 * limitations under the License.
 */

-package org.apache.spark.sql.execution.arrow
+package org.apache.spark.sql.util

-import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema}
+import org.apache.arrow.vector.types.pojo.ArrowType

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.util.DateTimeUtils
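
The move keeps the ArrowUtils helpers intact; only the package changes from org.apache.spark.sql.execution.arrow to org.apache.spark.sql.util. A hedged usage sketch (toArrowSchema is assumed from this era's helper surface):

import org.apache.spark.sql.types.{LongType, StructField, StructType}
import org.apache.spark.sql.util.ArrowUtils

// Convert a Spark schema to an Arrow schema, as the Python/R runners do.
val sparkSchema = StructType(Seq(StructField("id", LongType)))
val arrowSchema = ArrowUtils.toArrowSchema(sparkSchema, "UTC")
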
4 changes: 0 additions & 4 deletions sql/core/pom.xml
@@ -112,10 +112,6 @@
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-databind</artifactId>
    </dependency>
-    <dependency>
-      <groupId>org.apache.arrow</groupId>
-      <artifactId>arrow-vector</artifactId>
-    </dependency>
<dependency>
<groupId>org.apache.xbean</groupId>
<artifactId>xbean-asm7-shaded</artifactId>
@@ -34,6 +34,7 @@ import org.apache.spark.network.util.JavaUtils
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.types._
+import org.apache.spark.sql.util.ArrowUtils
import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector}
import org.apache.spark.util.{ByteBufferOutputStream, Utils}

@@ -25,6 +25,7 @@ import org.apache.arrow.vector.complex._
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.SpecializedGetters
import org.apache.spark.sql.types._
+import org.apache.spark.sql.util.ArrowUtils

object ArrowWriter {

@@ -28,8 +28,8 @@ import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning}
import org.apache.spark.sql.execution.{GroupedIterator, SparkPlan, UnaryExecNode}
-import org.apache.spark.sql.execution.arrow.ArrowUtils
import org.apache.spark.sql.types.{DataType, StructField, StructType}
+import org.apache.spark.sql.util.ArrowUtils
import org.apache.spark.util.Utils

/**
@@ -24,8 +24,8 @@ import org.apache.spark.api.python.{ChainedPythonFunctions, PythonEvalType}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.execution.arrow.ArrowUtils
import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.util.ArrowUtils

/**
* Grouped a iterator into batches.
@@ -29,8 +29,9 @@ import org.apache.arrow.vector.ipc.{ArrowStreamReader, ArrowStreamWriter}
import org.apache.spark._
import org.apache.spark.api.python._
import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.execution.arrow.{ArrowUtils, ArrowWriter}
+import org.apache.spark.sql.execution.arrow.ArrowWriter
import org.apache.spark.sql.types._
+import org.apache.spark.sql.util.ArrowUtils
import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector}
import org.apache.spark.util.Utils

@@ -27,9 +27,9 @@ import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning}
import org.apache.spark.sql.execution.{GroupedIterator, SparkPlan, UnaryExecNode}
-import org.apache.spark.sql.execution.arrow.ArrowUtils
import org.apache.spark.sql.types.StructType
-import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector}
+import org.apache.spark.sql.util.ArrowUtils
+import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch}

/**
* Physical node for [[org.apache.spark.sql.catalyst.plans.logical.FlatMapGroupsInPandas]]
@@ -29,9 +29,9 @@ import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning}
import org.apache.spark.sql.execution.{ExternalAppendOnlyUnsafeRowArray, SparkPlan}
-import org.apache.spark.sql.execution.arrow.ArrowUtils
import org.apache.spark.sql.execution.window._
import org.apache.spark.sql.types._
+import org.apache.spark.sql.util.ArrowUtils
import org.apache.spark.util.Utils

/**
@@ -31,8 +31,9 @@ import org.apache.spark.api.r._
import org.apache.spark.api.r.SpecialLengths
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.execution.arrow.{ArrowUtils, ArrowWriter}
+import org.apache.spark.sql.execution.arrow.ArrowWriter
import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.util.ArrowUtils
import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector}
import org.apache.spark.util.Utils

@@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types.{BinaryType, Decimal, IntegerType, StructField, StructType}
+import org.apache.spark.sql.util.ArrowUtils
import org.apache.spark.util.Utils


@@ -305,7 +305,7 @@ class RateStreamProviderSuite extends StreamTest {
    .load()
}
assert(exception.getMessage.contains(
-  "rate source does not support user-specified schema"))
+  "RateStreamProvider source does not support user-specified schema"))
}

test("continuous data") {
@@ -204,7 +204,7 @@ class TextSocketStreamSuite extends StreamTest with SharedSQLContext with Before
provider.getTable(new CaseInsensitiveStringMap(params.asJava), userSpecifiedSchema)
}
assert(exception.getMessage.contains(
-  "socket source does not support user-specified schema"))
+  "TextSocketSourceProvider source does not support user-specified schema"))
}

test("input row metrics") {
@@ -21,8 +21,8 @@ import org.apache.arrow.vector._
import org.apache.arrow.vector.complex._

import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.execution.arrow.ArrowUtils
import org.apache.spark.sql.types._
+import org.apache.spark.sql.util.ArrowUtils
import org.apache.spark.sql.vectorized.ArrowColumnVector
import org.apache.spark.unsafe.types.UTF8String

@@ -31,9 +31,9 @@ import org.apache.spark.SparkFunSuite
import org.apache.spark.memory.MemoryMode
import org.apache.spark.sql.{RandomDataGenerator, Row}
import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.execution.arrow.ArrowUtils
import org.apache.spark.sql.types._
-import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector}
+import org.apache.spark.sql.util.ArrowUtils
+import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch}
import org.apache.spark.unsafe.Platform
import org.apache.spark.unsafe.types.CalendarInterval
