
[SPARK-28554][SQL] Adds a v1 fallback writer implementation for v2 data source codepaths #25348

Closed · brkyvz wants to merge 25 commits

Commits (25)
d5798fd  Adds a v1 fallback writer implementation for v2 data source codepaths (brkyvz, Aug 3, 2019)
59094e9  Update WriteToDataSourceV2Exec.scala (brkyvz, Aug 4, 2019)
1587d31  Merge branch 'master' of github.com:apache/spark into v1WriteFallback (brkyvz, Aug 5, 2019)
5aed803  some changes but doubtful (brkyvz, Aug 5, 2019)
bcdf8c5  Merge branch 'master' of github.com:apache/spark into v1WriteFallback (brkyvz, Aug 7, 2019)
a1284a1  Revert "some changes but doubtful" (brkyvz, Aug 7, 2019)
ef2ec72  Address comments and separate whatever's possible (brkyvz, Aug 7, 2019)
335a92d  update docs (brkyvz, Aug 7, 2019)
3e35d5c  minor move for better separation (brkyvz, Aug 7, 2019)
41c4c0a  use implicit class (brkyvz, Aug 7, 2019)
138f2b9  use insertable relation instead (brkyvz, Aug 7, 2019)
078d0f1  Merge branch 'master' of github.com:apache/spark into v1WriteFallback (brkyvz, Aug 8, 2019)
d816824  address comments (brkyvz, Aug 8, 2019)
c62a60d  Merge branch 'master' of github.com:apache/spark into v1WriteFallback (brkyvz, Aug 12, 2019)
cfdb036  Added basic tests (brkyvz, Aug 12, 2019)
442836b  Update V1WriteBuilder.scala (brkyvz, Aug 12, 2019)
7396f24  fix tests (brkyvz, Aug 14, 2019)
20b906d  Merge branch 'v1WriteFallback' of github.com:brkyvz/spark into v1Writ… (brkyvz, Aug 14, 2019)
a93bf8c  Update V1WriteBuilder.scala (brkyvz, Aug 14, 2019)
83dbd78  Merge branch 'master' into v1WriteFallback (brkyvz, Aug 14, 2019)
9bfb76e  Merge branch 'master' of github.com:apache/spark into v1WriteFallback (brkyvz, Aug 20, 2019)
00347ee  Add table capability to do V1_BATCH_WRITE (brkyvz, Aug 20, 2019)
d4d6276  test refactor (brkyvz, Aug 20, 2019)
749ae85  Update V1WriteFallbackSuite.scala (brkyvz, Aug 20, 2019)
27598ce  address nits (brkyvz, Aug 21, 2019)
TableCapability.java
@@ -89,5 +89,14 @@ public enum TableCapability {
/**
* Signals that the table accepts input of any schema in a write operation.
*/
ACCEPT_ANY_SCHEMA
ACCEPT_ANY_SCHEMA,

/**
* Signals that the table supports append writes using the V1 InsertableRelation interface.
* <p>
* Tables that return this capability must create a V1WriteBuilder and may also support additional
* write modes, such as {@link #TRUNCATE} and {@link #OVERWRITE_BY_FILTER}, but cannot support
* {@link #OVERWRITE_DYNAMIC}.
*/
V1_BATCH_WRITE
}
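
To make the capability concrete, here is a minimal sketch of a connector table that declares V1_BATCH_WRITE. The class and builder names (MyJdbcTable, MyV1WriteBuilder) are hypothetical and not part of this PR; the builder side is sketched at the end of this excerpt.

import java.util.{Set => JSet}

import scala.collection.JavaConverters._

import org.apache.spark.sql.sources.v2.{SupportsWrite, TableCapability}
import org.apache.spark.sql.sources.v2.writer.WriteBuilder
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap

// Hypothetical table that routes appends through the V1 fallback path.
class MyJdbcTable extends SupportsWrite {
  override def name(): String = "my_jdbc_table"
  override def schema(): StructType = new StructType().add("id", "long")

  // Declaring V1_BATCH_WRITE makes the planner pick AppendDataExecV1
  // (and OverwriteByExpressionExecV1) instead of the native v2 nodes.
  override def capabilities(): JSet[TableCapability] =
    Set(TableCapability.V1_BATCH_WRITE, TableCapability.TRUNCATE).asJava

  // Must return a V1WriteBuilder when V1_BATCH_WRITE is declared.
  override def newWriteBuilder(options: CaseInsensitiveStringMap): WriteBuilder =
    new MyV1WriteBuilder(options)
}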
DataSourceV2Strategy.scala
@@ -17,6 +17,8 @@

package org.apache.spark.sql.execution.datasources.v2

import java.util.UUID

import scala.collection.JavaConverters._
import scala.collection.mutable

@@ -29,8 +31,10 @@ import org.apache.spark.sql.execution.{FilterExec, ProjectExec, SparkPlan}
import org.apache.spark.sql.execution.datasources.DataSourceStrategy
import org.apache.spark.sql.execution.streaming.continuous.{ContinuousCoalesceExec, WriteToContinuousDataSource, WriteToContinuousDataSourceExec}
import org.apache.spark.sql.sources
import org.apache.spark.sql.sources.v2.TableCapability
import org.apache.spark.sql.sources.v2.reader._
import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousStream, MicroBatchStream}
import org.apache.spark.sql.sources.v2.writer.V1WriteBuilder
import org.apache.spark.sql.util.CaseInsensitiveStringMap

object DataSourceV2Strategy extends Strategy with PredicateHelper {
@@ -169,10 +173,10 @@ object DataSourceV2Strategy extends Strategy with PredicateHelper {
catalog match {
case staging: StagingTableCatalog =>
AtomicCreateTableAsSelectExec(
staging, ident, parts, planLater(query), props, writeOptions, ifNotExists) :: Nil
staging, ident, parts, query, planLater(query), props, writeOptions, ifNotExists) :: Nil
case _ =>
CreateTableAsSelectExec(
catalog, ident, parts, planLater(query), props, writeOptions, ifNotExists) :: Nil
catalog, ident, parts, query, planLater(query), props, writeOptions, ifNotExists) :: Nil
}

case ReplaceTable(catalog, ident, schema, parts, props, orCreate) =>
@@ -191,6 +195,7 @@ object DataSourceV2Strategy extends Strategy with PredicateHelper {
staging,
ident,
parts,
query,
planLater(query),
props,
writeOptions,
@@ -200,24 +205,33 @@ object DataSourceV2Strategy extends Strategy with PredicateHelper {
catalog,
ident,
parts,
query,
planLater(query),
props,
writeOptions,
orCreate = orCreate) :: Nil
}

case AppendData(r: DataSourceV2Relation, query, _) =>
AppendDataExec(r.table.asWritable, r.options, planLater(query)) :: Nil
Review thread:

Contributor: Why do we reuse the v2 CreateTableAsSelectExec for v1 fallback, but not AppendDataExec?

Contributor Author (brkyvz): Because table creation happens after certain operations with side effects in CTAS and RTAS.

Contributor: I think this is because CTAS and RTAS plans don't have a table instance. CTAS and RTAS create a table at runtime and then write to it.

Contributor: This explains why we must reuse CreateTableAsSelectExec for v1 fallback, but why can't we reuse AppendDataExec for v1 fallback as well?

Contributor Author (brkyvz): It's just cleaner to have a separation of which code path was used. (It shows up separately in the SQL tab, etc.)

Contributor: If end users look at the SQL tab and see AppendDataExecV1, they would expect to see a v1 version of the CTAS physical plan as well, and may report a bug when they don't see it.

By the way, I think there are other ways to implement this feature (letting users know from the SQL tab whether the v1 fallback was triggered), e.g. we could use SQLMetrics to report it, which can be updated at runtime and would cover CTAS as well.

Contributor Author (brkyvz): I think most (98%) of Spark users don't know what the physical nodes stand for anyway. I was thinking of using metrics as well. We just wanted to keep the nodes separate, because the semantics are a bit different for things like OverwriteByExpression, where the source's implementation of Overwrite + Append (the v2 behavior) may not be atomic, etc.

Contributor: Good point on having different physical nodes for different semantics. It seems to me that the semantics of append are the same between v1 and v2. Shall we reuse AppendDataExec? Then we can tell a consistent story: if the semantics are the same, reuse the v2 plan (CREATE TABLE, CTAS, APPEND); otherwise, create a new physical node (OVERWRITE).

Contributor: I like the idea of using the same physical plan node when the semantics match, but I'm not sure it is possible to make strong guarantees about not changing v1 behavior if we do. The problem is that there are multiple v1 plan nodes for the same operation, which could have slightly different behavior.

We could take the time to inspect the v1 implementations and convert them, but that adds risk and takes time. It also isn't needed to migrate to v2, and wouldn't speed up the migration, unless I'm missing something. So it probably doesn't provide enough value to be worthwhile.
r.table.asWritable match {
case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) =>
AppendDataExecV1(v1, r.options, query) :: Nil
case v2 =>
AppendDataExec(v2, r.options, planLater(query)) :: Nil
}

case OverwriteByExpression(r: DataSourceV2Relation, deleteExpr, query, _) =>
// fail if any filter cannot be converted. correctness depends on removing all matching data.
val filters = splitConjunctivePredicates(deleteExpr).map {
filter => DataSourceStrategy.translateFilter(filter).getOrElse(
throw new AnalysisException(s"Cannot translate expression to source filter: $filter"))
}.toArray

OverwriteByExpressionExec(
r.table.asWritable, filters, r.options, planLater(query)) :: Nil
r.table.asWritable match {
case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) =>
OverwriteByExpressionExecV1(v1, filters, r.options, query) :: Nil
case v2 =>
OverwriteByExpressionExec(v2, filters, r.options, planLater(query)) :: Nil
}

case OverwritePartitionsDynamic(r: DataSourceV2Relation, query, _) =>
OverwritePartitionsDynamicExec(r.table.asWritable, r.options, planLater(query)) :: Nil
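The V1WriteBuilder trait that the strategy checks for is added elsewhere in this PR (V1WriteBuilder.scala, not shown in this excerpt). Based on how the fallback nodes below consume it, its shape is roughly the following sketch; the exact annotations and modifiers in the PR may differ.

package org.apache.spark.sql.sources.v2.writer

import org.apache.spark.sql.sources.InsertableRelation

// Sketch: a WriteBuilder that, instead of producing a v2 BatchWrite,
// hands back a v1 InsertableRelation for the fallback exec nodes to
// call insert() on.
trait V1WriteBuilder extends WriteBuilder {
  def buildForV1Write(): InsertableRelation
}
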
V1FallbackWriters.scala (new file)
@@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.datasources.v2

import java.util.UUID

import scala.collection.JavaConverters._

import org.apache.spark.SparkException
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Dataset, SaveMode}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.sources.{AlwaysTrue, CreatableRelationProvider, Filter, InsertableRelation}
import org.apache.spark.sql.sources.v2.{SupportsWrite, Table}
import org.apache.spark.sql.sources.v2.writer._
import org.apache.spark.sql.util.CaseInsensitiveStringMap

/**
* Physical plan node for append into a v2 table using V1 write interfaces.
*
* Rows in the output data set are appended.
*/
case class AppendDataExecV1(
table: SupportsWrite,
writeOptions: CaseInsensitiveStringMap,
plan: LogicalPlan) extends V1FallbackWriters {

override protected def doExecute(): RDD[InternalRow] = {
writeWithV1(newWriteBuilder().buildForV1Write())
}
}

/**
* Physical plan node for overwrite into a v2 table with V1 write interfaces. Note that when this
* interface is used, the atomicity of the operation depends solely on the target data source.
*
* Overwrites data in a table matched by a set of filters. Rows matching all of the filters will be
* deleted and rows in the output data set are appended.
*
* This plan is used to implement SaveMode.Overwrite. The behavior of SaveMode.Overwrite is to
* truncate the table -- delete all rows -- and append the output data set. This uses the filter
* AlwaysTrue to delete all rows.
*/
case class OverwriteByExpressionExecV1(
table: SupportsWrite,
deleteWhere: Array[Filter],
writeOptions: CaseInsensitiveStringMap,
plan: LogicalPlan) extends V1FallbackWriters {

private def isTruncate(filters: Array[Filter]): Boolean = {
filters.length == 1 && filters(0).isInstanceOf[AlwaysTrue]
}

override protected def doExecute(): RDD[InternalRow] = {
newWriteBuilder() match {
case builder: SupportsTruncate if isTruncate(deleteWhere) =>
writeWithV1(builder.truncate().asV1Builder.buildForV1Write())

case builder: SupportsOverwrite =>
writeWithV1(builder.overwrite(deleteWhere).asV1Builder.buildForV1Write())

case _ =>
throw new SparkException(s"Table does not support overwrite by expression: $table")
}
}
}

/** Some helper interfaces that use V2 write semantics through the V1 writer interface. */
sealed trait V1FallbackWriters extends SupportsV1Write {
override def output: Seq[Attribute] = Nil
override final def children: Seq[SparkPlan] = Nil

def table: SupportsWrite
def writeOptions: CaseInsensitiveStringMap

protected implicit class toV1WriteBuilder(builder: WriteBuilder) {
def asV1Builder: V1WriteBuilder = builder match {
case v1: V1WriteBuilder => v1
case other => throw new IllegalStateException(
s"The returned writer ${other} was no longer a V1WriteBuilder.")
}
}

protected def newWriteBuilder(): V1WriteBuilder = {
val writeBuilder = table.newWriteBuilder(writeOptions)
.withInputDataSchema(plan.schema)
.withQueryId(UUID.randomUUID().toString)
writeBuilder.asV1Builder
}
}

/**
* A trait that allows Tables that use V1 Writer interfaces to append data.
*/
trait SupportsV1Write extends SparkPlan {
// TODO: We should be able to work on SparkPlans at this point.
def plan: LogicalPlan

protected def writeWithV1(relation: InsertableRelation): RDD[InternalRow] = {
relation.insert(Dataset.ofRows(sqlContext.sparkSession, plan), overwrite = false)
sparkContext.emptyRDD
}
}
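
For completeness, here is a hedged sketch of the connector side that these nodes exercise, continuing the hypothetical MyJdbcTable example from earlier. MyV1WriteBuilder and its comments are assumptions for illustration, not code from this PR.

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.sources.InsertableRelation
import org.apache.spark.sql.sources.v2.writer.{SupportsTruncate, V1WriteBuilder, WriteBuilder}
import org.apache.spark.sql.util.CaseInsensitiveStringMap

// Hypothetical builder backing MyJdbcTable. Because it mixes in
// V1WriteBuilder, AppendDataExecV1 ends up calling
// buildForV1Write().insert(df, overwrite = false).
class MyV1WriteBuilder(options: CaseInsensitiveStringMap)
  extends V1WriteBuilder with SupportsTruncate {

  private var doTruncate = false

  // Invoked by OverwriteByExpressionExecV1 when the delete filter is
  // AlwaysTrue, i.e. SaveMode.Overwrite semantics.
  override def truncate(): WriteBuilder = {
    doTruncate = true
    this
  }

  override def buildForV1Write(): InsertableRelation = new InsertableRelation {
    override def insert(data: DataFrame, overwrite: Boolean): Unit = {
      if (doTruncate) {
        // e.g. delete all existing rows in the external system first
      }
      // write `data` using the source's existing v1 append path
    }
  }
}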