From ff92ba3ae3abaae0d8ce8bb8c08465f43c14906f Mon Sep 17 00:00:00 2001
From: xuanyuanking
Date: Mon, 17 Jul 2017 15:38:56 +0800
Subject: [PATCH 1/2] Empty files should be skipped while writing to file

---
 .../datasources/FileFormatWriter.scala       | 18 +++++++-
 .../datasources/FileFormatWriterSuite.scala  | 45 +++++++++++++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala
index 9eb9eae699e94..3a8505fb68384 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala
@@ -236,7 +236,9 @@ object FileFormatWriter extends Logging {
     committer.setupTask(taskAttemptContext)
 
     val writeTask =
-      if (description.partitionColumns.isEmpty && description.bucketIdExpression.isEmpty) {
+      if (!iterator.hasNext) {
+        new EmptyDirectoryWriteTask
+      } else if (description.partitionColumns.isEmpty && description.bucketIdExpression.isEmpty) {
         new SingleDirectoryWriteTask(description, taskAttemptContext, committer)
       } else {
         new DynamicPartitionWriteTask(description, taskAttemptContext, committer)
@@ -301,6 +303,20 @@ object FileFormatWriter extends Logging {
     }
   }
 
+  /** ExecuteWriteTask for empty partitions */
+  private class EmptyDirectoryWriteTask extends ExecuteWriteTask {
+
+    override def execute(iter: Iterator[InternalRow]): ExecutedWriteSummary = {
+      ExecutedWriteSummary(
+        updatedPartitions = Set.empty,
+        numOutputFile = 0,
+        numOutputBytes = 0,
+        numOutputRows = 0)
+    }
+
+    override def releaseResources(): Unit = {}
+  }
+
   /** Writes data to a single directory (used for non-dynamic-partition writes). */
   private class SingleDirectoryWriteTask(
       description: WriteJobDescription,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala
new file mode 100644
index 0000000000000..acf47bcd94ccb
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources
+
+import java.io.{File, FilenameFilter}
+
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.test.SharedSQLContext
+
+class FileFormatWriterSuite extends QueryTest with SharedSQLContext {
+
+  test("empty file should be skipped while writing to file") {
+    withTempDir { dir =>
+      dir.delete()
+      spark.range(1000).repartition(2).write.parquet(dir.toString)
+      val df = spark.read.parquet(dir.toString)
+
+      withTempDir { dst_dir =>
+        dst_dir.delete()
+        df.where("id = 50").write.parquet(dst_dir.toString)
+        val allFiles = dst_dir.listFiles(new FilenameFilter {
+          override def accept(dir: File, name: String): Boolean = {
+            !name.startsWith(".") && !name.startsWith("_")
+          }
+        })
+        assert(allFiles.length == 1)
+      }
+    }
+  }
+}

From e08fb1939d5267a38f3318af7506b6ed8628ebbf Mon Sep 17 00:00:00 2001
From: xuanyuanking
Date: Mon, 17 Jul 2017 18:20:34 +0800
Subject: [PATCH 2/2] Handle the empty result of Parquet

---
 .../spark/sql/execution/datasources/FileFormatWriter.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala
index 3a8505fb68384..2654704c1da05 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala
@@ -236,7 +236,7 @@ object FileFormatWriter extends Logging {
     committer.setupTask(taskAttemptContext)
 
     val writeTask =
-      if (!iterator.hasNext) {
+      if (sparkPartitionId != 0 && !iterator.hasNext) {
         new EmptyDirectoryWriteTask
       } else if (description.partitionColumns.isEmpty && description.bucketIdExpression.isEmpty) {
         new SingleDirectoryWriteTask(description, taskAttemptContext, committer)
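
A rough usage sketch of the behaviour the two commits aim for (illustrative only, not part of the patches): the object name, temp directories, and local SparkSession setup below are assumptions. With both patches applied, empty partitions other than partition 0 should emit no part files, while partition 0 always writes one file so a completely empty result still leaves readable Parquet metadata behind.

// Illustrative sketch only (not part of the patches); assumes a build with
// both commits applied and a local SparkSession.
import org.apache.spark.sql.SparkSession

object EmptyPartitionWriteExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("empty-partition-write")
      .getOrCreate()

    // Count data files, ignoring hidden and metadata files such as _SUCCESS.
    def countDataFiles(dir: String): Int =
      new java.io.File(dir).listFiles()
        .count(f => f.isFile && !f.getName.startsWith(".") && !f.getName.startsWith("_"))

    // Case 1: after the filter, only one of the two partitions holds a row.
    // Empty partitions other than partition 0 should no longer emit part files.
    val sparse = spark.range(1000).repartition(2).where("id = 50")
    val sparseDir = java.nio.file.Files.createTempDirectory("sparse-write").toString
    sparse.write.mode("overwrite").parquet(sparseDir)
    println(s"files for single-row result: ${countDataFiles(sparseDir)}")

    // Case 2: the result is completely empty. The second commit keeps partition 0
    // writing a schema-only file, so the directory stays readable as Parquet.
    val empty = spark.range(1000).where("id < 0")
    val emptyDir = java.nio.file.Files.createTempDirectory("empty-write").toString
    empty.write.mode("overwrite").parquet(emptyDir)
    println(s"files for empty result: ${countDataFiles(emptyDir)}")
    spark.read.parquet(emptyDir).printSchema()

    spark.stop()
  }
}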