apache · jeanlyn · Jan 4, 2015 · Jan 6, 2015 · Jan 6, 2015 · srowen
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -35,6 +35,7 @@ import org.apache.hadoop.mapred.Reporter
 import org.apache.hadoop.mapred.JobID
 import org.apache.hadoop.mapred.TaskAttemptID
 import org.apache.hadoop.mapred.TaskID
+import org.apache.hadoop.mapred.InvalidInputException
 import org.apache.hadoop.util.ReflectionUtils
 
 import org.apache.spark._
@@ -198,12 +199,20 @@ class HadoopRDD[K, V](
     if (inputFormat.isInstanceOf[Configurable]) {
       inputFormat.asInstanceOf[Configurable].setConf(jobConf)
     }
-    val inputSplits = inputFormat.getSplits(jobConf, minPartitions)
-    val array = new Array[Partition](inputSplits.size)
-    for (i <- 0 until inputSplits.size) {
-      array(i) = new HadoopPartition(id, i, inputSplits(i))
+    // SPARK-5068:catch the exception when the path is not exists
+    try {
+      val inputSplits = inputFormat.getSplits(jobConf, minPartitions)
+      val array = new Array[Partition](inputSplits.size)
+      for (i <- 0 until inputSplits.size) {
+        array(i) = new HadoopPartition(id, i, inputSplits(i))
+      }
+      array
+    } catch {
+      case e: InvalidInputException => {
+        logWarning("InvalidInput!", e)
+        new Array[Partition](0)
+      }
     }
-    array
   }
 
   override def compute(theSplit: Partition, context: TaskContext): InterruptibleIterator[(K, V)] = {

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive
+
+import java.io.File
+
+import com.google.common.io.Files
+import org.apache.spark.sql.{QueryTest, _}
+import org.apache.spark.sql.hive.test.TestHive
+/* Implicits */
+import org.apache.spark.sql.hive.test.TestHive._
+
+
+class QueryPartitionSuite extends QueryTest {
+
+  test("SPARK-5068: query data when path doesn't exists"){
+    val testData = TestHive.sparkContext.parallelize(
+      (1 to 10).map(i => TestData(i, i.toString)))
+    testData.registerTempTable("testData")
+
+    val tmpDir = Files.createTempDir()
+    //create the table for test
+    sql(s"CREATE TABLE table_with_partition(key int,value string) PARTITIONED by (ds string) location '${tmpDir.toURI.toString}' ")
+    sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='1') SELECT key,value FROM testData")
+    sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='2') SELECT key,value FROM testData")
+    sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='3') SELECT key,value FROM testData")
+    sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='4') SELECT key,value FROM testData")
+    //test for the exist path
+    checkAnswer(sql("select key,value from table_with_partition"),
+      testData.collect.toSeq ++ testData.collect.toSeq ++ testData.collect.toSeq ++ testData.collect.toSeq)
+
+    //delect the path of one partition
+    val folders = tmpDir.listFiles.filter(_.isDirectory).toList
+    def deleteAll(file:File){
+      if (file.isDirectory()) {
+        for (f:File <-file.listFiles()) {
+          deleteAll(f);
+        }
+      }
+      file.delete();
+    }
+    deleteAll(folders(0))
+
+    //test for the affter delete the path
+    checkAnswer(sql("select key,value from table_with_partition"),
+      testData.collect.toSeq ++ testData.collect.toSeq ++ testData.collect.toSeq)
+
+    sql("DROP TABLE table_with_partition")
+    sql("DROP TABLE createAndInsertTest")
+  }
+}