From caeeb0d3cf9f0f898a8fa730723005e8c4ef77b5 Mon Sep 17 00:00:00 2001
From: s71955 <sujithchacko.2010@gmail.com>
Date: Tue, 11 Sep 2018 19:41:55 +0530
Subject: [PATCH] [SPARK-23425][SQL][FOLLOWUP] Support wildcards in HDFS path
 for load table command.

What changes were proposed in this pull request
Updated the Migration guide for the behavior changes done in the JIRA issue SPARK-23425. Added testcase for verifying the
Load command with space character in a file name.

How was this patch tested?
Manually verified and added UT.
---
 docs/sql-programming-guide.md                     |  1 +
 .../spark/sql/hive/execution/SQLQuerySuite.scala  | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 9da7d64322eb6..e262987ab23de 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -1898,6 +1898,7 @@ working with timestamps in `pandas_udf`s to get the best performance, see
   - Since Spark 2.4, File listing for compute statistics is done in parallel by default. This can be disabled by setting `spark.sql.parallelFileListingInStatsComputation.enabled` to `False`.
   - Since Spark 2.4, Metadata files (e.g. Parquet summary files) and temporary files are not counted as data files when calculating table size during Statistics computation.
   - Since Spark 2.4, empty strings are saved as quoted empty strings `""`. In version 2.3 and earlier, empty strings are equal to `null` values and do not reflect to any characters in saved CSV files. For example, the row of `"a", null, "", 1` was writted as `a,,,1`. Since Spark 2.4, the same row is saved as `a,,"",1`. To restore the previous behavior, set the CSV option `emptyValue` to empty (not quoted) string.  
+  - Since Spark 2.4, The LOAD DATA command supports wildcard `?` and `*`, which match any one character, and zero or more characters, respectively. Example: `LOAD DATA INPATH '/tmp/folder*/'` or `LOAD DATA INPATH '/tmp/part-?'`. Special Characters like `space` also now work in paths. Example: `LOAD DATA INPATH '/tmp/folder name/'`.
 
 ## Upgrading From Spark SQL 2.3.0 to 2.3.1 and above
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 20c4c36c05091..e49aea267026e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -1916,6 +1916,21 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
     }
   }
 
+  test("SPARK-23425 Test LOAD DATA LOCAL INPATH with space in file name") {
+    withTempDir { dir =>
+      val path = dir.toURI.toString.stripSuffix("/")
+      val dirPath = dir.getAbsoluteFile
+      for (i <- 1 to 3) {
+        Files.write(s"$i", new File(dirPath, s"part-r-0000 $i"), StandardCharsets.UTF_8)
+      }
+      withTable("load_t") {
+        sql("CREATE TABLE load_t (a STRING)")
+        sql(s"LOAD DATA LOCAL INPATH '$path/part-r-0000 1' INTO TABLE load_t")
+        checkAnswer(sql("SELECT * FROM load_t"), Seq(Row("1")))
+      }
+    }
+  }
+
   test("Support wildcard character in folderlevel for LOAD DATA LOCAL INPATH") {
     withTempDir { dir =>
       val path = dir.toURI.toString.stripSuffix("/")