Skip to content
Permalink
Browse files

[SPARK-27552][SQL] The configuration `hive.exec.stagingdir` is invali…

…d on Windows OS

## What changes were proposed in this pull request?
If we set `hive.exec.stagingdir=.test-staging\tmp`,
the staging directory is still `.hive-staging` on Windows OS.

Reasons for failure:
Test code:
```
 val path = new Path("C:\\test\\hivetable")
  println("path.toString: " + path.toString)
  println("path.toUri.getPath: " + path.toUri.getPath)
```

Output:
```
path.toString: C:/test/hivetable
path.toUri.getPath: /C:/test/hivetable
```
We can see that `path.toUri.getPath` has one more leading separator than `path.toString`, and that the separator is `/`, not `\`.
So `stagingPathName.stripPrefix(inputPathName).stripPrefix(File.separator).startsWith(".")` returns false on Windows, even when the staging directory name starts with `.`.
## How was this patch tested?
1. Existing tests
2. Manual testing on Windows OS

Closes #24446 from 10110346/stagingdir.

Authored-by: liuxian <liu.xian3@zte.com.cn>
Signed-off-by: Sean Owen <sean.owen@databricks.com>
  • Loading branch information...
10110346 authored and srowen committed May 17, 2019
1 parent 141a3bf commit 9bca99b29b8ce4f13252d3c07ef4d12d45cb82b4
@@ -210,12 +210,11 @@ private[hive] trait SaveAsHiveFile extends DataWritingCommand {
stagingDir)
}

private def getStagingDir(
private[hive] def getStagingDir(
inputPath: Path,
hadoopConf: Configuration,
stagingDir: String): Path = {
val inputPathUri: URI = inputPath.toUri
val inputPathName: String = inputPathUri.getPath
val inputPathName: String = inputPath.toString
val fs: FileSystem = inputPath.getFileSystem(hadoopConf)
var stagingPathName: String =
if (inputPathName.indexOf(stagingDir) == -1) {
@@ -228,7 +227,7 @@ private[hive] trait SaveAsHiveFile extends DataWritingCommand {
// staging directory needs to avoid being deleted when users set hive.exec.stagingdir
// under the table directory.
if (isSubDir(new Path(stagingPathName), inputPath, fs) &&
!stagingPathName.stripPrefix(inputPathName).stripPrefix(File.separator).startsWith(".")) {
!stagingPathName.stripPrefix(inputPathName).stripPrefix("/").startsWith(".")) {
logDebug(s"The staging dir '$stagingPathName' should be a child directory starts " +
"with '.' to avoid being deleted if we set hive.exec.stagingdir under the table " +
"directory.")
@@ -19,12 +19,14 @@ package org.apache.spark.sql.hive

import java.io.File

import org.scalatest.BeforeAndAfter
import org.apache.hadoop.fs.Path
import org.scalatest.{BeforeAndAfter, PrivateMethodTester}

import org.apache.spark.SparkException
import org.apache.spark.sql.{QueryTest, _}
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.catalyst.plans.logical.InsertIntoTable
import org.apache.spark.sql.hive.execution.InsertIntoHiveTable
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SQLTestUtils
@@ -36,7 +38,7 @@ case class TestData(key: Int, value: String)
case class ThreeCloumntable(key: Int, value: String, key1: String)

class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter
with SQLTestUtils {
with SQLTestUtils with PrivateMethodTester {
import spark.implicits._

override lazy val testData = spark.sparkContext.parallelize(
@@ -550,6 +552,32 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter
}
}

test("SPARK-27552: hive.exec.stagingdir is invalid on Windows OS") {
  // Drives the `getStagingDir` helper through ScalaTest's PrivateMethodTester with a fixed
  // table path and a series of `hive.exec.stagingdir` values, checking that each resolved
  // staging path contains the expected fragment.
  val hadoopConf = spark.sessionState.newHadoopConf()
  val tablePath = new Path("/tmp/b/c")
  val command = InsertIntoHiveTable(null, Map.empty, null, false, false, null)
  val resolveStagingDir = PrivateMethod[Path]('getStagingDir)

  // Each entry pairs a configured staging dir with the substring that must appear in the
  // path returned for it. Entries are checked in the order listed.
  val expectations = Seq(
    "tmp/b" -> "/tmp/b_hive_",
    "tmp/b/c" -> "/tmp/b/c/.hive-staging_hive_",
    // A child directory that does not start with '.' is expected to end up under
    // `.hive-staging` instead.
    "d/e" -> "/tmp/b/c/.hive-staging_hive_",
    // A child directory that starts with '.' is expected to be kept as configured.
    ".d/e" -> "/tmp/b/c/.d/e_hive_",
    "/tmp/c/" -> "/tmp/c_hive_"
  )

  expectations.foreach { case (stagingDir, expectedFragment) =>
    val resolved = command invokePrivate resolveStagingDir(tablePath, hadoopConf, stagingDir)
    assert(resolved.toString.indexOf(expectedFragment) != -1)
  }
}

test("insert overwrite to dir from hive metastore table") {
withTempDir { dir =>
val path = dir.toURI.getPath

0 comments on commit 9bca99b

Please sign in to comment.
You can’t perform that action at this time.