Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ private[libsvm] class LibSVMFileFormat
"though the input. If you know the number in advance, please specify it via " +
"'numFeatures' option to avoid the extra scan.")

val paths = files.map(_.getPath.toUri.toString)
val paths = files.map(_.getPath.toString)
val parsed = MLUtils.parseLibSVMFile(sparkSession, paths)
MLUtils.computeNumFeatures(parsed)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ object MLUtils extends Logging {
DataSource.apply(
sparkSession,
paths = paths,
className = classOf[TextFileFormat].getName
className = classOf[TextFileFormat].getName,
options = Map(DataSource.GLOB_PATHS_KEY -> "false")
).resolveRelation(checkFilesExist = false))
.select("value")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,4 +191,24 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext {
spark.sql("DROP TABLE IF EXISTS libsvmTable")
}
}

test("SPARK-32815: Test LibSVM data source on file paths with glob metacharacters") {
  // Round-trips a single-row DataFrame through the libsvm data source using a
  // directory whose name is made of square brackets: the data is saved under the
  // literal name and loaded back through the backslash-escaped form of that name.
  withTempDir { tempDir =>
    val root = tempDir.getCanonicalPath
    val literalName = "[abc]"
    val escapedName = "\\[abc\\]"

    // The one feature vector that is written out and expected back after reading.
    val expectedFeatures = Vectors.sparse(2, Seq((0, 2.0), (1, 3.0)))

    // Column names deliberately differ from the names "label" / "features".
    val schema = new StructType()
      .add("labelFoo", DoubleType, false)
      .add("featuresBar", VectorType, false)
    val rows = new java.util.ArrayList[Row]()
    rows.add(Row(1.0, expectedFeatures))

    spark.createDataFrame(rows, schema)
      .write
      .format("libsvm")
      .save(s"$root/$literalName")

    // No schema is supplied on read; column 1 of the loaded row holds the features.
    val loaded = spark.read.format("libsvm").load(s"$root/$escapedName")
    val features = loaded.first().getAs[SparseVector](1)
    assert(features == expectedFeatures)
  }
}
}