From 0ea1c1c5f4127c66015b5831943460456f102bdd Mon Sep 17 00:00:00 2001 From: lewuathe Date: Tue, 8 Sep 2015 00:22:17 +0900 Subject: [PATCH] LibSVMRelation is registered into META-INF --- .../ml/source/libsvm/LibSVMRelation.scala | 4 +++ .../spark/ml/source/libsvm/package.scala | 33 ------------------- .../ml/source/JavaLibSVMRelationSuite.java | 3 +- .../spark/ml/source/LibSVMRelationSuite.scala | 9 ++--- ...pache.spark.sql.sources.DataSourceRegister | 1 + 5 files changed, 11 insertions(+), 39 deletions(-) delete mode 100644 mllib/src/main/scala/org/apache/spark/ml/source/libsvm/package.scala diff --git a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala index 92114d56a0268..b0fc6603ced17 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala @@ -64,6 +64,10 @@ private[ml] class LibSVMRelation(val path: String, val numFeatures: Int, val vec } +/** + * This is used for creating a DataFrame from a file in LibSVM format. + * The path to the LibSVM file must be specified to DefaultSource. + */ class DefaultSource extends RelationProvider with DataSourceRegister { override def shortName(): String = "libsvm" diff --git a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/package.scala b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/package.scala deleted file mode 100644 index f15253c7657cc..0000000000000 --- a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/package.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.ml.source - -import org.apache.spark.sql.{DataFrame, DataFrameReader} - -package object libsvm { - - /** - * Implicit declaration in order to be used from SQLContext. - * It is necessary to import org.apache.spark.ml.source.libsvm._ - * @param read Given original DataFrameReader - */ - implicit class LibSVMReader(read: DataFrameReader) { - def libsvm(filePath: String): DataFrame - = read.format(classOf[DefaultSource].getName).load(filePath) - } -} diff --git a/mllib/src/test/java/org/apache/spark/ml/source/JavaLibSVMRelationSuite.java b/mllib/src/test/java/org/apache/spark/ml/source/JavaLibSVMRelationSuite.java index a00820d23773b..5ccbafb640cf1 100644 --- a/mllib/src/test/java/org/apache/spark/ml/source/JavaLibSVMRelationSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/source/JavaLibSVMRelationSuite.java @@ -68,8 +68,7 @@ public void tearDown() { @Test public void verifyLibSVMDF() { - dataset = jsql.read().format("org.apache.spark.ml.source.libsvm").option("vectorType", "dense") - .load(path.getPath()); + dataset = jsql.read().format("libsvm").option("vectorType", "dense").load(path.getPath()); Assert.assertEquals("label", dataset.columns()[0]); Assert.assertEquals("features", dataset.columns()[1]); Row r = dataset.first(); diff --git a/mllib/src/test/scala/org/apache/spark/ml/source/LibSVMRelationSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/source/LibSVMRelationSuite.scala index 8fa51f1d521b6..dc9980d526e9d 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/source/LibSVMRelationSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/source/LibSVMRelationSuite.scala @@ -45,7 +45,7 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext { } test("select as sparse vector") { - val df = sqlContext.read.libsvm(path) + val df = sqlContext.read.format("libsvm").load(path) assert(df.columns(0) == "label") assert(df.columns(1) == "features") val row1 = df.first() @@ -55,8 +55,8 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext { } test("select as dense vector") { - val df = sqlContext.read.options(Map("vectorType" -> "dense")) - .libsvm(path) + val df = sqlContext.read.format("libsvm").options(Map("vectorType" -> "dense")) + .load(path) assert(df.columns(0) == "label") assert(df.columns(1) == "features") assert(df.count() == 3) @@ -75,7 +75,8 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext { val tempDir = Utils.createTempDir() val file = new File(tempDir.getPath, "part-00001") Files.write(lines, file, Charsets.US_ASCII) - val df = sqlContext.read.option("numFeatures", "100").libsvm(tempDir.toURI.toString) + val df = sqlContext.read.option("numFeatures", "100").format("libsvm") + .load(tempDir.toURI.toString) val row1 = df.first() val v = row1.getAs[SparseVector](1) assert(v == Vectors.sparse(100, Seq((0, 1.0), (9, 2.0), (19, 3.0), (29, 4.0), (39, 5.0), diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index ca50000b4756e..55bebf96dabb2 100644 --- a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ 
b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -1,3 +1,4 @@ org.apache.spark.sql.execution.datasources.jdbc.DefaultSource org.apache.spark.sql.execution.datasources.json.DefaultSource org.apache.spark.sql.execution.datasources.parquet.DefaultSource +org.apache.spark.ml.source.libsvm.DefaultSource