apache · mengxr · Jun 21, 2016
diff --git a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMDataSource.scala b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMDataSource.scala
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.source.libsvm
+
+import org.apache.spark.ml.linalg.Vector
+import org.apache.spark.sql.{DataFrame, DataFrameReader}
+
+/**
+ * `libsvm` package implements Spark SQL data source API for loading LIBSVM data as [[DataFrame]].
+ * The loaded [[DataFrame]] has two columns: `label` containing labels stored as doubles and
+ * `features` containing feature vectors stored as [[Vector]]s.
+ *
+ * To use LIBSVM data source, you need to set "libsvm" as the format in [[DataFrameReader]] and
+ * optionally specify options, for example:
+ * {{{
+ *   // Scala
+ *   val df = spark.read.format("libsvm")
+ *     .option("numFeatures", "780")
+ *     .load("data/mllib/sample_libsvm_data.txt")
+ *
+ *   // Java
+ *   Dataset<Row> df = spark.read().format("libsvm")
+ *     .option("numFeatures, "780")
+ *     .load("data/mllib/sample_libsvm_data.txt");
+ * }}}
+ *
+ * LIBSVM data source supports the following options:
+ *  - "numFeatures": number of features.
+ *    If unspecified or nonpositive, the number of features will be determined automatically at the
+ *    cost of one additional pass.
+ *    This is also useful when the dataset is already split into multiple files and you want to load
+ *    them separately, because some features may not present in certain files, which leads to
+ *    inconsistent feature dimensions.
+ *  - "vectorType": feature vector type, "sparse" (default) or "dense".
+ *
+ * Note that this class is public for documentation purpose. Please don't use this class directly.
+ * Rather, use the data source API as illustrated above.
+ *
+ * @see [[https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/ LIBSVM datasets]]
+ */
+class LibSVMDataSource private() {}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala
@@ -25,11 +25,10 @@ import org.apache.hadoop.io.{NullWritable, Text}
 import org.apache.hadoop.mapreduce.{Job, RecordWriter, TaskAttemptContext}
 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
 
-import org.apache.spark.annotation.Since
 import org.apache.spark.ml.feature.LabeledPoint
 import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT}
 import org.apache.spark.mllib.util.MLUtils
-import org.apache.spark.sql.{DataFrame, DataFrameReader, Row, SparkSession}
+import org.apache.spark.sql.{Row, SparkSession}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.encoders.RowEncoder
 import org.apache.spark.sql.catalyst.expressions.AttributeReference
@@ -77,44 +76,10 @@ private[libsvm] class LibSVMOutputWriter(
   }
 }
 
-/**
- * `libsvm` package implements Spark SQL data source API for loading LIBSVM data as [[DataFrame]].
- * The loaded [[DataFrame]] has two columns: `label` containing labels stored as doubles and
- * `features` containing feature vectors stored as [[Vector]]s.
- *
- * To use LIBSVM data source, you need to set "libsvm" as the format in [[DataFrameReader]] and
- * optionally specify options, for example:
- * {{{
- *   // Scala
- *   val df = spark.read.format("libsvm")
- *     .option("numFeatures", "780")
- *     .load("data/mllib/sample_libsvm_data.txt")
- *
- *   // Java
- *   Dataset<Row> df = spark.read().format("libsvm")
- *     .option("numFeatures, "780")
- *     .load("data/mllib/sample_libsvm_data.txt");
- * }}}
- *
- * LIBSVM data source supports the following options:
- *  - "numFeatures": number of features.
- *    If unspecified or nonpositive, the number of features will be determined automatically at the
- *    cost of one additional pass.
- *    This is also useful when the dataset is already split into multiple files and you want to load
- *    them separately, because some features may not present in certain files, which leads to
- *    inconsistent feature dimensions.
- *  - "vectorType": feature vector type, "sparse" (default) or "dense".
- *
- *  @see [[https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/ LIBSVM datasets]]
- *
- * Note that this class is public for documentation purpose. Please don't use this class directly.
- * Rather, use the data source API as illustrated above.
- */
+/** @see [[LibSVMDataSource]] for public documentation. */
 // If this is moved or renamed, please update DataSource's backwardCompatibilityMap.
-@Since("1.6.0")
-class LibSVMFileFormat extends TextBasedFileFormat with DataSourceRegister {
+private[libsvm] class LibSVMFileFormat extends TextBasedFileFormat with DataSourceRegister {
 
-  @Since("1.6.0")
   override def shortName(): String = "libsvm"
 
   override def toString: String = "LibSVM"