Commit 79678a2

update since version in mllib.clustering
mengxr committed Aug 25, 2015
1 parent 9205907 commit 79678a2
Showing 7 changed files with 44 additions and 23 deletions.
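
The change is mechanical: each public class, its primary constructor, and its public constructor parameters receive an @Since annotation recording the release that introduced them. A minimal sketch of the pattern on a hypothetical ExampleModel (the class and its fields are invented for illustration):

    import org.apache.spark.annotation.Since

    // The class, its primary constructor, and each public val carry their own
    // @Since tag, because they may have entered the API in different releases.
    @Since("1.3.0")
    class ExampleModel @Since("1.3.0") (
        @Since("1.3.0") val k: Int,
        @Since("1.5.0") val weights: Array[Double]) // a val added in a later release
      extends Serializable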

mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
@@ -53,6 +53,7 @@ import org.apache.spark.util.Utils
  * @param maxIterations The maximum number of iterations to perform
  */
 @Experimental
+@Since("1.3.0")
 class GaussianMixture private (
     private var k: Int,
     private var convergenceTol: Double,

mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
@@ -46,9 +46,9 @@ import org.apache.spark.sql.{SQLContext, Row}
  */
 @Since("1.3.0")
 @Experimental
-class GaussianMixtureModel(
-  val weights: Array[Double],
-  val gaussians: Array[MultivariateGaussian]) extends Serializable with Saveable {
+class GaussianMixtureModel @Since("1.3.0") (
+  @Since("1.3.0") val weights: Array[Double],
+  @Since("1.3.0") val gaussians: Array[MultivariateGaussian]) extends Serializable with Saveable {
 
   require(weights.length == gaussians.length, "Length of weight and Gaussian arrays must match")
 
@@ -178,7 +178,7 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] {
         (weight, new MultivariateGaussian(mu, sigma))
       }.unzip
 
-      return new GaussianMixtureModel(weights.toArray, gaussians.toArray)
+      new GaussianMixtureModel(weights.toArray, gaussians.toArray)
     }
   }
 
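
For context, a minimal sketch of how the two annotated classes fit together, assuming a cached data: RDD[Vector]; setK, setConvergenceTol, and run are from the public GaussianMixture API but do not appear in the hunks above:

    import org.apache.spark.mllib.clustering.GaussianMixture
    import org.apache.spark.mllib.linalg.Vector
    import org.apache.spark.rdd.RDD

    // a minimal sketch, assuming `data: RDD[Vector]` is already cached
    val model = new GaussianMixture()
      .setK(2)                 // the `k` constructor parameter
      .setConvergenceTol(0.01) // the `convergenceTol` constructor parameter
      .run(data)               // returns a GaussianMixtureModel
    println(model.weights.mkString(", ")) // the @Since("1.3.0") val annotated above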

mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -37,6 +37,7 @@ import org.apache.spark.util.random.XORShiftRandom
  * This is an iterative algorithm that will make multiple passes over the data, so any RDDs given
  * to it should be cached by the user.
  */
+@Since("0.8.0")
 class KMeans private (
     private var k: Int,
     private var maxIterations: Int,

mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
@@ -37,8 +37,8 @@ import org.apache.spark.sql.Row
  * A clustering model for K-means. Each point belongs to the cluster with the closest center.
  */
 @Since("0.8.0")
-class KMeansModel (
-    val clusterCenters: Array[Vector]) extends Saveable with Serializable with PMMLExportable {
+class KMeansModel @Since("1.1.0") (@Since("1.0.0") val clusterCenters: Array[Vector])
+  extends Saveable with Serializable with PMMLExportable {
 
   /**
    * A Java-friendly constructor that takes an Iterable of Vectors.
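
Since each point belongs to the cluster with the closest center, prediction on a directly constructed model behaves as sketched below, using only the constructor and predict API of KMeansModel:

    import org.apache.spark.mllib.clustering.KMeansModel
    import org.apache.spark.mllib.linalg.Vectors

    // a minimal sketch: a model built from two explicit centers
    val model = new KMeansModel(Array(Vectors.dense(0.0, 0.0), Vectors.dense(5.0, 5.0)))
    val cluster = model.predict(Vectors.dense(4.0, 6.0)) // 1: (5, 5) is the closer center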

mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -43,12 +43,15 @@ import org.apache.spark.util.BoundedPriorityQueue
  * including local and distributed data structures.
  */
 @Experimental
+@Since("1.3.0")
 abstract class LDAModel private[clustering] extends Saveable {
 
   /** Number of topics */
+  @Since("1.3.0")
   def k: Int
 
   /** Vocabulary size (number of terms or terms in the vocabulary) */
+  @Since("1.3.0")
   def vocabSize: Int
 
   /**
@@ -57,6 +60,7 @@ abstract class LDAModel private[clustering] extends Saveable {
    *
    * This is the parameter to a Dirichlet distribution.
    */
+  @Since("1.5.0")
   def docConcentration: Vector
 
   /**
@@ -68,6 +72,7 @@ abstract class LDAModel private[clustering] extends Saveable {
    * Note: The topics' distributions over terms are called "beta" in the original LDA paper
    * by Blei et al., but are called "phi" in many later papers such as Asuncion et al., 2009.
    */
+  @Since("1.5.0")
   def topicConcentration: Double
 
   /**
@@ -81,6 +86,7 @@ abstract class LDAModel private[clustering] extends Saveable {
    * This is a matrix of size vocabSize x k, where each column is a topic.
    * No guarantees are given about the ordering of the topics.
    */
+  @Since("1.3.0")
   def topicsMatrix: Matrix
 
   /**
@@ -91,6 +97,7 @@ abstract class LDAModel private[clustering] extends Saveable {
    * (term indices, term weights in topic).
    * Each topic's terms are sorted in order of decreasing weight.
    */
+  @Since("1.3.0")
   def describeTopics(maxTermsPerTopic: Int): Array[(Array[Int], Array[Double])]
 
   /**
@@ -102,6 +109,7 @@ abstract class LDAModel private[clustering] extends Saveable {
    * (term indices, term weights in topic).
    * Each topic's terms are sorted in order of decreasing weight.
    */
+  @Since("1.3.0")
   def describeTopics(): Array[(Array[Int], Array[Double])] = describeTopics(vocabSize)
 
   /* TODO (once LDA can be trained with Strings or given a dictionary)
@@ -185,10 +193,11 @@ abstract class LDAModel private[clustering] extends Saveable {
  * @param topics Inferred topics (vocabSize x k matrix).
  */
 @Experimental
+@Since("1.3.0")
 class LocalLDAModel private[clustering] (
-    val topics: Matrix,
-    override val docConcentration: Vector,
-    override val topicConcentration: Double,
+    @Since("1.3.0") val topics: Matrix,
+    @Since("1.5.0") override val docConcentration: Vector,
+    @Since("1.5.0") override val topicConcentration: Double,
     override protected[clustering] val gammaShape: Double = 100)
   extends LDAModel with Serializable {
 
@@ -376,6 +385,7 @@ class LocalLDAModel private[clustering] (
 }
 
 @Experimental
+@Since("1.5.0")
 object LocalLDAModel extends Loader[LocalLDAModel] {
 
   private object SaveLoadV1_0 {
@@ -480,13 +490,14 @@ object LocalLDAModel extends Loader[LocalLDAModel] {
  * than the [[LocalLDAModel]].
  */
 @Experimental
+@Since("1.3.0")
 class DistributedLDAModel private[clustering] (
     private[clustering] val graph: Graph[LDA.TopicCounts, LDA.TokenCount],
     private[clustering] val globalTopicTotals: LDA.TopicCounts,
-    val k: Int,
-    val vocabSize: Int,
-    override val docConcentration: Vector,
-    override val topicConcentration: Double,
+    @Since("1.3.0") val k: Int,
+    @Since("1.3.0") val vocabSize: Int,
+    @Since("1.5.0") override val docConcentration: Vector,
+    @Since("1.5.0") override val topicConcentration: Double,
     private[spark] val iterationTimes: Array[Double],
     override protected[clustering] val gammaShape: Double = 100)
   extends LDAModel {
@@ -604,6 +615,7 @@ class DistributedLDAModel private[clustering] (
    * (term indices, topic indices). Note that terms will be omitted if not present in
    * the document.
    */
+  @Since("1.5.0")
   lazy val topicAssignments: RDD[(Long, Array[Int], Array[Int])] = {
     // For reference, compare the below code with the core part of EMLDAOptimizer.next().
     val eta = topicConcentration
@@ -635,6 +647,7 @@ class DistributedLDAModel private[clustering] (
   }
 
   /** Java-friendly version of [[topicAssignments]] */
+  @Since("1.5.0")
   lazy val javaTopicAssignments: JavaRDD[(java.lang.Long, Array[Int], Array[Int])] = {
     topicAssignments.asInstanceOf[RDD[(java.lang.Long, Array[Int], Array[Int])]].toJavaRDD()
   }
@@ -771,6 +784,7 @@ class DistributedLDAModel private[clustering] (
 
 
 @Experimental
+@Since("1.5.0")
 object DistributedLDAModel extends Loader[DistributedLDAModel] {
 
   private object SaveLoadV1_0 {
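
A minimal sketch of the describeTopics method annotated above; ldaModel is assumed to come from a prior LDA run (for example new LDA().setK(3).run(corpus)):

    // a minimal sketch, assuming `ldaModel: LDAModel` from a prior training run
    val topics: Array[(Array[Int], Array[Double])] =
      ldaModel.describeTopics(maxTermsPerTopic = 5)
    topics.zipWithIndex.foreach { case ((terms, weights), i) =>
      // term indices paired with weights, sorted by decreasing weight
      println(s"Topic $i: " + terms.zip(weights).mkString(", "))
    }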

mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
@@ -42,9 +42,10 @@ import org.apache.spark.{Logging, SparkContext, SparkException}
  */
 @Since("1.3.0")
 @Experimental
-class PowerIterationClusteringModel(
-    val k: Int,
-    val assignments: RDD[PowerIterationClustering.Assignment]) extends Saveable with Serializable {
+class PowerIterationClusteringModel @Since("1.3.0") (
+    @Since("1.3.0") val k: Int,
+    @Since("1.3.0") val assignments: RDD[PowerIterationClustering.Assignment])
+  extends Saveable with Serializable {
 
   @Since("1.4.0")
   override def save(sc: SparkContext, path: String): Unit = {
@@ -56,6 +57,8 @@
 
 @Since("1.4.0")
 object PowerIterationClusteringModel extends Loader[PowerIterationClusteringModel] {
+
+  @Since("1.4.0")
   override def load(sc: SparkContext, path: String): PowerIterationClusteringModel = {
     PowerIterationClusteringModel.SaveLoadV1_0.load(sc, path)
   }
@@ -120,6 +123,7 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringModel] {
  * @see [[http://en.wikipedia.org/wiki/Spectral_clustering Spectral clustering (Wikipedia)]]
  */
 @Experimental
+@Since("1.3.0")
 class PowerIterationClustering private[clustering] (
     private var k: Int,
     private var maxIterations: Int,
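
The save and load methods annotated above follow mllib's usual Saveable/Loader pairing; a minimal round-trip sketch, assuming a SparkContext sc, a trained model, and an arbitrary example path:

    import org.apache.spark.mllib.clustering.PowerIterationClusteringModel

    // a minimal sketch; the path is an arbitrary example
    model.save(sc, "/tmp/pic-model")                                  // @Since("1.4.0")
    val restored = PowerIterationClusteringModel.load(sc, "/tmp/pic-model")
    assert(restored.k == model.k)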

mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
@@ -66,9 +66,10 @@ import org.apache.spark.util.random.XORShiftRandom
  */
 @Since("1.2.0")
 @Experimental
-class StreamingKMeansModel(
-    override val clusterCenters: Array[Vector],
-    val clusterWeights: Array[Double]) extends KMeansModel(clusterCenters) with Logging {
+class StreamingKMeansModel @Since("1.2.0") (
+    @Since("1.2.0") override val clusterCenters: Array[Vector],
+    @Since("1.2.0") val clusterWeights: Array[Double])
+  extends KMeansModel(clusterCenters) with Logging {
 
   /**
    * Perform a k-means update on a batch of data.
@@ -168,10 +169,10 @@ class StreamingKMeansModel(
  */
 @Since("1.2.0")
 @Experimental
-class StreamingKMeans(
-    var k: Int,
-    var decayFactor: Double,
-    var timeUnit: String) extends Logging with Serializable {
+class StreamingKMeans @Since("1.2.0") (
+    @Since("1.2.0") var k: Int,
+    @Since("1.2.0") var decayFactor: Double,
+    @Since("1.2.0") var timeUnit: String) extends Logging with Serializable {
 
   @Since("1.2.0")
   def this() = this(2, 1.0, StreamingKMeans.BATCHES)
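
A minimal sketch using the public constructor annotated above; the no-argument constructor shown in the hunk is equivalent to this(2, 1.0, StreamingKMeans.BATCHES). Note that setRandomCenters is part of the StreamingKMeans API but is an assumption here, as it does not appear in this diff:

    import org.apache.spark.mllib.clustering.StreamingKMeans

    // a minimal sketch: three clusters, mild forgetting, decay measured in batches
    val skm = new StreamingKMeans(k = 3, decayFactor = 0.9, timeUnit = StreamingKMeans.BATCHES)
      .setRandomCenters(dim = 2, weight = 0.0) // 2-dimensional points, zero initial weight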
