Skip to content

Commit

Permalink
[SPARK-10237] [MLLIB] update since versions in mllib.fpm
Browse files Browse the repository at this point in the history
Same as #8421 but for `mllib.fpm`.

cc feynmanliang

Author: Xiangrui Meng <meng@databricks.com>

Closes #8429 from mengxr/SPARK-10237.

(cherry picked from commit c619c75)
Signed-off-by: Xiangrui Meng <meng@databricks.com>
  • Loading branch information
mengxr committed Aug 25, 2015
1 parent 95e44b4 commit 186326d
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,15 @@ class AssociationRules private[fpm] (
}.filter(_.confidence >= minConfidence)
}

/**
 * Java-friendly version of [[run]]: accepts the frequent itemsets as a JavaRDD and returns the
 * resulting rules as a JavaRDD.
 */
@Since("1.5.0")
def run[Item](freqItemsets: JavaRDD[FreqItemset[Item]]): JavaRDD[Rule[Item]] = {
  // Java callers cannot supply a ClassTag, so one is obtained from fakeClassTag and handed
  // explicitly to the RDD-based overload.
  run(freqItemsets.rdd)(fakeClassTag[Item])
}
}

@Since("1.5.0")
object AssociationRules {

/**
Expand All @@ -104,8 +107,8 @@ object AssociationRules {
@Since("1.5.0")
@Experimental
class Rule[Item] private[fpm] (
val antecedent: Array[Item],
val consequent: Array[Item],
@Since("1.5.0") val antecedent: Array[Item],
@Since("1.5.0") val consequent: Array[Item],
freqUnion: Double,
freqAntecedent: Double) extends Serializable {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ import org.apache.spark.storage.StorageLevel
*/
@Since("1.3.0")
@Experimental
class FPGrowthModel[Item: ClassTag](val freqItemsets: RDD[FreqItemset[Item]]) extends Serializable {
class FPGrowthModel[Item: ClassTag] @Since("1.3.0") (
@Since("1.3.0") val freqItemsets: RDD[FreqItemset[Item]]) extends Serializable {
/**
* Generates association rules for the [[Item]]s in [[freqItemsets]].
* @param confidence minimal confidence of the rules produced
Expand Down Expand Up @@ -126,6 +127,8 @@ class FPGrowth private (
new FPGrowthModel(freqItemsets)
}

/** Java-friendly version of [[run]]. */
@Since("1.3.0")
def run[Item, Basket <: JavaIterable[Item]](data: JavaRDD[Basket]): FPGrowthModel[Item] = {
implicit val tag = fakeClassTag[Item]
run(data.rdd.map(_.asScala.toArray))
Expand Down Expand Up @@ -226,7 +229,9 @@ object FPGrowth {
*
*/
@Since("1.3.0")
class FreqItemset[Item](val items: Array[Item], val freq: Long) extends Serializable {
class FreqItemset[Item] @Since("1.3.0") (
@Since("1.3.0") val items: Array[Item],
@Since("1.3.0") val freq: Long) extends Serializable {

/**
* Returns items in a Java List.
Expand Down
23 changes: 20 additions & 3 deletions mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import scala.collection.JavaConverters._
import scala.reflect.ClassTag

import org.apache.spark.Logging
import org.apache.spark.annotation.Experimental
import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.api.java.JavaSparkContext.fakeClassTag
import org.apache.spark.rdd.RDD
Expand All @@ -51,6 +51,7 @@ import org.apache.spark.storage.StorageLevel
* (Wikipedia)]]
*/
@Experimental
@Since("1.5.0")
class PrefixSpan private (
private var minSupport: Double,
private var maxPatternLength: Int,
Expand All @@ -61,17 +62,20 @@ class PrefixSpan private (
* Constructs a default instance with default parameters
* {minSupport: `0.1`, maxPatternLength: `10`, maxLocalProjDBSize: `32000000L`}.
*/
@Since("1.5.0")
def this() = {
  this(0.1, 10, 32000000L)
}

/**
 * Returns the minimal support (i.e. how frequently a pattern must occur before it is
 * considered frequent).
 */
@Since("1.5.0")
def getMinSupport: Double = this.minSupport

/**
* Sets the minimal support level (default: `0.1`).
*/
@Since("1.5.0")
def setMinSupport(minSupport: Double): this.type = {
require(minSupport >= 0 && minSupport <= 1,
s"The minimum support value must be in [0, 1], but got $minSupport.")
Expand All @@ -82,11 +86,13 @@ class PrefixSpan private (
/**
 * Returns the maximal pattern length (i.e. the length of the longest sequential pattern to
 * consider).
 */
@Since("1.5.0")
def getMaxPatternLength: Int = this.maxPatternLength

/**
* Sets maximal pattern length (default: `10`).
*/
@Since("1.5.0")
def setMaxPatternLength(maxPatternLength: Int): this.type = {
// TODO: support unbounded pattern length when maxPatternLength = 0
require(maxPatternLength >= 1,
Expand All @@ -98,12 +104,14 @@ class PrefixSpan private (
/**
 * Returns the maximum number of items a projected database may contain before it is
 * processed locally.
 */
@Since("1.5.0")
def getMaxLocalProjDBSize: Long = this.maxLocalProjDBSize

/**
* Sets the maximum number of items (including delimiters used in the internal storage format)
* allowed in a projected database before local processing (default: `32000000L`).
*/
@Since("1.5.0")
def setMaxLocalProjDBSize(maxLocalProjDBSize: Long): this.type = {
require(maxLocalProjDBSize >= 0L,
s"The maximum local projected database size must be nonnegative, but got $maxLocalProjDBSize")
Expand All @@ -116,6 +124,7 @@ class PrefixSpan private (
* @param data sequences of itemsets.
* @return a [[PrefixSpanModel]] that contains the frequent patterns
*/
@Since("1.5.0")
def run[Item: ClassTag](data: RDD[Array[Array[Item]]]): PrefixSpanModel[Item] = {
if (data.getStorageLevel == StorageLevel.NONE) {
logWarning("Input data is not cached.")
Expand Down Expand Up @@ -202,6 +211,7 @@ class PrefixSpan private (
* @tparam Sequence sequence type, which is an Iterable of Itemsets
* @return a [[PrefixSpanModel]] that contains the frequent sequential patterns
*/
@Since("1.5.0")
def run[Item, Itemset <: jl.Iterable[Item], Sequence <: jl.Iterable[Itemset]](
data: JavaRDD[Sequence]): PrefixSpanModel[Item] = {
implicit val tag = fakeClassTag[Item]
Expand All @@ -211,6 +221,7 @@ class PrefixSpan private (
}

@Experimental
@Since("1.5.0")
object PrefixSpan extends Logging {

/**
Expand Down Expand Up @@ -535,10 +546,14 @@ object PrefixSpan extends Logging {
* @param freq frequency
* @tparam Item item type
*/
class FreqSequence[Item](val sequence: Array[Array[Item]], val freq: Long) extends Serializable {
@Since("1.5.0")
class FreqSequence[Item] @Since("1.5.0") (
@Since("1.5.0") val sequence: Array[Array[Item]],
@Since("1.5.0") val freq: Long) extends Serializable {
/**
 * Converts the sequence into a Java List of Java Lists, for the convenience of Java users.
 */
@Since("1.5.0")
def javaSequence: ju.List[ju.List[Item]] = {
  // Each inner array (one itemset) becomes a Java list; the outer list keeps the itemset order.
  sequence.toList.map(itemset => itemset.toList.asJava).asJava
}
}
}
Expand All @@ -548,5 +563,7 @@ object PrefixSpan extends Logging {
* @param freqSequences frequent sequences
* @tparam Item item type
*/
class PrefixSpanModel[Item](val freqSequences: RDD[PrefixSpan.FreqSequence[Item]])
@Since("1.5.0")
class PrefixSpanModel[Item] @Since("1.5.0") (
@Since("1.5.0") val freqSequences: RDD[PrefixSpan.FreqSequence[Item]])
extends Serializable

0 comments on commit 186326d

Please sign in to comment.