From 7b555f01841ea8a9a4fc48656636328cb8f254e9 Mon Sep 17 00:00:00 2001
From: madhu
Date: Sat, 13 May 2017 12:00:11 +0530
Subject: [PATCH 1/6] Add intermediate storage level to tree based classifiers

---
 .../DecisionTreeClassifier.scala              |  8 ++++---
 .../RandomForestClassifier.scala              |  3 ++-
 .../ml/regression/DecisionTreeRegressor.scala |  6 +++--
 .../ml/regression/RandomForestRegressor.scala |  3 ++-
 .../spark/ml/tree/impl/RandomForest.scala     |  3 ++-
 .../org/apache/spark/ml/tree/treeParams.scala | 22 +++++++++++++++++++
 .../spark/mllib/tree/RandomForest.scala       |  2 +-
 .../DecisionTreeClassifierSuite.scala         |  8 +++++++
 .../ml/tree/impl/RandomForestSuite.scala      | 14 +++++++-----
 9 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
index 9f60f0896ec52..42226ef1eaac7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
@@ -114,7 +114,8 @@ class DecisionTreeClassifier @Since("1.4.0") (
     instr.logParams(params: _*)
 
     val trees = RandomForest.run(oldDataset, strategy, numTrees = 1, featureSubsetStrategy = "all",
-      seed = $(seed), instr = Some(instr), parentUID = Some(uid))
+      seed = $(seed), instr = Some(instr), parentUID = Some(uid),
+      intermediateStorageLevel = $(intermediateStorageLevel))
 
     val m = trees.head.asInstanceOf[DecisionTreeClassificationModel]
     instr.logSuccess(m)
@@ -127,8 +128,9 @@ class DecisionTreeClassifier @Since("1.4.0") (
     val instr = Instrumentation.create(this, data)
     instr.logParams(params: _*)
 
     val trees = RandomForest.run(data, oldStrategy, numTrees = 1, featureSubsetStrategy = "all",
-      seed = 0L, instr = Some(instr), parentUID = Some(uid))
+      seed = 0L, instr = Some(instr), parentUID = Some(uid),
+      intermediateStorageLevel = $(intermediateStorageLevel))
 
     val m = trees.head.asInstanceOf[DecisionTreeClassificationModel]
     instr.logSuccess(m)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
index ab4c235209289..53ba0a8571c0a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
@@ -136,7 +136,8 @@ class RandomForestClassifier @Since("1.4.0") (
       minInstancesPerNode, seed, subsamplingRate, thresholds, cacheNodeIds, checkpointInterval)
 
     val trees = RandomForest
-      .run(oldDataset, strategy, getNumTrees, getFeatureSubsetStrategy, getSeed, Some(instr))
+      .run(oldDataset, strategy, getNumTrees, getFeatureSubsetStrategy, getSeed, Some(instr),
+        $(intermediateStorageLevel))
       .map(_.asInstanceOf[DecisionTreeClassificationModel])
 
     val numFeatures = oldDataset.first().features.size
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
index 01c5cc1c7efa9..366c2cdff5488 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -109,7 +109,8 @@ class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S
     instr.logParams(params: _*)
 
     val trees = RandomForest.run(oldDataset, strategy, numTrees = 1, featureSubsetStrategy = "all",
-      seed = $(seed), instr = Some(instr), parentUID = Some(uid))
+      seed = $(seed), instr = Some(instr), parentUID = Some(uid),
+      intermediateStorageLevel = $(intermediateStorageLevel))
 
     val m = trees.head.asInstanceOf[DecisionTreeRegressionModel]
     instr.logSuccess(m)
@@ -123,7 +124,8 @@ class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S
     instr.logParams(params: _*)
 
     val trees = RandomForest.run(data, oldStrategy, numTrees = 1, featureSubsetStrategy = "all",
-      seed = $(seed), instr = Some(instr), parentUID = Some(uid))
+      seed = $(seed), instr = Some(instr), parentUID = Some(uid),
+      intermediateStorageLevel = $(intermediateStorageLevel))
 
     val m = trees.head.asInstanceOf[DecisionTreeRegressionModel]
     instr.logSuccess(m)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
index a58da50fad972..f2b562d2314cf 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
@@ -127,7 +127,8 @@ class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S
       minInstancesPerNode, seed, subsamplingRate, cacheNodeIds, checkpointInterval)
 
     val trees = RandomForest
-      .run(oldDataset, strategy, getNumTrees, getFeatureSubsetStrategy, getSeed, Some(instr))
+      .run(oldDataset, strategy, getNumTrees, getFeatureSubsetStrategy, getSeed, Some(instr),
+        $(intermediateStorageLevel))
       .map(_.asInstanceOf[DecisionTreeRegressionModel])
 
     val numFeatures = oldDataset.first().features.size
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala
index 82e1ed85a0a14..fc6957ea507a9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala
@@ -92,6 +92,7 @@ private[spark] object RandomForest extends Logging {
       featureSubsetStrategy: String,
       seed: Long,
       instr: Option[Instrumentation[_]],
+      intermediateStorageLevel: String,
       parentUID: Option[String] = None): Array[DecisionTreeModel] = {
 
     val timer = new TimeTracker()
@@ -130,7 +131,7 @@ private[spark] object RandomForest extends Logging {
     val baggedInput = BaggedPoint
       .convertToBaggedRDD(treeInput, strategy.subsamplingRate, numTrees, withReplacement, seed)
-      .persist(StorageLevel.MEMORY_AND_DISK)
+      .persist(StorageLevel.fromString(intermediateStorageLevel))
 
     // depth of the decision tree
     val maxDepth = strategy.maxDepth
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
index cd1950bd76c05..0514d36e1a15e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
@@ -21,6 +21,7 @@ import java.util.Locale
 
 import scala.util.Try
 
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.ml.PredictorParams
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.param.shared._
@@ -29,6 +30,7 @@ import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo, BoostingStrat
 import org.apache.spark.mllib.tree.impurity.{Entropy => OldEntropy, Gini => OldGini,
   Impurity => OldImpurity, Variance => OldVariance}
 import org.apache.spark.mllib.tree.loss.{AbsoluteError => OldAbsoluteError,
   ClassificationLoss => OldClassificationLoss, LogLoss => OldLogLoss, Loss => OldLoss,
   SquaredError => OldSquaredError}
 import org.apache.spark.sql.types.{DataType, DoubleType, StructType}
+import org.apache.spark.storage.StorageLevel
 
 /**
  * Parameters for Decision Tree-based algorithms.
@@ -183,6 +185,26 @@ private[ml] trait DecisionTreeParams extends PredictorParams
   @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
   def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)
 
+  /**
+   * Param for StorageLevel for intermediate datasets. Pass in a string representation of
+   * `StorageLevel`. Cannot be "NONE".
+   * Default: "MEMORY_AND_DISK".
+   *
+   * @group expertParam
+   */
+  val intermediateStorageLevel = new Param[String](this, "intermediateStorageLevel",
+    "StorageLevel for intermediate datasets. Cannot be 'NONE'.",
+    (s: String) => Try(StorageLevel.fromString(s)).isSuccess && s != "NONE")
+
+  setDefault(intermediateStorageLevel -> "MEMORY_AND_DISK")
+
+  /** @group expertGetParam */
+  def getIntermediateStorageLevel: String = $(intermediateStorageLevel)
+
+  /** @group expertSetParam */
+  @Since("2.3.0")
+  def setIntermediateStorageLevel(value: String): this.type = set(intermediateStorageLevel, value)
+
   /** (private[ml]) Create a Strategy instance to use with the old API. */
   private[ml] def getOldStrategy(
       categoricalFeatures: Map[Int, Int],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala
index d1331a57de27b..73ead7c82fe1c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala
@@ -92,7 +92,7 @@ private class RandomForest (
    */
   def run(input: RDD[LabeledPoint]): RandomForestModel = {
     val trees: Array[NewDTModel] = NewRandomForest.run(input.map(_.asML), strategy, numTrees,
-      featureSubsetStrategy, seed.toLong, None)
+      featureSubsetStrategy, seed.toLong, None, "MEMORY_AND_DISK")
     new RandomForestModel(strategy.algo, trees.map(_.toOld))
   }
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
index 918ab27e2730b..ea56ae62d52c3 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
@@ -398,6 +398,14 @@ class DecisionTreeClassifierSuite
     testDefaultReadWrite(model)
   }
+
+  test("intermediate dataset storage level") {
+    val dt = new DecisionTreeClassifier()
+    assert(dt.getIntermediateStorageLevel === "MEMORY_AND_DISK")
+    // set and check the storage level
+    dt.setIntermediateStorageLevel("DISK_ONLY")
+    assert(dt.getIntermediateStorageLevel === "DISK_ONLY")
+  }
 }
 
 private[ml] object DecisionTreeClassifierSuite extends SparkFunSuite {
diff --git a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala
index df155b464c64b..27bd6de2cf773 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala
@@ -199,7 +199,7 @@ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext {
     withClue("DecisionTree requires number of features > 0," +
       " but was given an empty features vector") {
       intercept[IllegalArgumentException] {
-        RandomForest.run(rdd, strategy, 1, "all", 42L, instr = None)
+        RandomForest.run(rdd, strategy, 1, "all", 42L, instr = None, "MEMORY_AND_DISK")
       }
     }
   }
@@ -215,7 +215,8 @@ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext {
       numClasses = 2,
       maxBins = 5,
       categoricalFeaturesInfo = Map(0 -> 1, 1 -> 5))
-    val Array(tree) = RandomForest.run(rdd, strategy, 1, "all", 42L, instr = None)
+    val Array(tree) = RandomForest.run(rdd, strategy, 1, "all", 42L, instr = None,
+      "MEMORY_AND_DISK")
     assert(tree.rootNode.impurity === -1.0)
     assert(tree.depth === 0)
     assert(tree.rootNode.prediction === lp.label)
@@ -226,7 +227,8 @@ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext {
       Variance,
       maxDepth = 2,
       maxBins = 5)
-    val Array(tree2) = RandomForest.run(rdd, strategy2, 1, "all", 42L, instr = None)
+    val Array(tree2) = RandomForest.run(rdd, strategy2, 1, "all", 42L, instr = None,
+      "MEMORY_AND_DISK")
     assert(tree2.rootNode.impurity === -1.0)
     assert(tree2.depth === 0)
     assert(tree2.rootNode.prediction === lp.label)
@@ -407,7 +409,7 @@ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext {
       numClasses = 2, categoricalFeaturesInfo = Map(0 -> 3), maxBins = 3)
 
     val model = RandomForest.run(input, strategy, numTrees = 1, featureSubsetStrategy = "all",
-      seed = 42, instr = None).head
+      seed = 42, instr = None, "MEMORY_AND_DISK").head
     model.rootNode match {
       case n: InternalNode => n.split match {
         case s: CategoricalSplit =>
@@ -430,9 +432,9 @@ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext {
       new OldStrategy(OldAlgo.Classification, Entropy, 3, 2, 100, maxMemoryInMB = 0)
 
     val tree1 = RandomForest.run(rdd, strategy1, numTrees = 1, featureSubsetStrategy = "all",
-      seed = 42, instr = None).head
+      seed = 42, instr = None, "MEMORY_AND_DISK").head
     val tree2 = RandomForest.run(rdd, strategy2, numTrees = 1, featureSubsetStrategy = "all",
-      seed = 42, instr = None).head
+      seed = 42, instr = None, "MEMORY_AND_DISK").head
 
     def getChildren(rootNode: Node): Array[InternalNode] = rootNode match {
       case n: InternalNode =>
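
Usage sketch for patch 1 (not part of the patch; `spark` and a training
DataFrame `train` with the usual "label"/"features" columns are assumed): the
new expert param controls how the bagged intermediate RDD inside
RandomForest.run is persisted, and accepts any named StorageLevel except
"NONE".

    import org.apache.spark.ml.classification.DecisionTreeClassifier

    // Defaults to "MEMORY_AND_DISK"; here the bagged points spill to disk only.
    val dt = new DecisionTreeClassifier()
      .setIntermediateStorageLevel("DISK_ONLY")
    val model = dt.fit(train)
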
From e066240fee09d7bb40e52f01852d2ffdb0efb7bf Mon Sep 17 00:00:00 2001
From: madhu
Date: Wed, 24 May 2017 13:42:11 +0530
Subject: [PATCH 2/6] added to sharedParams

---
 .../ml/param/shared/SharedParamsCodeGen.scala |  9 ++++++++-
 .../spark/ml/param/shared/sharedParams.scala  | 20 +++++++++++++++++++
 .../org/apache/spark/ml/tree/treeParams.scala | 18 +----------------
 3 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
index c94b8b4e9dfda..1d9a67434e4fd 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
@@ -82,7 +82,11 @@ private[shared] object SharedParamsCodeGen {
       ParamDesc[String]("solver", "the solver algorithm for optimization. If this is not set or " +
         "empty, default value is 'auto'", Some("\"auto\"")),
       ParamDesc[Int]("aggregationDepth", "suggested depth for treeAggregate (>= 2)", Some("2"),
-        isValid = "ParamValidators.gtEq(2)", isExpertParam = true))
+        isValid = "ParamValidators.gtEq(2)", isExpertParam = true),
+      ParamDesc[String]("intermediateStorageLevel", "StorageLevel " +
+        "for intermediate datasets", Some("\"MEMORY_AND_DISK\""),
+        isValid = """(s: String) => Try(StorageLevel.fromString(s)).isSuccess && s != "NONE"""",
+        isExpertParam = true))
 
     val code = genSharedParams(params)
     val file = "src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala"
@@ -207,7 +211,10 @@ private[shared] object SharedParamsCodeGen {
        |
        |package org.apache.spark.ml.param.shared
        |
+       |import scala.util.Try
+       |
        |import org.apache.spark.ml.param._
+       |import org.apache.spark.storage.StorageLevel
        |
        |// DO NOT MODIFY THIS FILE! It was generated by SharedParamsCodeGen.
        |
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
index e3e03dfd43dd6..0516457cf7ad3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
@@ -17,7 +17,10 @@
 
 package org.apache.spark.ml.param.shared
 
+import scala.util.Try
+
 import org.apache.spark.ml.param._
+import org.apache.spark.storage.StorageLevel
 
 // DO NOT MODIFY THIS FILE! It was generated by SharedParamsCodeGen.
 
@@ -406,4 +409,21 @@ private[ml] trait HasAggregationDepth extends Params {
   /** @group expertGetParam */
   final def getAggregationDepth: Int = $(aggregationDepth)
 }
+
+/**
+ * Trait for shared param intermediateStorageLevel (default: "MEMORY_AND_DISK").
+ */
+private[ml] trait HasIntermediateStorageLevel extends Params {
+
+  /**
+   * Param for StorageLevel for intermediate datasets.
+   * @group expertParam
+   */
+  final val intermediateStorageLevel: Param[String] = new Param[String](this, "intermediateStorageLevel", "StorageLevel for intermediate datasets", (s: String) => Try(StorageLevel.fromString(s)).isSuccess && s != "NONE")
+
+  setDefault(intermediateStorageLevel, "MEMORY_AND_DISK")
+
+  /** @group expertGetParam */
+  final def getIntermediateStorageLevel: String = $(intermediateStorageLevel)
+}
 // scalastyle:on
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
index 0514d36e1a15e..84617bb26dcec 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
@@ -38,7 +38,7 @@ import org.apache.spark.storage.StorageLevel
  * Note: Marked as private and DeveloperApi since this may be made public in the future.
  */
 private[ml] trait DecisionTreeParams extends PredictorParams
-  with HasCheckpointInterval with HasSeed {
+  with HasCheckpointInterval with HasSeed with HasIntermediateStorageLevel {
 
   /**
    * Maximum depth of the tree (>= 0).
@@ -185,22 +185,6 @@ private[ml] trait DecisionTreeParams extends PredictorParams
   @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
   def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)
 
-  /**
-   * Param for StorageLevel for intermediate datasets. Pass in a string representation of
-   * `StorageLevel`. Cannot be "NONE".
-   * Default: "MEMORY_AND_DISK".
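
Note on patch 2: the shared HasIntermediateStorageLevel trait keeps the same
validator as the tree-local param it replaces. A standalone sketch of what
that validator accepts (illustrative, not part of the patch):

    import scala.util.Try
    import org.apache.spark.storage.StorageLevel

    val isValid = (s: String) => Try(StorageLevel.fromString(s)).isSuccess && s != "NONE"
    assert(isValid("MEMORY_ONLY"))           // any named StorageLevel is accepted
    assert(isValid("MEMORY_AND_DISK_SER"))
    assert(!isValid("NONE"))                 // rejected: the bagged RDD must be cached
    assert(!isValid("SOME_BOGUS_LEVEL"))     // rejected: fromString throws, so Try fails
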
-   *
-   * @group expertParam
-   */
-  val intermediateStorageLevel = new Param[String](this, "intermediateStorageLevel",
-    "StorageLevel for intermediate datasets. Cannot be 'NONE'.",
-    (s: String) => Try(StorageLevel.fromString(s)).isSuccess && s != "NONE")
-
-  setDefault(intermediateStorageLevel -> "MEMORY_AND_DISK")
-
-  /** @group expertGetParam */
-  def getIntermediateStorageLevel: String = $(intermediateStorageLevel)
-
   /** @group expertSetParam */
   @Since("2.3.0")
   def setIntermediateStorageLevel(value: String): this.type = set(intermediateStorageLevel, value)
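
Note on patch 3 (below): ALS keeps the same user-facing API; the param and
getter now come from the shared trait rather than being declared locally, and
the existing setter on ALS is not touched by this series. Code such as the
following sketch should continue to compile unchanged (illustrative):

    import org.apache.spark.ml.recommendation.ALS

    val als = new ALS()
      .setIntermediateStorageLevel("MEMORY_ONLY")   // now the shared param
      .setFinalStorageLevel("MEMORY_AND_DISK")      // separate param, unaffected

From 4e48f5c42d746ac0ab48c30e87d781e47bab4820 Mon Sep 17 00:00:00 2001
From: madhu
Date: Wed, 24 May 2017 13:50:19 +0530
Subject: [PATCH 3/6] ALS uses generic param now

---
 .../org/apache/spark/ml/recommendation/ALS.scala | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
index d626f04599670..136ad4fc05a35 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -129,7 +129,7 @@ private[recommendation] trait ALSModelParams extends Params with HasPredictionCo
  * Common params for ALS.
  */
 private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter with HasRegParam
-  with HasPredictionCol with HasCheckpointInterval with HasSeed {
+  with HasPredictionCol with HasCheckpointInterval with HasSeed with HasIntermediateStorageLevel{
 
   /**
    * Param for rank of the matrix factorization (positive).
@@ -205,20 +205,6 @@ private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter w
   /** @group getParam */
   def getNonnegative: Boolean = $(nonnegative)
 
-  /**
-   * Param for StorageLevel for intermediate datasets. Pass in a string representation of
-   * `StorageLevel`. Cannot be "NONE".
-   * Default: "MEMORY_AND_DISK".
-   *
-   * @group expertParam
-   */
-  val intermediateStorageLevel = new Param[String](this, "intermediateStorageLevel",
-    "StorageLevel for intermediate datasets. Cannot be 'NONE'.",
-    (s: String) => Try(StorageLevel.fromString(s)).isSuccess && s != "NONE")
-
-  /** @group expertGetParam */
-  def getIntermediateStorageLevel: String = $(intermediateStorageLevel)
-
   /**
    * Param for StorageLevel for ALS model factors. Pass in a string representation of
    * `StorageLevel`.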
From bfe81c3d786ae82cd6dcc89b64384e5e1e584c5a Mon Sep 17 00:00:00 2001
From: madhu
Date: Tue, 14 Nov 2017 10:30:07 +0530
Subject: [PATCH 4/6] added mima excludes

---
 project/MimaExcludes.scala | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index e6f136c7c8b0a..aefc7fb7772eb 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -1051,6 +1051,13 @@ object MimaExcludes {
 
       // [SPARK-21680][ML][MLLIB]optimzie Vector coompress
       ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Vector.toSparseWithSize"),
      ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Vector.toSparseWithSize")
+    ) ++ Seq(
+      // [SPARK-20723][ML] Add intermediate storage level to tree based classifiers
+      ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.param.shared.HasIntermediateStorageLevel.org$apache$spark$ml$param$shared$HasIntermediateStorageLevel$_setter_$intermediateStorageLevel_="),
+      ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.param.shared.HasIntermediateStorageLevel.getIntermediateStorageLevel"),
+      ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.param.shared.HasIntermediateStorageLevel.intermediateStorageLevel"),
+      ProblemFilters.exclude[FinalMethodProblem]("org.apache.spark.ml.recommendation.ALS.getIntermediateStorageLevel"),
+      ProblemFilters.exclude[FinalMethodProblem]("org.apache.spark.ml.recommendation.ALS.intermediateStorageLevel")
     )
   }

From 2ed39ccb57951fbd380fbc7dfa50f7ba69711a4c Mon Sep 17 00:00:00 2001
From: madhu
Date: Tue, 14 Nov 2017 10:30:41 +0530
Subject: [PATCH 5/6] style change

---
 .../src/main/scala/org/apache/spark/ml/recommendation/ALS.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
index c6c8798f3c0b0..0863437c32b0e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -130,7 +130,7 @@ private[recommendation] trait ALSModelParams extends Params with HasPredictionCo
  * Common params for ALS.
  */
 private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter with HasRegParam
-  with HasPredictionCol with HasCheckpointInterval with HasSeed with HasIntermediateStorageLevel{
+  with HasPredictionCol with HasCheckpointInterval with HasSeed with HasIntermediateStorageLevel {
 
   /**
    * Param for rank of the matrix factorization (positive).
From dd197173e3775bddf8b8e03bcf766578de3e4c9e Mon Sep 17 00:00:00 2001
From: madhu
Date: Sat, 18 Nov 2017 21:46:36 +0530
Subject: [PATCH 6/6] fixed merge issues

---
 .../apache/spark/ml/param/shared/SharedParamsCodeGen.scala    | 4 ++--
 .../scala/org/apache/spark/ml/param/shared/sharedParams.scala | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
index 2e7108b60b47f..e7013588b3141 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
@@ -87,12 +87,12 @@ private[shared] object SharedParamsCodeGen {
       ParamDesc[String]("intermediateStorageLevel", "StorageLevel " +
         "for intermediate datasets", Some("\"MEMORY_AND_DISK\""),
         isValid = """(s: String) => Try(StorageLevel.fromString(s)).isSuccess && s != "NONE"""",
+        isExpertParam = true),
       ParamDesc[Boolean]("collectSubModels", "If set to false, then only the single best " +
         "sub-model will be available after fitting. If set to true, then all sub-models will be " +
         "available. Warning: For large models, collecting all sub-models can cause OOMs on the " +
         "Spark driver.",
-        Some("false"), isExpertParam = true)
-        isExpertParam = true))
+        Some("false"), isExpertParam = true))
 
     val code = genSharedParams(params)
     val file = "src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala"
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
index ec87f2e047b46..d5d8f698e37f7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
@@ -505,5 +505,4 @@ private[ml] trait HasCollectSubModels extends Params {
   /** @group expertGetParam */
   final def getCollectSubModels: Boolean = $(collectSubModels)
 }
-/
 // scalastyle:on
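
End-to-end sketch for the whole series (illustrative; `train` is an assumed
DataFrame of labeled feature vectors): memory-constrained jobs can trade some
speed for headroom by keeping the bagged input off the executor heap.

    import org.apache.spark.ml.classification.RandomForestClassifier

    val rf = new RandomForestClassifier()
      .setNumTrees(100)
      .setIntermediateStorageLevel("DISK_ONLY")  // bagged RDD no longer competes for memory
    val model = rf.fit(train)
    assert(model.getIntermediateStorageLevel == "DISK_ONLY")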