From 7b555f01841ea8a9a4fc48656636328cb8f254e9 Mon Sep 17 00:00:00 2001
From: madhu
Date: Sat, 13 May 2017 12:00:11 +0530
Subject: [PATCH 1/6] Add intermediate storage level to tree based classifiers

---
 .../DecisionTreeClassifier.scala              |  8 ++++---
 .../RandomForestClassifier.scala              |  3 ++-
 .../ml/regression/DecisionTreeRegressor.scala |  6 +++--
 .../ml/regression/RandomForestRegressor.scala |  3 ++-
 .../spark/ml/tree/impl/RandomForest.scala     |  3 ++-
 .../org/apache/spark/ml/tree/treeParams.scala | 22 +++++++++++++++++++
 .../spark/mllib/tree/RandomForest.scala       |  2 +-
 .../DecisionTreeClassifierSuite.scala         |  8 +++++++
 .../ml/tree/impl/RandomForestSuite.scala      | 14 +++++++-----
 9 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
index 9f60f0896ec52..42226ef1eaac7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
@@ -114,7 +114,8 @@ class DecisionTreeClassifier @Since("1.4.0") (
     instr.logParams(params: _*)
 
     val trees = RandomForest.run(oldDataset, strategy, numTrees = 1, featureSubsetStrategy = "all",
-      seed = $(seed), instr = Some(instr), parentUID = Some(uid))
+      seed = $(seed), instr = Some(instr), parentUID = Some(uid),
+      intermediateStorageLevel = $(intermediateStorageLevel))
 
     val m = trees.head.asInstanceOf[DecisionTreeClassificationModel]
     instr.logSuccess(m)
@@ -127,8 +128,9 @@ class DecisionTreeClassifier @Since("1.4.0") (
     val instr = Instrumentation.create(this, data)
     instr.logParams(params: _*)
 
     val trees = RandomForest.run(data, oldStrategy, numTrees = 1, featureSubsetStrategy = "all",
-      seed = 0L, instr = Some(instr), parentUID = Some(uid))
+      seed = 0L, instr = Some(instr), parentUID = Some(uid),
+      intermediateStorageLevel = $(intermediateStorageLevel))
 
     val m = trees.head.asInstanceOf[DecisionTreeClassificationModel]
     instr.logSuccess(m)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
index ab4c235209289..53ba0a8571c0a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
@@ -136,7 +136,8 @@ class RandomForestClassifier @Since("1.4.0") (
       minInstancesPerNode, seed, subsamplingRate, thresholds, cacheNodeIds, checkpointInterval)
 
     val trees = RandomForest
-      .run(oldDataset, strategy, getNumTrees, getFeatureSubsetStrategy, getSeed, Some(instr))
+      .run(oldDataset, strategy, getNumTrees, getFeatureSubsetStrategy, getSeed, Some(instr),
+        $(intermediateStorageLevel))
       .map(_.asInstanceOf[DecisionTreeClassificationModel])
 
     val numFeatures = oldDataset.first().features.size
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
index 01c5cc1c7efa9..366c2cdff5488 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -109,7 +109,8 @@ class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S
     instr.logParams(params: _*)
 
     val trees = RandomForest.run(oldDataset, strategy, numTrees = 1, featureSubsetStrategy = "all",
-      seed = $(seed), instr = Some(instr), parentUID = Some(uid))
+      seed = $(seed), instr = Some(instr), parentUID = Some(uid),
+      intermediateStorageLevel = $(intermediateStorageLevel))
 
     val m = trees.head.asInstanceOf[DecisionTreeRegressionModel]
     instr.logSuccess(m)
@@ -123,7 +124,8 @@ class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S
     instr.logParams(params: _*)
 
     val trees = RandomForest.run(data, oldStrategy, numTrees = 1, featureSubsetStrategy = "all",
-      seed = $(seed), instr = Some(instr), parentUID = Some(uid))
+      seed = $(seed), instr = Some(instr), parentUID = Some(uid),
+      intermediateStorageLevel = $(intermediateStorageLevel))
 
     val m = trees.head.asInstanceOf[DecisionTreeRegressionModel]
     instr.logSuccess(m)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
index a58da50fad972..f2b562d2314cf 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
@@ -127,7 +127,8 @@ class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S
       minInstancesPerNode, seed, subsamplingRate, cacheNodeIds, checkpointInterval)
 
     val trees = RandomForest
-      .run(oldDataset, strategy, getNumTrees, getFeatureSubsetStrategy, getSeed, Some(instr))
+      .run(oldDataset, strategy, getNumTrees, getFeatureSubsetStrategy, getSeed, Some(instr),
+        $(intermediateStorageLevel))
       .map(_.asInstanceOf[DecisionTreeRegressionModel])
 
     val numFeatures = oldDataset.first().features.size
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala
index 82e1ed85a0a14..fc6957ea507a9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala
@@ -92,6 +92,7 @@ private[spark] object RandomForest extends Logging {
       featureSubsetStrategy: String,
       seed: Long,
       instr: Option[Instrumentation[_]],
+      intermediateStorageLevel: String,
       parentUID: Option[String] = None): Array[DecisionTreeModel] = {
 
     val timer = new TimeTracker()
@@ -130,7 +131,7 @@ private[spark] object RandomForest extends Logging {
     val baggedInput = BaggedPoint
       .convertToBaggedRDD(treeInput, strategy.subsamplingRate, numTrees, withReplacement, seed)
-      .persist(StorageLevel.MEMORY_AND_DISK)
+      .persist(StorageLevel.fromString(intermediateStorageLevel))
 
     // depth of the decision tree
     val maxDepth = strategy.maxDepth
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
index cd1950bd76c05..0514d36e1a15e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
@@ -21,6 +21,7 @@ import java.util.Locale
 
 import scala.util.Try
 
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.ml.PredictorParams
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.param.shared._
@@ -29,6 +30,7 @@ import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo, BoostingStrat
 import org.apache.spark.mllib.tree.impurity.{Entropy => OldEntropy, Gini => OldGini,
   Impurity => OldImpurity, Variance => OldVariance}
 import org.apache.spark.mllib.tree.loss.{AbsoluteError => OldAbsoluteError,
   ClassificationLoss => OldClassificationLoss, LogLoss => OldLogLoss, Loss => OldLoss,
   SquaredError => OldSquaredError}
 import org.apache.spark.sql.types.{DataType, DoubleType, StructType}
+import org.apache.spark.storage.StorageLevel
 
 /**
  * Parameters for Decision Tree-based algorithms.
@@ -183,6 +185,26 @@ private[ml] trait DecisionTreeParams extends PredictorParams
   @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
   def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)
 
+  /**
+   * Param for StorageLevel for intermediate datasets. Pass in a string representation of
+   * `StorageLevel`. Cannot be "NONE".
+   * Default: "MEMORY_AND_DISK".
+   *
+   * @group expertParam
+   */
+  val intermediateStorageLevel = new Param[String](this, "intermediateStorageLevel",
+    "StorageLevel for intermediate datasets. Cannot be 'NONE'.",
+    (s: String) => Try(StorageLevel.fromString(s)).isSuccess && s != "NONE")
+
+  setDefault(intermediateStorageLevel -> "MEMORY_AND_DISK")
+
+  /** @group expertGetParam */
+  def getIntermediateStorageLevel: String = $(intermediateStorageLevel)
+
+  /** @group expertSetParam */
+  @Since("2.3.0")
+  def setIntermediateStorageLevel(value: String): this.type = set(intermediateStorageLevel, value)
+
   /** (private[ml]) Create a Strategy instance to use with the old API. */
   private[ml] def getOldStrategy(
       categoricalFeatures: Map[Int, Int],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala
index d1331a57de27b..73ead7c82fe1c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala
@@ -92,7 +92,7 @@ private class RandomForest (
    */
   def run(input: RDD[LabeledPoint]): RandomForestModel = {
     val trees: Array[NewDTModel] = NewRandomForest.run(input.map(_.asML), strategy, numTrees,
-      featureSubsetStrategy, seed.toLong, None)
+      featureSubsetStrategy, seed.toLong, None, "MEMORY_AND_DISK")
     new RandomForestModel(strategy.algo, trees.map(_.toOld))
   }
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
index 918ab27e2730b..ea56ae62d52c3 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
@@ -398,6 +398,14 @@ class DecisionTreeClassifierSuite
     testDefaultReadWrite(model)
   }
+
+  test("intermediate dataset storage level") {
+    val dt = new DecisionTreeClassifier()
+    assert(dt.getIntermediateStorageLevel === "MEMORY_AND_DISK")
+    // set and check the storage level
+    dt.setIntermediateStorageLevel("DISK_ONLY")
+    assert(dt.getIntermediateStorageLevel === "DISK_ONLY")
+  }
 }
 
 private[ml] object DecisionTreeClassifierSuite extends SparkFunSuite {
diff --git a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala
index df155b464c64b..27bd6de2cf773 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala
@@ -199,7 +199,7 @@ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext {
     withClue("DecisionTree requires number of features > 0," +
       " but was given an empty features vector") {
       intercept[IllegalArgumentException] {
-        RandomForest.run(rdd, strategy, 1, "all", 42L, instr = None)
+        RandomForest.run(rdd, strategy, 1, "all", 42L, instr = None, "MEMORY_AND_DISK")
       }
     }
   }
@@ -215,7 +215,8 @@ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext {
       numClasses = 2,
       maxBins = 5,
       categoricalFeaturesInfo = Map(0 -> 1, 1 -> 5))
-    val Array(tree) = RandomForest.run(rdd, strategy, 1, "all", 42L, instr = None)
+    val Array(tree) = RandomForest.run(rdd, strategy, 1, "all", 42L, instr = None,
+      "MEMORY_AND_DISK")
     assert(tree.rootNode.impurity === -1.0)
     assert(tree.depth === 0)
     assert(tree.rootNode.prediction === lp.label)
@@ -226,7 +227,8 @@ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext {
       Variance,
       maxDepth = 2,
       maxBins = 5)
-    val Array(tree2) = RandomForest.run(rdd, strategy2, 1, "all", 42L, instr = None)
+    val Array(tree2) = RandomForest.run(rdd, strategy2, 1, "all", 42L, instr = None,
+      "MEMORY_AND_DISK")
     assert(tree2.rootNode.impurity === -1.0)
     assert(tree2.depth === 0)
     assert(tree2.rootNode.prediction === lp.label)
@@ -407,7 +409,7 @@ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext {
       numClasses = 2, categoricalFeaturesInfo = Map(0 -> 3), maxBins = 3)
 
     val model = RandomForest.run(input, strategy, numTrees = 1, featureSubsetStrategy = "all",
-      seed = 42, instr = None).head
+      seed = 42, instr = None, "MEMORY_AND_DISK").head
     model.rootNode match {
       case n: InternalNode => n.split match {
         case s: CategoricalSplit =>
@@ -430,9 +432,9 @@ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext {
       new OldStrategy(OldAlgo.Classification, Entropy, 3, 2, 100, maxMemoryInMB = 0)
 
     val tree1 = RandomForest.run(rdd, strategy1, numTrees = 1, featureSubsetStrategy = "all",
-      seed = 42, instr = None).head
+      seed = 42, instr = None, "MEMORY_AND_DISK").head
     val tree2 = RandomForest.run(rdd, strategy2, numTrees = 1, featureSubsetStrategy = "all",
-      seed = 42, instr = None).head
+      seed = 42, instr = None, "MEMORY_AND_DISK").head
 
     def getChildren(rootNode: Node): Array[InternalNode] = rootNode match {
       case n: InternalNode =>
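
Usage sketch for patch 1 (not part of the patch; `spark` and a training
DataFrame `train` with the usual "label"/"features" columns are assumed): the
new expert param controls how the bagged intermediate RDD inside
RandomForest.run is persisted, and accepts any named StorageLevel except
"NONE".

    import org.apache.spark.ml.classification.DecisionTreeClassifier

    // Defaults to "MEMORY_AND_DISK"; here the bagged points spill to disk only.
    val dt = new DecisionTreeClassifier()
      .setIntermediateStorageLevel("DISK_ONLY")
    val model = dt.fit(train)
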
From e066240fee09d7bb40e52f01852d2ffdb0efb7bf Mon Sep 17 00:00:00 2001
From: madhu
Date: Wed, 24 May 2017 13:42:11 +0530
Subject: [PATCH 2/6] added to sharedParams

---
 .../ml/param/shared/SharedParamsCodeGen.scala |  9 ++++++++-
 .../spark/ml/param/shared/sharedParams.scala  | 20 +++++++++++++++++++
 .../org/apache/spark/ml/tree/treeParams.scala | 18 +----------------
 3 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
index c94b8b4e9dfda..1d9a67434e4fd 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
@@ -82,7 +82,11 @@ private[shared] object SharedParamsCodeGen {
       ParamDesc[String]("solver", "the solver algorithm for optimization. If this is not set or " +
         "empty, default value is 'auto'", Some("\"auto\"")),
       ParamDesc[Int]("aggregationDepth", "suggested depth for treeAggregate (>= 2)", Some("2"),
-        isValid = "ParamValidators.gtEq(2)", isExpertParam = true))
+        isValid = "ParamValidators.gtEq(2)", isExpertParam = true),
+      ParamDesc[String]("intermediateStorageLevel", "StorageLevel " +
+        "for intermediate datasets", Some("\"MEMORY_AND_DISK\""),
+        isValid = """(s: String) => Try(StorageLevel.fromString(s)).isSuccess && s != "NONE"""",
+        isExpertParam = true))
 
     val code = genSharedParams(params)
     val file = "src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala"
@@ -207,7 +211,10 @@ private[shared] object SharedParamsCodeGen {
        |
        |package org.apache.spark.ml.param.shared
        |
+       |import scala.util.Try
+       |
        |import org.apache.spark.ml.param._
+       |import org.apache.spark.storage.StorageLevel
        |
        |// DO NOT MODIFY THIS FILE! It was generated by SharedParamsCodeGen.
        |
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
index e3e03dfd43dd6..0516457cf7ad3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
@@ -17,7 +17,10 @@
 
 package org.apache.spark.ml.param.shared
 
+import scala.util.Try
+
 import org.apache.spark.ml.param._
+import org.apache.spark.storage.StorageLevel
 
 // DO NOT MODIFY THIS FILE! It was generated by SharedParamsCodeGen.
 
@@ -406,4 +409,21 @@ private[ml] trait HasAggregationDepth extends Params {
   /** @group expertGetParam */
   final def getAggregationDepth: Int = $(aggregationDepth)
 }
+
+/**
+ * Trait for shared param intermediateStorageLevel (default: "MEMORY_AND_DISK").
+ */
+private[ml] trait HasIntermediateStorageLevel extends Params {
+
+  /**
+   * Param for StorageLevel for intermediate datasets.
+   * @group expertParam
+   */
+  final val intermediateStorageLevel: Param[String] = new Param[String](this, "intermediateStorageLevel", "StorageLevel for intermediate datasets", (s: String) => Try(StorageLevel.fromString(s)).isSuccess && s != "NONE")
+
+  setDefault(intermediateStorageLevel, "MEMORY_AND_DISK")
+
+  /** @group expertGetParam */
+  final def getIntermediateStorageLevel: String = $(intermediateStorageLevel)
+}
 // scalastyle:on
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
index 0514d36e1a15e..84617bb26dcec 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
@@ -38,7 +38,7 @@ import org.apache.spark.storage.StorageLevel
  * Note: Marked as private and DeveloperApi since this may be made public in the future.
  */
 private[ml] trait DecisionTreeParams extends PredictorParams
-  with HasCheckpointInterval with HasSeed {
+  with HasCheckpointInterval with HasSeed with HasIntermediateStorageLevel {
 
   /**
    * Maximum depth of the tree (>= 0).
@@ -185,22 +185,6 @@ private[ml] trait DecisionTreeParams extends PredictorParams
   @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
   def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)
 
-  /**
-   * Param for StorageLevel for intermediate datasets. Pass in a string representation of
-   * `StorageLevel`. Cannot be "NONE".
-   * Default: "MEMORY_AND_DISK".
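
Note on patch 2: the shared HasIntermediateStorageLevel trait keeps the same
validator as the tree-local param it replaces. A standalone sketch of what
that validator accepts (illustrative, not part of the patch):

    import scala.util.Try
    import org.apache.spark.storage.StorageLevel

    val isValid = (s: String) => Try(StorageLevel.fromString(s)).isSuccess && s != "NONE"
    assert(isValid("MEMORY_ONLY"))           // any named StorageLevel is accepted
    assert(isValid("MEMORY_AND_DISK_SER"))
    assert(!isValid("NONE"))                 // rejected: the bagged RDD must be cached
    assert(!isValid("SOME_BOGUS_LEVEL"))     // rejected: fromString throws, so Try fails
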
-   *
-   * @group expertParam
-   */
-  val intermediateStorageLevel = new Param[String](this, "intermediateStorageLevel",
-    "StorageLevel for intermediate datasets. Cannot be 'NONE'.",
-    (s: String) => Try(StorageLevel.fromString(s)).isSuccess && s != "NONE")
-
-  setDefault(intermediateStorageLevel -> "MEMORY_AND_DISK")
-
-  /** @group expertGetParam */
-  def getIntermediateStorageLevel: String = $(intermediateStorageLevel)
-
   /** @group expertSetParam */
   @Since("2.3.0")
   def setIntermediateStorageLevel(value: String): this.type = set(intermediateStorageLevel, value)
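
Note on patch 3 (below): ALS keeps the same user-facing API; the param and
getter now come from the shared trait rather than being declared locally, and
the existing setter on ALS is not touched by this series. Code such as the
following sketch should continue to compile unchanged (illustrative):

    import org.apache.spark.ml.recommendation.ALS

    val als = new ALS()
      .setIntermediateStorageLevel("MEMORY_ONLY")   // now the shared param
      .setFinalStorageLevel("MEMORY_AND_DISK")      // separate param, unaffected

From 4e48f5c42d746ac0ab48c30e87d781e47bab4820 Mon Sep 17 00:00:00 2001
From: madhu
Date: Wed, 24 May 2017 13:50:19 +0530
Subject: [PATCH 3/6] ALS uses generic param now

---
 .../org/apache/spark/ml/recommendation/ALS.scala | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
index d626f04599670..136ad4fc05a35 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -129,7 +129,7 @@ private[recommendation] trait ALSModelParams extends Params with HasPredictionCo
  * Common params for ALS.
  */
 private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter with HasRegParam
-  with HasPredictionCol with HasCheckpointInterval with HasSeed {
+  with HasPredictionCol with HasCheckpointInterval with HasSeed with HasIntermediateStorageLevel{
 
   /**
    * Param for rank of the matrix factorization (positive).
@@ -205,20 +205,6 @@ private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter w
   /** @group getParam */
   def getNonnegative: Boolean = $(nonnegative)
 
-  /**
-   * Param for StorageLevel for intermediate datasets. Pass in a string representation of
-   * `StorageLevel`. Cannot be "NONE".
-   * Default: "MEMORY_AND_DISK".
-   *
-   * @group expertParam
-   */
-  val intermediateStorageLevel = new Param[String](this, "intermediateStorageLevel",
-    "StorageLevel for intermediate datasets. Cannot be 'NONE'.",
-    (s: String) => Try(StorageLevel.fromString(s)).isSuccess && s != "NONE")
-
-  /** @group expertGetParam */
-  def getIntermediateStorageLevel: String = $(intermediateStorageLevel)
-
   /**
    * Param for StorageLevel for ALS model factors. Pass in a string representation of
    * `StorageLevel`.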
From bfe81c3d786ae82cd6dcc89b64384e5e1e584c5a Mon Sep 17 00:00:00 2001
From: madhu
Date: Tue, 14 Nov 2017 10:30:07 +0530
Subject: [PATCH 4/6] added mima excludes

---
 project/MimaExcludes.scala | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index e6f136c7c8b0a..aefc7fb7772eb 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -1051,6 +1051,13 @@ object MimaExcludes {
 
       // [SPARK-21680][ML][MLLIB]optimzie Vector coompress
       ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Vector.toSparseWithSize"),
      ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Vector.toSparseWithSize")
+    ) ++ Seq(
+      // [SPARK-20723][ML] Add intermediate storage level to tree based classifiers
+      ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.param.shared.HasIntermediateStorageLevel.org$apache$spark$ml$param$shared$HasIntermediateStorageLevel$_setter_$intermediateStorageLevel_="),
+      ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.param.shared.HasIntermediateStorageLevel.getIntermediateStorageLevel"),
+      ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.param.shared.HasIntermediateStorageLevel.intermediateStorageLevel"),
+      ProblemFilters.exclude[FinalMethodProblem]("org.apache.spark.ml.recommendation.ALS.getIntermediateStorageLevel"),
+      ProblemFilters.exclude[FinalMethodProblem]("org.apache.spark.ml.recommendation.ALS.intermediateStorageLevel")
     )
   }

From 2ed39ccb57951fbd380fbc7dfa50f7ba69711a4c Mon Sep 17 00:00:00 2001
From: madhu
Date: Tue, 14 Nov 2017 10:30:41 +0530
Subject: [PATCH 5/6] style change

---
 .../src/main/scala/org/apache/spark/ml/recommendation/ALS.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
index c6c8798f3c0b0..0863437c32b0e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -130,7 +130,7 @@ private[recommendation] trait ALSModelParams extends Params with HasPredictionCo
  * Common params for ALS.
  */
 private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter with HasRegParam
-  with HasPredictionCol with HasCheckpointInterval with HasSeed with HasIntermediateStorageLevel{
+  with HasPredictionCol with HasCheckpointInterval with HasSeed with HasIntermediateStorageLevel {
 
   /**
    * Param for rank of the matrix factorization (positive).
From dd197173e3775bddf8b8e03bcf766578de3e4c9e Mon Sep 17 00:00:00 2001
From: madhu
Date: Sat, 18 Nov 2017 21:46:36 +0530
Subject: [PATCH 6/6] fixed merge issues

---
 .../apache/spark/ml/param/shared/SharedParamsCodeGen.scala    | 4 ++--
 .../scala/org/apache/spark/ml/param/shared/sharedParams.scala | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
index 2e7108b60b47f..e7013588b3141 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
@@ -87,12 +87,12 @@ private[shared] object SharedParamsCodeGen {
       ParamDesc[String]("intermediateStorageLevel", "StorageLevel " +
         "for intermediate datasets", Some("\"MEMORY_AND_DISK\""),
         isValid = """(s: String) => Try(StorageLevel.fromString(s)).isSuccess && s != "NONE"""",
+        isExpertParam = true),
       ParamDesc[Boolean]("collectSubModels", "If set to false, then only the single best " +
         "sub-model will be available after fitting. If set to true, then all sub-models will be " +
         "available. Warning: For large models, collecting all sub-models can cause OOMs on the " +
         "Spark driver.",
-        Some("false"), isExpertParam = true)
-        isExpertParam = true))
+        Some("false"), isExpertParam = true))
 
     val code = genSharedParams(params)
     val file = "src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala"
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
index ec87f2e047b46..d5d8f698e37f7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
@@ -505,5 +505,4 @@ private[ml] trait HasCollectSubModels extends Params {
   /** @group expertGetParam */
   final def getCollectSubModels: Boolean = $(collectSubModels)
 }
-/
 // scalastyle:on
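
End-to-end sketch for the whole series (illustrative; `train` is an assumed
DataFrame of labeled feature vectors): memory-constrained jobs can trade some
speed for headroom by keeping the bagged input off the executor heap.

    import org.apache.spark.ml.classification.RandomForestClassifier

    val rf = new RandomForestClassifier()
      .setNumTrees(100)
      .setIntermediateStorageLevel("DISK_ONLY")  // bagged RDD no longer competes for memory
    val model = rf.fit(train)
    assert(model.getIntermediateStorageLevel == "DISK_ONLY")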