From 836d760ba89713223d719adcef4c0d47dd306f41 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Wed, 18 Apr 2018 19:46:17 +0800 Subject: [PATCH 1/5] init pr --- .../ml/classification/GBTClassifier.scala | 15 +++++++++++++++ .../spark/ml/regression/GBTRegressor.scala | 17 ++++++++++++++++- .../ml/classification/GBTClassifierSuite.scala | 18 ++++++++++++++++-- .../ml/regression/GBTRegressorSuite.scala | 17 +++++++++++++++-- 4 files changed, 62 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index c0255103bc31..99f2a4ba1a9a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -334,6 +334,21 @@ class GBTClassificationModel private[ml]( // hard coded loss, which is not meant to be changed in the model private val loss = getOldLossType + /** + * Method to compute error or loss for every iteration of gradient boosting. + * + * @param dataset Dataset for validation. + */ + @Since("2.4.0") + def evaluateEachIteration(dataset: Dataset[_]): Array[Double] = { + val data = dataset.select(col($(labelCol)), col($(featuresCol))).rdd.map { + case Row(label: Double, features: Vector) => LabeledPoint(label, features) + } + GradientBoostedTrees.evaluateEachIteration(data, trees, treeWeights, loss, + OldAlgo.Classification + ) + } + @Since("2.0.0") override def write: MLWriter = new GBTClassificationModel.GBTClassificationModelWriter(this) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index 834aaa0e362d..cb35e8933769 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -34,7 +34,7 @@ import org.apache.spark.ml.util.DefaultParamsReader.Metadata import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo} import org.apache.spark.mllib.tree.model.{GradientBoostedTreesModel => OldGBTModel} import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{DataFrame, Dataset} +import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.sql.functions._ /** @@ -269,6 +269,21 @@ class GBTRegressionModel private[ml]( new OldGBTModel(OldAlgo.Regression, _trees.map(_.toOld), _treeWeights) } + /** + * Method to compute error or loss for every iteration of gradient boosting. + * + * @param dataset Dataset for validation. + */ + @Since("2.4.0") + def evaluateEachIteration(dataset: Dataset[_]): Array[Double] = { + val data = dataset.select(col($(labelCol)), col($(featuresCol))).rdd.map { + case Row(label: Double, features: Vector) => LabeledPoint(label, features) + } + GradientBoostedTrees.evaluateEachIteration(data, trees, treeWeights, getOldLossType, + OldAlgo.Regression + ) + } + @Since("2.0.0") override def write: MLWriter = new GBTRegressionModel.GBTRegressionModelWriter(this) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala index f0ee5496f9d1..7d48c9d37b14 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala @@ -18,14 +18,14 @@ package org.apache.spark.ml.classification import com.github.fommil.netlib.BLAS - import org.apache.spark.{SparkException, SparkFunSuite} + import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.param.ParamsSuite import org.apache.spark.ml.regression.DecisionTreeRegressionModel import org.apache.spark.ml.tree.RegressionLeafNode -import org.apache.spark.ml.tree.impl.TreeTests +import org.apache.spark.ml.tree.impl.{GradientBoostedTrees, TreeTests} import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils} import org.apache.spark.ml.util.TestingUtils._ import org.apache.spark.mllib.regression.{LabeledPoint => OldLabeledPoint} @@ -365,6 +365,20 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest { assert(mostImportantFeature !== mostIF) } + test("model evaluateEachIteration") { + for (lossType <- Seq("logistic")) { + val gbt = new GBTClassifier() + .setMaxDepth(2) + .setMaxIter(2) + .setLossType(lossType) + val model = gbt.fit(trainData.toDF) + val eval1 = model.evaluateEachIteration(validationData.toDF) + val eval2 = GradientBoostedTrees.evaluateEachIteration(validationData, + model.trees, model.treeWeights, model.getOldLossType, OldAlgo.Classification) + assert(eval1 === eval2) + } + } + ///////////////////////////////////////////////////////////////////////////// // Tests of model save/load ///////////////////////////////////////////////////////////////////////////// diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala index fad11d078250..e6aeee74ac9b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala @@ -18,9 +18,10 @@ package org.apache.spark.ml.regression import org.apache.spark.SparkFunSuite + import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.{Vector, Vectors} -import org.apache.spark.ml.tree.impl.TreeTests +import org.apache.spark.ml.tree.impl.{GradientBoostedTrees, TreeTests} import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils} import org.apache.spark.mllib.regression.{LabeledPoint => OldLabeledPoint} import org.apache.spark.mllib.tree.{EnsembleTestHelper, GradientBoostedTrees => OldGBT} @@ -201,7 +202,19 @@ class GBTRegressorSuite extends MLTest with DefaultReadWriteTest { assert(mostImportantFeature !== mostIF) } - + test("model evaluateEachIteration") { + for (lossType <- Seq("squared", "absolute")) { + val gbt = new GBTRegressor() + .setMaxDepth(2) + .setMaxIter(2) + .setLossType(lossType) + val model = gbt.fit(trainData.toDF) + val eval1 = model.evaluateEachIteration(validationData.toDF) + val eval2 = GradientBoostedTrees.evaluateEachIteration(validationData, + model.trees, model.treeWeights, model.getOldLossType, OldAlgo.Regression) + assert(eval1 === eval2) + } + } ///////////////////////////////////////////////////////////////////////////// // Tests of model save/load From 16fd4d690acf2914665de803d528cd1f96ed5ec8 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Wed, 18 Apr 2018 19:52:50 +0800 Subject: [PATCH 2/5] fix style --- .../org/apache/spark/ml/classification/GBTClassifierSuite.scala | 2 +- .../org/apache/spark/ml/regression/GBTRegressorSuite.scala | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala index 7d48c9d37b14..94f4408fa3fe 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala @@ -18,8 +18,8 @@ package org.apache.spark.ml.classification import com.github.fommil.netlib.BLAS -import org.apache.spark.{SparkException, SparkFunSuite} +import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.param.ParamsSuite diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala index e6aeee74ac9b..b5819c28ca17 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.ml.regression import org.apache.spark.SparkFunSuite - import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.tree.impl.{GradientBoostedTrees, TreeTests} From a2af286ea71803ee61a5b05c77097b3c2556fd52 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Fri, 4 May 2018 17:51:49 +0800 Subject: [PATCH 3/5] add loss param & update unit test --- .../spark/ml/regression/GBTRegressor.scala | 7 +++--- .../org/apache/spark/ml/tree/treeParams.scala | 6 ++++- .../classification/GBTClassifierSuite.scala | 17 +++++-------- .../ml/regression/GBTRegressorSuite.scala | 24 +++++++++---------- 4 files changed, 26 insertions(+), 28 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index cb35e8933769..90d1b9751e7f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -275,13 +275,12 @@ class GBTRegressionModel private[ml]( * @param dataset Dataset for validation. */ @Since("2.4.0") - def evaluateEachIteration(dataset: Dataset[_]): Array[Double] = { + def evaluateEachIteration(dataset: Dataset[_], loss: String): Array[Double] = { val data = dataset.select(col($(labelCol)), col($(featuresCol))).rdd.map { case Row(label: Double, features: Vector) => LabeledPoint(label, features) } - GradientBoostedTrees.evaluateEachIteration(data, trees, treeWeights, getOldLossType, - OldAlgo.Regression - ) + GradientBoostedTrees.evaluateEachIteration(data, trees, treeWeights, + convertToOldLossType(loss), OldAlgo.Regression) } @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala index 81b6222acc7c..ec8868bb42cb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala @@ -579,7 +579,11 @@ private[ml] trait GBTRegressorParams extends GBTParams with TreeRegressorParams /** (private[ml]) Convert new loss to old loss. */ override private[ml] def getOldLossType: OldLoss = { - getLossType match { + convertToOldLossType(getLossType) + } + + private[ml] def convertToOldLossType(loss: String): OldLoss = { + loss match { case "squared" => OldSquaredError case "absolute" => OldAbsoluteError case _ => diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala index 94f4408fa3fe..c52002cc1815 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala @@ -366,17 +366,12 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest { } test("model evaluateEachIteration") { - for (lossType <- Seq("logistic")) { - val gbt = new GBTClassifier() - .setMaxDepth(2) - .setMaxIter(2) - .setLossType(lossType) - val model = gbt.fit(trainData.toDF) - val eval1 = model.evaluateEachIteration(validationData.toDF) - val eval2 = GradientBoostedTrees.evaluateEachIteration(validationData, - model.trees, model.treeWeights, model.getOldLossType, OldAlgo.Classification) - assert(eval1 === eval2) - } + val gbt = new GBTClassifier() + .setMaxDepth(2) + .setMaxIter(2) + val model = gbt.fit(trainData.toDF) + val eval = model.evaluateEachIteration(validationData.toDF) + assert(Vectors.dense(eval) ~== Vectors.dense(1.7641, 1.8209) relTol 1E-3) } ///////////////////////////////////////////////////////////////////////////// diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala index b5819c28ca17..04323450a82b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala @@ -20,8 +20,9 @@ package org.apache.spark.ml.regression import org.apache.spark.SparkFunSuite import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.{Vector, Vectors} -import org.apache.spark.ml.tree.impl.{GradientBoostedTrees, TreeTests} +import org.apache.spark.ml.tree.impl.TreeTests import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils} +import org.apache.spark.ml.util.TestingUtils._ import org.apache.spark.mllib.regression.{LabeledPoint => OldLabeledPoint} import org.apache.spark.mllib.tree.{EnsembleTestHelper, GradientBoostedTrees => OldGBT} import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo} @@ -202,17 +203,16 @@ class GBTRegressorSuite extends MLTest with DefaultReadWriteTest { } test("model evaluateEachIteration") { - for (lossType <- Seq("squared", "absolute")) { - val gbt = new GBTRegressor() - .setMaxDepth(2) - .setMaxIter(2) - .setLossType(lossType) - val model = gbt.fit(trainData.toDF) - val eval1 = model.evaluateEachIteration(validationData.toDF) - val eval2 = GradientBoostedTrees.evaluateEachIteration(validationData, - model.trees, model.treeWeights, model.getOldLossType, OldAlgo.Regression) - assert(eval1 === eval2) - } + val gbt = new GBTRegressor() + .setMaxDepth(2) + .setMaxIter(2) + .setLossType("squared") + val model = gbt.fit(trainData.toDF) + val eval1 = model.evaluateEachIteration(validationData.toDF, "squared") + assert(Vectors.dense(eval1) ~== Vectors.dense(0.3736, 0.3745) relTol 1E-3) + + val eval2 = model.evaluateEachIteration(validationData.toDF, "absolute") + assert(Vectors.dense(eval2) ~== Vectors.dense(0.3908, 0.3931) relTol 1E-3) } ///////////////////////////////////////////////////////////////////////////// From c32b5a81ffffde06d318aee1b18c82d82568d2e8 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Tue, 8 May 2018 10:28:38 +0800 Subject: [PATCH 4/5] address comments and update unittest --- .../spark/ml/regression/GBTRegressor.scala | 1 + .../classification/GBTClassifierSuite.scala | 28 ++++++++++++-- .../ml/regression/GBTRegressorSuite.scala | 38 +++++++++++++------ 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index 90d1b9751e7f..30f7c43dc71e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -273,6 +273,7 @@ class GBTRegressionModel private[ml]( * Method to compute error or loss for every iteration of gradient boosting. * * @param dataset Dataset for validation. + * @param loss The loss function used to compute error. Supported options: squared, absolute */ @Since("2.4.0") def evaluateEachIteration(dataset: Dataset[_], loss: String): Array[Double] = { diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala index c52002cc1815..ff6915fcf351 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala @@ -367,11 +367,31 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest { test("model evaluateEachIteration") { val gbt = new GBTClassifier() + .setSeed(1L) .setMaxDepth(2) - .setMaxIter(2) - val model = gbt.fit(trainData.toDF) - val eval = model.evaluateEachIteration(validationData.toDF) - assert(Vectors.dense(eval) ~== Vectors.dense(1.7641, 1.8209) relTol 1E-3) + .setMaxIter(3) + .setLossType("logistic") + val model3 = gbt.fit(trainData.toDF) + val model1 = new GBTClassificationModel("gbt-cls-model-test1", + model3.trees.take(1), model3.treeWeights.take(1), model3.numFeatures, model3.numClasses) + val model2 = new GBTClassificationModel("gbt-cls-model-test2", + model3.trees.take(2), model3.treeWeights.take(2), model3.numFeatures, model3.numClasses) + + for (evalLossType <- GBTClassifier.supportedLossTypes) { + val evalArr = model3.evaluateEachIteration(validationData.toDF) + val remappedValidationData = validationData.map( + x => new LabeledPoint((x.label * 2) - 1, x.features)) + val lossErr1 = GradientBoostedTrees.computeError(remappedValidationData, + model1.trees, model1.treeWeights, model1.getOldLossType) + val lossErr2 = GradientBoostedTrees.computeError(remappedValidationData, + model2.trees, model2.treeWeights, model2.getOldLossType) + val lossErr3 = GradientBoostedTrees.computeError(remappedValidationData, + model3.trees, model3.treeWeights, model3.getOldLossType) + + assert(evalArr(0) ~== lossErr1 relTol 1E-3) + assert(evalArr(1) ~== lossErr2 relTol 1E-3) + assert(evalArr(2) ~== lossErr3 relTol 1E-3) + } } ///////////////////////////////////////////////////////////////////////////// diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala index 04323450a82b..773f6d2c542f 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.regression import org.apache.spark.SparkFunSuite import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.{Vector, Vectors} -import org.apache.spark.ml.tree.impl.TreeTests +import org.apache.spark.ml.tree.impl.{GradientBoostedTrees, TreeTests} import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils} import org.apache.spark.ml.util.TestingUtils._ import org.apache.spark.mllib.regression.{LabeledPoint => OldLabeledPoint} @@ -203,16 +203,32 @@ class GBTRegressorSuite extends MLTest with DefaultReadWriteTest { } test("model evaluateEachIteration") { - val gbt = new GBTRegressor() - .setMaxDepth(2) - .setMaxIter(2) - .setLossType("squared") - val model = gbt.fit(trainData.toDF) - val eval1 = model.evaluateEachIteration(validationData.toDF, "squared") - assert(Vectors.dense(eval1) ~== Vectors.dense(0.3736, 0.3745) relTol 1E-3) - - val eval2 = model.evaluateEachIteration(validationData.toDF, "absolute") - assert(Vectors.dense(eval2) ~== Vectors.dense(0.3908, 0.3931) relTol 1E-3) + for (lossType <- GBTRegressor.supportedLossTypes) { + val gbt = new GBTRegressor() + .setSeed(1L) + .setMaxDepth(2) + .setMaxIter(3) + .setLossType(lossType) + val model3 = gbt.fit(trainData.toDF) + val model1 = new GBTRegressionModel("gbt-reg-model-test1", + model3.trees.take(1), model3.treeWeights.take(1), model3.numFeatures) + val model2 = new GBTRegressionModel("gbt-reg-model-test2", + model3.trees.take(2), model3.treeWeights.take(2), model3.numFeatures) + + for (evalLossType <- GBTRegressor.supportedLossTypes) { + val evalArr = model3.evaluateEachIteration(validationData.toDF, evalLossType) + val lossErr1 = GradientBoostedTrees.computeError(validationData, + model1.trees, model1.treeWeights, model1.convertToOldLossType(evalLossType)) + val lossErr2 = GradientBoostedTrees.computeError(validationData, + model2.trees, model2.treeWeights, model2.convertToOldLossType(evalLossType)) + val lossErr3 = GradientBoostedTrees.computeError(validationData, + model3.trees, model3.treeWeights, model3.convertToOldLossType(evalLossType)) + + assert(evalArr(0) ~== lossErr1 relTol 1E-3) + assert(evalArr(1) ~== lossErr2 relTol 1E-3) + assert(evalArr(2) ~== lossErr3 relTol 1E-3) + } + } } ///////////////////////////////////////////////////////////////////////////// From 0e7311fc570dd2debbbdff5fc5f7795316a2f415 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Wed, 9 May 2018 15:52:54 +0800 Subject: [PATCH 5/5] update tests --- .../classification/GBTClassifierSuite.scala | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala index ff6915fcf351..e20de196d65c 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala @@ -377,21 +377,19 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest { val model2 = new GBTClassificationModel("gbt-cls-model-test2", model3.trees.take(2), model3.treeWeights.take(2), model3.numFeatures, model3.numClasses) - for (evalLossType <- GBTClassifier.supportedLossTypes) { - val evalArr = model3.evaluateEachIteration(validationData.toDF) - val remappedValidationData = validationData.map( - x => new LabeledPoint((x.label * 2) - 1, x.features)) - val lossErr1 = GradientBoostedTrees.computeError(remappedValidationData, - model1.trees, model1.treeWeights, model1.getOldLossType) - val lossErr2 = GradientBoostedTrees.computeError(remappedValidationData, - model2.trees, model2.treeWeights, model2.getOldLossType) - val lossErr3 = GradientBoostedTrees.computeError(remappedValidationData, - model3.trees, model3.treeWeights, model3.getOldLossType) - - assert(evalArr(0) ~== lossErr1 relTol 1E-3) - assert(evalArr(1) ~== lossErr2 relTol 1E-3) - assert(evalArr(2) ~== lossErr3 relTol 1E-3) - } + val evalArr = model3.evaluateEachIteration(validationData.toDF) + val remappedValidationData = validationData.map( + x => new LabeledPoint((x.label * 2) - 1, x.features)) + val lossErr1 = GradientBoostedTrees.computeError(remappedValidationData, + model1.trees, model1.treeWeights, model1.getOldLossType) + val lossErr2 = GradientBoostedTrees.computeError(remappedValidationData, + model2.trees, model2.treeWeights, model2.getOldLossType) + val lossErr3 = GradientBoostedTrees.computeError(remappedValidationData, + model3.trees, model3.treeWeights, model3.getOldLossType) + + assert(evalArr(0) ~== lossErr1 relTol 1E-3) + assert(evalArr(1) ~== lossErr2 relTol 1E-3) + assert(evalArr(2) ~== lossErr3 relTol 1E-3) } /////////////////////////////////////////////////////////////////////////////