Skip to content

Commit

Permalink
[SPARK-19282][ML][SPARKR] RandomForest Wrapper and GBT Wrapper return…
Browse files Browse the repository at this point in the history
… param "maxDepth" to R models

## What changes were proposed in this pull request?

The RandomForest and GBT R wrappers now return the param `maxDepth` to R models, so it is available in the model summary.

The following 4 R wrappers are changed:
* `RandomForestClassifierWrapper`
* `RandomForestRegressorWrapper`
* `GBTClassifierWrapper`
* `GBTRegressorWrapper`

## How was this patch tested?

Tested manually on my local machine; `maxDepth` assertions were also added to `R/pkg/inst/tests/testthat/test_mllib_tree.R`.

Author: Xin Ren <iamshrek@126.com>

Closes #17207 from keypointt/SPARK-19282.
  • Loading branch information
keypointt authored and Felix Cheung committed Mar 12, 2017
1 parent 2f5187b commit 9f8ce48
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 4 deletions.
11 changes: 7 additions & 4 deletions R/pkg/R/mllib_tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,14 @@ summary.treeEnsemble <- function(model) {
numFeatures <- callJMethod(jobj, "numFeatures")
features <- callJMethod(jobj, "features")
featureImportances <- callJMethod(callJMethod(jobj, "featureImportances"), "toString")
maxDepth <- callJMethod(jobj, "maxDepth")
numTrees <- callJMethod(jobj, "numTrees")
treeWeights <- callJMethod(jobj, "treeWeights")
list(formula = formula,
numFeatures = numFeatures,
features = features,
featureImportances = featureImportances,
maxDepth = maxDepth,
numTrees = numTrees,
treeWeights = treeWeights,
jobj = jobj)
Expand All @@ -70,6 +72,7 @@ print.summary.treeEnsemble <- function(x) {
cat("\nNumber of features: ", x$numFeatures)
cat("\nFeatures: ", unlist(x$features))
cat("\nFeature importances: ", x$featureImportances)
cat("\nMax Depth: ", x$maxDepth)
cat("\nNumber of trees: ", x$numTrees)
cat("\nTree weights: ", unlist(x$treeWeights))

Expand Down Expand Up @@ -197,8 +200,8 @@ setMethod("spark.gbt", signature(data = "SparkDataFrame", formula = "formula"),
#' @return \code{summary} returns summary information of the fitted model, which is a list.
#' The list of components includes \code{formula} (formula),
#' \code{numFeatures} (number of features), \code{features} (list of features),
#' \code{featureImportances} (feature importances), \code{numTrees} (number of trees),
#' and \code{treeWeights} (tree weights).
#' \code{featureImportances} (feature importances), \code{maxDepth} (max depth of trees),
#' \code{numTrees} (number of trees), and \code{treeWeights} (tree weights).
#' @rdname spark.gbt
#' @aliases summary,GBTRegressionModel-method
#' @export
Expand Down Expand Up @@ -403,8 +406,8 @@ setMethod("spark.randomForest", signature(data = "SparkDataFrame", formula = "fo
#' @return \code{summary} returns summary information of the fitted model, which is a list.
#' The list of components includes \code{formula} (formula),
#' \code{numFeatures} (number of features), \code{features} (list of features),
#' \code{featureImportances} (feature importances), \code{numTrees} (number of trees),
#' and \code{treeWeights} (tree weights).
#' \code{featureImportances} (feature importances), \code{maxDepth} (max depth of trees),
#' \code{numTrees} (number of trees), and \code{treeWeights} (tree weights).
#' @rdname spark.randomForest
#' @aliases summary,RandomForestRegressionModel-method
#' @export
Expand Down
10 changes: 10 additions & 0 deletions R/pkg/inst/tests/testthat/test_mllib_tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ test_that("spark.gbt", {
tolerance = 1e-4)
stats <- summary(model)
expect_equal(stats$numTrees, 20)
expect_equal(stats$maxDepth, 5)
expect_equal(stats$formula, "Employed ~ .")
expect_equal(stats$numFeatures, 6)
expect_equal(length(stats$treeWeights), 20)
Expand All @@ -53,6 +54,7 @@ test_that("spark.gbt", {
expect_equal(stats$numFeatures, stats2$numFeatures)
expect_equal(stats$features, stats2$features)
expect_equal(stats$featureImportances, stats2$featureImportances)
expect_equal(stats$maxDepth, stats2$maxDepth)
expect_equal(stats$numTrees, stats2$numTrees)
expect_equal(stats$treeWeights, stats2$treeWeights)

Expand All @@ -66,6 +68,7 @@ test_that("spark.gbt", {
stats <- summary(model)
expect_equal(stats$numFeatures, 2)
expect_equal(stats$numTrees, 20)
expect_equal(stats$maxDepth, 5)
expect_error(capture.output(stats), NA)
expect_true(length(capture.output(stats)) > 6)
predictions <- collect(predict(model, data))$prediction
Expand Down Expand Up @@ -93,6 +96,7 @@ test_that("spark.gbt", {
expect_equal(iris2$NumericSpecies, as.double(collect(predict(m, df))$prediction))
expect_equal(s$numFeatures, 5)
expect_equal(s$numTrees, 20)
# Assert maxDepth on `s`, the same summary object checked by the two lines above.
# `stats` is the summary of an earlier model in this test, so checking it here
# would not exercise this model at all.
expect_equal(s$maxDepth, 5)

# spark.gbt classification can work on libsvm data
data <- read.df(absoluteSparkPath("data/mllib/sample_binary_classification_data.txt"),
Expand All @@ -116,6 +120,7 @@ test_that("spark.randomForest", {

stats <- summary(model)
expect_equal(stats$numTrees, 1)
expect_equal(stats$maxDepth, 5)
expect_error(capture.output(stats), NA)
expect_true(length(capture.output(stats)) > 6)

Expand All @@ -129,6 +134,7 @@ test_that("spark.randomForest", {
tolerance = 1e-4)
stats <- summary(model)
expect_equal(stats$numTrees, 20)
expect_equal(stats$maxDepth, 5)

modelPath <- tempfile(pattern = "spark-randomForestRegression", fileext = ".tmp")
write.ml(model, modelPath)
Expand All @@ -141,6 +147,7 @@ test_that("spark.randomForest", {
expect_equal(stats$features, stats2$features)
expect_equal(stats$featureImportances, stats2$featureImportances)
expect_equal(stats$numTrees, stats2$numTrees)
expect_equal(stats$maxDepth, stats2$maxDepth)
expect_equal(stats$treeWeights, stats2$treeWeights)

unlink(modelPath)
Expand All @@ -153,6 +160,7 @@ test_that("spark.randomForest", {
stats <- summary(model)
expect_equal(stats$numFeatures, 2)
expect_equal(stats$numTrees, 20)
expect_equal(stats$maxDepth, 5)
expect_error(capture.output(stats), NA)
expect_true(length(capture.output(stats)) > 6)
# Test string prediction values
Expand Down Expand Up @@ -187,6 +195,8 @@ test_that("spark.randomForest", {
stats <- summary(model)
expect_equal(stats$numFeatures, 2)
expect_equal(stats$numTrees, 20)
expect_equal(stats$maxDepth, 5)

# Test numeric prediction values
predictions <- collect(predict(model, data))$prediction
expect_equal(length(grep("1.0", predictions)), 50)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ private[r] class GBTClassifierWrapper private (
lazy val featureImportances: Vector = gbtcModel.featureImportances
lazy val numTrees: Int = gbtcModel.getNumTrees
lazy val treeWeights: Array[Double] = gbtcModel.treeWeights
lazy val maxDepth: Int = gbtcModel.getMaxDepth

def summary: String = gbtcModel.toDebugString

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ private[r] class GBTRegressorWrapper private (
lazy val featureImportances: Vector = gbtrModel.featureImportances
lazy val numTrees: Int = gbtrModel.getNumTrees
lazy val treeWeights: Array[Double] = gbtrModel.treeWeights
lazy val maxDepth: Int = gbtrModel.getMaxDepth

def summary: String = gbtrModel.toDebugString

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ private[r] class RandomForestClassifierWrapper private (
lazy val featureImportances: Vector = rfcModel.featureImportances
lazy val numTrees: Int = rfcModel.getNumTrees
lazy val treeWeights: Array[Double] = rfcModel.treeWeights
lazy val maxDepth: Int = rfcModel.getMaxDepth

def summary: String = rfcModel.toDebugString

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ private[r] class RandomForestRegressorWrapper private (
lazy val featureImportances: Vector = rfrModel.featureImportances
lazy val numTrees: Int = rfrModel.getNumTrees
lazy val treeWeights: Array[Double] = rfrModel.treeWeights
lazy val maxDepth: Int = rfrModel.getMaxDepth

def summary: String = rfrModel.toDebugString

Expand Down

0 comments on commit 9f8ce48

Please sign in to comment.