From 83c75043fee2a20f1eb6298bd2dab1259409c3ef Mon Sep 17 00:00:00 2001 From: Peng Meng Date: Thu, 3 Aug 2017 16:26:51 +0800 Subject: [PATCH 1/3] fix RF doc --- .../org/apache/spark/ml/tree/impl/DTStatsAggregator.scala | 3 --- 1 file changed, 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DTStatsAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DTStatsAggregator.scala index 61091bb803e49..d44ea87bf6ab8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DTStatsAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DTStatsAggregator.scala @@ -78,9 +78,6 @@ private[spark] class DTStatsAggregator( /** * Array of parent node sufficient stats. - * - * Note: this is necessary because stats for the parent node are not available - * on the first iteration of tree learning. */ private val parentStats: Array[Double] = new Array[Double](statsSize) From 814e6b2fc510633eb347bb698e6a7a3d28407f14 Mon Sep 17 00:00:00 2001 From: Peng Meng Date: Mon, 7 Aug 2017 16:33:39 +0800 Subject: [PATCH 2/3] add comment for parentStats --- .../scala/org/apache/spark/ml/tree/impl/DTStatsAggregator.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DTStatsAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DTStatsAggregator.scala index d44ea87bf6ab8..d73332c085652 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DTStatsAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DTStatsAggregator.scala @@ -78,6 +78,8 @@ private[spark] class DTStatsAggregator( /** * Array of parent node sufficient stats. + * Parent stats need to be explicitly tracked in the [[DTStatsAggregator]] for unordered + * categorical features. */ private val parentStats: Array[Double] = new Array[Double](statsSize) From 04e5abdf07e7f7bc780f62a9bec56eb326d80539 Mon Sep 17 00:00:00 2001 From: Peng Meng Date: Mon, 7 Aug 2017 16:42:40 +0800 Subject: [PATCH 3/3] fix RF doc --- .../org/apache/spark/ml/tree/impl/DTStatsAggregator.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DTStatsAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DTStatsAggregator.scala index d73332c085652..5aeea1443d499 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DTStatsAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DTStatsAggregator.scala @@ -78,8 +78,9 @@ private[spark] class DTStatsAggregator( /** * Array of parent node sufficient stats. - * Parent stats need to be explicitly tracked in the [[DTStatsAggregator]] for unordered - * categorical features. + * Note: parent stats need to be explicitly tracked in the [[DTStatsAggregator]] for unordered + * categorical features, because the parent [[Node]] object does not have [[ImpurityStats]] + * on the first iteration. */ private val parentStats: Array[Double] = new Array[Double](statsSize)