Skip to content

Commit

Permalink
Use np.allclose and treeEnsembleModels -> TreeEnsembleModels
Browse files Browse the repository at this point in the history
  • Loading branch information
MechCoder committed Jun 30, 2015
1 parent 819098c commit 47d7023
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 16 deletions.
16 changes: 9 additions & 7 deletions python/pyspark/ml/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from pyspark.ml.wrapper import JavaEstimator, JavaModel
from pyspark.ml.param.shared import *
from pyspark.ml.regression import (
RandomForestParams, DecisionTreeModel, treeEnsembleModels)
RandomForestParams, DecisionTreeModel, TreeEnsembleModels)
from pyspark.mllib.common import inherit_doc


Expand Down Expand Up @@ -290,6 +290,7 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
It supports both binary and multiclass labels, as well as both continuous and categorical
features.
>>> from numpy import allclose
>>> from pyspark.mllib.linalg import Vectors
>>> from pyspark.ml.feature import StringIndexer
>>> df = sqlContext.createDataFrame([
Expand All @@ -300,8 +301,8 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
>>> td = si_model.transform(df)
>>> rf = RandomForestClassifier(numTrees=2, maxDepth=2, labelCol="indexed", seed=42)
>>> model = rf.fit(td)
>>> model.treeWeights
[1.0, 1.0]
>>> allclose(model.treeWeights, [1.0, 1.0])
True
>>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
>>> model.transform(test0).head().prediction
0.0
Expand Down Expand Up @@ -431,7 +432,7 @@ def getFeatureSubsetStrategy(self):
return self.getOrDefault(self.featureSubsetStrategy)


class RandomForestClassificationModel(treeEnsembleModels):
class RandomForestClassificationModel(TreeEnsembleModels):
"""
Model fitted by RandomForestClassifier.
"""
Expand All @@ -446,6 +447,7 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
It supports binary labels, as well as both continuous and categorical features.
Note: Multiclass labels are not currently supported.
>>> from numpy import allclose
>>> from pyspark.mllib.linalg import Vectors
>>> from pyspark.ml.feature import StringIndexer
>>> df = sqlContext.createDataFrame([
Expand All @@ -456,8 +458,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
>>> td = si_model.transform(df)
>>> gbt = GBTClassifier(maxIter=5, maxDepth=2, labelCol="indexed")
>>> model = gbt.fit(td)
>>> model.treeWeights
[1.0, 0.1, 0.1, 0.1, 0.1]
>>> allclose(model.treeWeights, [1.0, 0.1, 0.1, 0.1, 0.1])
True
>>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
>>> model.transform(test0).head().prediction
0.0
Expand Down Expand Up @@ -568,7 +570,7 @@ def getStepSize(self):
return self.getOrDefault(self.stepSize)


class GBTClassificationModel(treeEnsembleModels):
class GBTClassificationModel(TreeEnsembleModels):
"""
Model fitted by GBTClassifier.
"""
Expand Down
20 changes: 11 additions & 9 deletions python/pyspark/ml/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,9 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
>>> dt = DecisionTreeRegressor(maxDepth=2)
>>> model = dt.fit(df)
>>> model.depth
2
>>> model.numNodes
1
>>> model.numNodes
3
>>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
>>> model.transform(test0).head().prediction
0.0
Expand Down Expand Up @@ -261,7 +261,7 @@ def __repr__(self):


@inherit_doc
class treeEnsembleModels(JavaModel):
class TreeEnsembleModels(JavaModel):

@property
def treeWeights(self):
Expand All @@ -286,14 +286,15 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
learning algorithm for regression.
It supports both continuous and categorical features.
>>> from numpy import allclose
>>> from pyspark.mllib.linalg import Vectors
>>> df = sqlContext.createDataFrame([
... (1.0, Vectors.dense(1.0)),
... (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
>>> rf = RandomForestRegressor(numTrees=2, maxDepth=2, seed=42)
>>> model = rf.fit(df)
>>> model.treeWeights
[1.0, 1.0]
>>> allclose(model.treeWeights, [1.0, 1.0])
True
>>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
>>> model.transform(test0).head().prediction
0.0
Expand Down Expand Up @@ -424,7 +425,7 @@ def getFeatureSubsetStrategy(self):
return self.getOrDefault(self.featureSubsetStrategy)


class RandomForestRegressionModel(treeEnsembleModels):
class RandomForestRegressionModel(TreeEnsembleModels):
"""
Model fitted by RandomForestRegressor.
"""
Expand All @@ -438,14 +439,15 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
learning algorithm for regression.
It supports both continuous and categorical features.
>>> from numpy import allclose
>>> from pyspark.mllib.linalg import Vectors
>>> df = sqlContext.createDataFrame([
... (1.0, Vectors.dense(1.0)),
... (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
>>> gbt = GBTRegressor(maxIter=5, maxDepth=2)
>>> model = gbt.fit(df)
>>> model.treeWeights
[1.0, 0.1, 0.1, 0.1, 0.1]
>>> allclose(model.treeWeights, [1.0, 0.1, 0.1, 0.1, 0.1])
True
>>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
>>> model.transform(test0).head().prediction
0.0
Expand Down Expand Up @@ -555,7 +557,7 @@ def getStepSize(self):
return self.getOrDefault(self.stepSize)


class GBTRegressionModel(treeEnsembleModels):
class GBTRegressionModel(TreeEnsembleModels):
"""
Model fitted by GBTRegressor.
"""
Expand Down

0 comments on commit 47d7023

Please sign in to comment.