From fc370c0c3fba12af42551d4d71043cb54e3fde71 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Mon, 29 Feb 2016 13:32:58 +0800 Subject: [PATCH] Make MLlib LR's default parameters consistent in Scala and Python --- .../spark/mllib/classification/LogisticRegression.scala | 4 ++++ python/pyspark/mllib/classification.py | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index c3882606d7dbd..f807b5683c390 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -408,6 +408,10 @@ class LogisticRegressionWithLBFGS * defaults to the mllib implementation. If more than two classes * or feature scaling is disabled, always uses mllib implementation. * Uses user provided weights. + * + * In the ml LogisticRegression implementation, the number of corrections + * used in the LBFGS update cannot be configured. So `optimizer.setNumCorrections()` + * will have no effect if that code path is taken. 
*/ override def run(input: RDD[LabeledPoint], initialWeights: Vector): LogisticRegressionModel = { run(input, initialWeights, userSuppliedWeights = true) diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index b4d54ef61b0e6..53a0df27cace2 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -326,7 +326,7 @@ class LogisticRegressionWithLBFGS(object): """ @classmethod @since('1.2.0') - def train(cls, data, iterations=100, initialWeights=None, regParam=0.01, regType="l2", + def train(cls, data, iterations=100, initialWeights=None, regParam=0.0, regType="l2", intercept=False, corrections=10, tolerance=1e-6, validateData=True, numClasses=2): """ Train a logistic regression model on the given data. @@ -341,7 +341,7 @@ def train(cls, data, iterations=100, initialWeights=None, regParam=0.01, regType (default: None) :param regParam: The regularizer parameter. - (default: 0.01) + (default: 0.0) :param regType: The type of regularizer used for training our model. Allowed values: @@ -356,7 +356,9 @@ def train(cls, data, iterations=100, initialWeights=None, regParam=0.01, regType (default: False) :param corrections: The number of corrections used in the LBFGS update. - (default: 10) + If a known updater is used for binary classification, + it calls the ml implementation and this parameter will + have no effect. (default: 10) :param tolerance: The convergence tolerance of iterations for L-BFGS. (default: 1e-6)