From fc370c0c3fba12af42551d4d71043cb54e3fde71 Mon Sep 17 00:00:00 2001
From: Yanbo Liang <ybliang8@gmail.com>
Date: Mon, 29 Feb 2016 13:32:58 +0800
Subject: [PATCH] Make MLlib LR's default parameters consistent in Scala and
 Python

---
 .../spark/mllib/classification/LogisticRegression.scala   | 4 ++++
 python/pyspark/mllib/classification.py                    | 8 +++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index c3882606d7dbd..f807b5683c390 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -408,6 +408,10 @@ class LogisticRegressionWithLBFGS
    * defaults to the mllib implementation. If more than two classes
    * or feature scaling is disabled, always uses mllib implementation.
    * Uses user provided weights.
+   *
+   * In the ml LogisticRegression implementation, the number of corrections
+   * used in the LBFGS update cannot be configured, so `optimizer.setNumCorrections()`
+   * has no effect when the ml implementation is used.
    */
   override def run(input: RDD[LabeledPoint], initialWeights: Vector): LogisticRegressionModel = {
     run(input, initialWeights, userSuppliedWeights = true)
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index b4d54ef61b0e6..53a0df27cace2 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -326,7 +326,7 @@ class LogisticRegressionWithLBFGS(object):
     """
     @classmethod
     @since('1.2.0')
-    def train(cls, data, iterations=100, initialWeights=None, regParam=0.01, regType="l2",
+    def train(cls, data, iterations=100, initialWeights=None, regParam=0.0, regType="l2",
               intercept=False, corrections=10, tolerance=1e-6, validateData=True, numClasses=2):
         """
         Train a logistic regression model on the given data.
@@ -341,7 +341,7 @@ def train(cls, data, iterations=100, initialWeights=None, regParam=0.01, regType
           (default: None)
         :param regParam:
           The regularizer parameter.
-          (default: 0.01)
+          (default: 0.0)
         :param regType:
           The type of regularizer used for training our model.
           Allowed values:
@@ -356,7 +356,9 @@ def train(cls, data, iterations=100, initialWeights=None, regParam=0.01, regType
           (default: False)
         :param corrections:
           The number of corrections used in the LBFGS update.
-          (default: 10)
+          If a known updater is used for binary classification,
+          the ml implementation is called and this parameter
+          has no effect. (default: 10)
         :param tolerance:
           The convergence tolerance of iterations for L-BFGS.
           (default: 1e-6)