From fbf6b5cab51544c9230567e9479528a9bd8960c5 Mon Sep 17 00:00:00 2001
From: Feynman Liang <feynman.liang@gmail.com>
Date: Wed, 13 Jan 2016 17:52:56 +0000
Subject: [PATCH 1/8] Initial fix and println unit test

---
 .../ml/classification/LogisticRegression.scala     |  8 +++++---
 .../classification/LogisticRegressionSuite.scala   | 14 ++++++++++++++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 486043e8d9741..3040081df9bc7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -339,9 +339,11 @@ class LogisticRegression @Since("1.2.0") (
          b = \log{P(1) / P(0)} = \log{count_1 / count_0}
          }}}
        */
-      initialCoefficientsWithIntercept.toArray(numFeatures)
-        = math.log(histogram(1) / histogram(0))
-    }
+       if (histogram.length >= 2) { // check to make sure indexing into histogram(1) is safe
+         initialCoefficientsWithIntercept.toArray(numFeatures) =
+           math.log(histogram(1) / histogram(0))
+       }
+     }
 
     val states = optimizer.iterations(new CachedDiffFunction(costFun),
       initialCoefficientsWithIntercept.toBreeze.toDenseVector)
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index ff0d0ff771042..d41540822449d 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -883,6 +883,20 @@ class LogisticRegressionSuite
     assert(model1a0.intercept ~== model1b.intercept absTol 1E-3)
   }
 
+  test("logistic regression with all labels the same") {
+    val lr = new LogisticRegression()
+      .setFitIntercept(true)
+      .setMaxIter(3)
+      .setLabelCol("sameLabel")
+    val sameLabelDataset = dataset.withColumn(lit(0.0), "sameLabel")
+    val model = lr.fit(sameLabelDataset)
+    val predictions = model.transform(sameLabelDataset)
+
+    predictions.show()
+    println(model.weights)
+    println(model.intercept)
+  }
+
   test("read/write") {
     def checkModelData(model: LogisticRegressionModel, model2: LogisticRegressionModel): Unit = {
       assert(model.intercept === model2.intercept)

From e4c13d4a89abc8160f1c2fa906cb3e3d1affd473 Mon Sep 17 00:00:00 2001
From: Feynman Liang <feynman.liang@gmail.com>
Date: Wed, 13 Jan 2016 19:23:49 +0000
Subject: [PATCH 2/8] Cleans up test

---
 .../classification/LogisticRegression.scala   |  4 +-
 .../LogisticRegressionSuite.scala             | 51 ++++++++++---------
 2 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 3040081df9bc7..cf7da12286b7f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -340,8 +340,8 @@ class LogisticRegression @Since("1.2.0") (
          }}}
        */
        if (histogram.length >= 2) { // check to make sure indexing into histogram(1) is safe
-         initialCoefficientsWithIntercept.toArray(numFeatures) =
-           math.log(histogram(1) / histogram(0))
+         initialCoefficientsWithIntercept.toArray(numFeatures) = math.log(
+           histogram(1) / histogram(0))
        }
      }
 
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index d41540822449d..a0d425f22dbed 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -30,6 +30,7 @@ import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.mllib.util.TestingUtils._
 import org.apache.spark.sql.{DataFrame, Row}
+import org.apache.spark.sql.functions.lit
 
 class LogisticRegressionSuite
   extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
@@ -302,7 +303,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    /*
+    [>
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -338,7 +339,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    /*
+    [>
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -377,7 +378,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    /*
+    [>
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -401,7 +402,7 @@ class LogisticRegressionSuite
     assert(model1.intercept ~== interceptR1 relTol 1E-2)
     assert(model1.coefficients ~= coefficientsR1 absTol 2E-2)
 
-    /*
+    [>
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -436,7 +437,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    /*
+    [>
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -461,7 +462,7 @@ class LogisticRegressionSuite
     assert(model1.intercept ~== interceptR1 relTol 1E-3)
     assert(model1.coefficients ~= coefficientsR1 absTol 1E-3)
 
-    /*
+    [>
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -496,7 +497,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    /*
+    [>
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -520,7 +521,7 @@ class LogisticRegressionSuite
     assert(model1.intercept ~== interceptR1 relTol 1E-3)
     assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
 
-    /*
+    [>
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -555,7 +556,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    /*
+    [>
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -580,7 +581,7 @@ class LogisticRegressionSuite
     assert(model1.intercept ~== interceptR1 absTol 1E-3)
     assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)
 
-    /*
+    [>
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -615,7 +616,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    /*
+    [>
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -639,7 +640,7 @@ class LogisticRegressionSuite
     assert(model1.intercept ~== interceptR1 relTol 6E-3)
     assert(model1.coefficients ~== coefficientsR1 absTol 5E-3)
 
-    /*
+    [>
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -674,7 +675,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    /*
+    [>
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -699,7 +700,7 @@ class LogisticRegressionSuite
     assert(model1.intercept ~== interceptR1 relTol 1E-3)
     assert(model1.coefficients ~= coefficientsR1 absTol 1E-2)
 
-    /*
+    [>
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -744,7 +745,7 @@ class LogisticRegressionSuite
             classSummarizer1.merge(classSummarizer2)
         }).histogram
 
-    /*
+    [>
        For binary logistic regression with strong L1 regularization, all the coefficients
        will be zeros. As a result,
        {{{
@@ -764,7 +765,7 @@ class LogisticRegressionSuite
     assert(model2.intercept ~== interceptTheory relTol 1E-5)
     assert(model2.coefficients ~= coefficientsTheory absTol 1E-6)
 
-    /*
+    [>
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -887,14 +888,16 @@ class LogisticRegressionSuite
     val lr = new LogisticRegression()
       .setFitIntercept(true)
       .setMaxIter(3)
-      .setLabelCol("sameLabel")
-    val sameLabelDataset = dataset.withColumn(lit(0.0), "sameLabel")
-    val model = lr.fit(sameLabelDataset)
-    val predictions = model.transform(sameLabelDataset)
-
-    predictions.show()
-    println(model.weights)
-    println(model.intercept)
+    val sameLabels = dataset
+      .withColumn("zeroLabel", lit(0.0))
+      .withColumn("oneLabel", lit(1.0))
+
+    val model = lr
+      .setLabelCol("oneLabel")
+      .fit(sameLabels)
+
+    assert(model.coefficients ~== Vectors.dense(0.0) absTol 1E-3)
+    assert(model.intercept === Double.PositiveInfinity)
   }
 
   test("read/write") {

From caf7a1b2cd4336134d1f29e0fb4432a67d44288e Mon Sep 17 00:00:00 2001
From: Feynman Liang <feynman.liang@gmail.com>
Date: Wed, 13 Jan 2016 19:26:04 +0000
Subject: [PATCH 3/8] Fix R comments

---
 .../LogisticRegressionSuite.scala             | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index a0d425f22dbed..8ac7bb7db0c1b 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -303,7 +303,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    [>
+    /*
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -339,7 +339,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    [>
+    /*
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -378,7 +378,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    [>
+    /*
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -402,7 +402,7 @@ class LogisticRegressionSuite
     assert(model1.intercept ~== interceptR1 relTol 1E-2)
     assert(model1.coefficients ~= coefficientsR1 absTol 2E-2)
 
-    [>
+    /*
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -437,7 +437,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    [>
+    /*
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -462,7 +462,7 @@ class LogisticRegressionSuite
     assert(model1.intercept ~== interceptR1 relTol 1E-3)
     assert(model1.coefficients ~= coefficientsR1 absTol 1E-3)
 
-    [>
+    /*
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -497,7 +497,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    [>
+    /*
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -521,7 +521,7 @@ class LogisticRegressionSuite
     assert(model1.intercept ~== interceptR1 relTol 1E-3)
     assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
 
-    [>
+    /*
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -556,7 +556,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    [>
+    /*
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -581,7 +581,7 @@ class LogisticRegressionSuite
     assert(model1.intercept ~== interceptR1 absTol 1E-3)
     assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)
 
-    [>
+    /*
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -616,7 +616,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    [>
+    /*
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -640,7 +640,7 @@ class LogisticRegressionSuite
     assert(model1.intercept ~== interceptR1 relTol 6E-3)
     assert(model1.coefficients ~== coefficientsR1 absTol 5E-3)
 
-    [>
+    /*
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -675,7 +675,7 @@ class LogisticRegressionSuite
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
 
-    [>
+    /*
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -700,7 +700,7 @@ class LogisticRegressionSuite
     assert(model1.intercept ~== interceptR1 relTol 1E-3)
     assert(model1.coefficients ~= coefficientsR1 absTol 1E-2)
 
-    [>
+    /*
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")
@@ -745,7 +745,7 @@ class LogisticRegressionSuite
             classSummarizer1.merge(classSummarizer2)
         }).histogram
 
-    [>
+    /*
        For binary logistic regression with strong L1 regularization, all the coefficients
        will be zeros. As a result,
        {{{
@@ -765,7 +765,7 @@ class LogisticRegressionSuite
     assert(model2.intercept ~== interceptTheory relTol 1E-5)
     assert(model2.coefficients ~= coefficientsTheory absTol 1E-6)
 
-    [>
+    /*
        Using the following R code to load the data and train the model using glmnet package.
 
        library("glmnet")

From d676f6245622cedf61df3875ca0873d77f72a857 Mon Sep 17 00:00:00 2001
From: Feynman Liang <feynman.liang@gmail.com>
Date: Sun, 17 Jan 2016 14:14:55 +0000
Subject: [PATCH 4/8] Returns model immediately for same labels

---
 .../classification/LogisticRegression.scala   | 202 +++++++++---------
 .../LogisticRegressionSuite.scala             |  15 +-
 2 files changed, 115 insertions(+), 102 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index cf7da12286b7f..10692506cd69b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -276,115 +276,123 @@ class LogisticRegression @Since("1.2.0") (
     val numClasses = histogram.length
     val numFeatures = summarizer.mean.size
 
-    if (numInvalid != 0) {
-      val msg = s"Classification labels should be in {0 to ${numClasses - 1} " +
-        s"Found $numInvalid invalid labels."
-      logError(msg)
-      throw new SparkException(msg)
-    }
-
-    if (numClasses > 2) {
-      val msg = s"Currently, LogisticRegression with ElasticNet in ML package only supports " +
-        s"binary classification. Found $numClasses in the input dataset."
-      logError(msg)
-      throw new SparkException(msg)
-    }
+    val (coefficients, intercept, objectiveHistory) = {
+      if (numInvalid != 0) {
+        val msg = s"Classification labels should be in {0 to ${numClasses - 1} " +
+          s"Found $numInvalid invalid labels."
+        logError(msg)
+        throw new SparkException(msg)
+      }
 
-    val featuresMean = summarizer.mean.toArray
-    val featuresStd = summarizer.variance.toArray.map(math.sqrt)
+      if (numClasses > 2) {
+        val msg = s"Currently, LogisticRegression with ElasticNet in ML package only supports " +
+          s"binary classification. Found $numClasses in the input dataset."
+        logError(msg)
+        throw new SparkException(msg)
+      } else if ($(fitIntercept) && numClasses == 2 && histogram(0) == 0.0) {
+        logWarning(s"All labels are one and fitIntercept=true, so the coefficients will be " +
+          s"zeros and the intercept will be positive infinity; as a result, " +
+          s"training is not needed.")
+        (Vectors.sparse(numFeatures, Seq()), Double.PositiveInfinity, Array.empty[Double])
+      } else if ($(fitIntercept) && numClasses == 1) {
+        logWarning(s"All labels are one and fitIntercept=true, so the coefficients will be " +
+          s"zeros and the intercept will be negative infinity; as a result, " +
+          s"training is not needed.")
+        (Vectors.sparse(numFeatures, Seq()), Double.NegativeInfinity, Array.empty[Double])
+      } else {
+        val featuresMean = summarizer.mean.toArray
+        val featuresStd = summarizer.variance.toArray.map(math.sqrt)
 
-    val regParamL1 = $(elasticNetParam) * $(regParam)
-    val regParamL2 = (1.0 - $(elasticNetParam)) * $(regParam)
+        val regParamL1 = $(elasticNetParam) * $(regParam)
+        val regParamL2 = (1.0 - $(elasticNetParam)) * $(regParam)
 
-    val costFun = new LogisticCostFun(instances, numClasses, $(fitIntercept), $(standardization),
-      featuresStd, featuresMean, regParamL2)
+        val costFun = new LogisticCostFun(instances, numClasses, $(fitIntercept),
+          $(standardization), featuresStd, featuresMean, regParamL2)
 
-    val optimizer = if ($(elasticNetParam) == 0.0 || $(regParam) == 0.0) {
-      new BreezeLBFGS[BDV[Double]]($(maxIter), 10, $(tol))
-    } else {
-      def regParamL1Fun = (index: Int) => {
-        // Remove the L1 penalization on the intercept
-        if (index == numFeatures) {
-          0.0
+        val optimizer = if ($(elasticNetParam) == 0.0 || $(regParam) == 0.0) {
+          new BreezeLBFGS[BDV[Double]]($(maxIter), 10, $(tol))
         } else {
-          if ($(standardization)) {
-            regParamL1
-          } else {
-            // If `standardization` is false, we still standardize the data
-            // to improve the rate of convergence; as a result, we have to
-            // perform this reverse standardization by penalizing each component
-            // differently to get effectively the same objective function when
-            // the training dataset is not standardized.
-            if (featuresStd(index) != 0.0) regParamL1 / featuresStd(index) else 0.0
+          def regParamL1Fun = (index: Int) => {
+            // Remove the L1 penalization on the intercept
+            if (index == numFeatures) {
+              0.0
+            } else {
+              if ($(standardization)) {
+                regParamL1
+              } else {
+                // If `standardization` is false, we still standardize the data
+                // to improve the rate of convergence; as a result, we have to
+                // perform this reverse standardization by penalizing each component
+                // differently to get effectively the same objective function when
+                // the training dataset is not standardized.
+                if (featuresStd(index) != 0.0) regParamL1 / featuresStd(index) else 0.0
+              }
+            }
           }
+          new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, regParamL1Fun, $(tol))
         }
-      }
-      new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, regParamL1Fun, $(tol))
-    }
 
-    val initialCoefficientsWithIntercept =
-      Vectors.zeros(if ($(fitIntercept)) numFeatures + 1 else numFeatures)
-
-    if ($(fitIntercept)) {
-      /*
-         For binary logistic regression, when we initialize the coefficients as zeros,
-         it will converge faster if we initialize the intercept such that
-         it follows the distribution of the labels.
-
-         {{{
-         P(0) = 1 / (1 + \exp(b)), and
-         P(1) = \exp(b) / (1 + \exp(b))
-         }}}, hence
-         {{{
-         b = \log{P(1) / P(0)} = \log{count_1 / count_0}
-         }}}
-       */
-       if (histogram.length >= 2) { // check to make sure indexing into histogram(1) is safe
-         initialCoefficientsWithIntercept.toArray(numFeatures) = math.log(
-           histogram(1) / histogram(0))
-       }
-     }
-
-    val states = optimizer.iterations(new CachedDiffFunction(costFun),
-      initialCoefficientsWithIntercept.toBreeze.toDenseVector)
+        val initialCoefficientsWithIntercept =
+          Vectors.zeros(if ($(fitIntercept)) numFeatures + 1 else numFeatures)
+
+        if ($(fitIntercept)) {
+          /*
+          For binary logistic regression, when we initialize the coefficients as zeros,
+          it will converge faster if we initialize the intercept such that
+          it follows the distribution of the labels.
+
+          {{{
+            P(0) = 1 / (1 + \exp(b)), and
+            P(1) = \exp(b) / (1 + \exp(b))
+          }}}, hence
+          {{{
+            b = \log{P(1) / P(0)} = \log{count_1 / count_0}
+          }}}
+          */
+          initialCoefficientsWithIntercept.toArray(numFeatures) = math.log(
+            histogram(1) / histogram(0))
+        }
 
-    val (coefficients, intercept, objectiveHistory) = {
-      /*
-         Note that in Logistic Regression, the objective history (loss + regularization)
-         is log-likelihood which is invariance under feature standardization. As a result,
-         the objective history from optimizer is the same as the one in the original space.
-       */
-      val arrayBuilder = mutable.ArrayBuilder.make[Double]
-      var state: optimizer.State = null
-      while (states.hasNext) {
-        state = states.next()
-        arrayBuilder += state.adjustedValue
-      }
+        val states = optimizer.iterations(new CachedDiffFunction(costFun),
+          initialCoefficientsWithIntercept.toBreeze.toDenseVector)
+
+        /*
+           Note that in Logistic Regression, the objective history (loss + regularization)
+           is log-likelihood which is invariance under feature standardization. As a result,
+           the objective history from optimizer is the same as the one in the original space.
+         */
+        val arrayBuilder = mutable.ArrayBuilder.make[Double]
+        var state: optimizer.State = null
+        while (states.hasNext) {
+          state = states.next()
+          arrayBuilder += state.adjustedValue
+        }
 
-      if (state == null) {
-        val msg = s"${optimizer.getClass.getName} failed."
-        logError(msg)
-        throw new SparkException(msg)
-      }
+        if (state == null) {
+          val msg = s"${optimizer.getClass.getName} failed."
+          logError(msg)
+          throw new SparkException(msg)
+        }
 
-      /*
-         The coefficients are trained in the scaled space; we're converting them back to
-         the original space.
-         Note that the intercept in scaled space and original space is the same;
-         as a result, no scaling is needed.
-       */
-      val rawCoefficients = state.x.toArray.clone()
-      var i = 0
-      while (i < numFeatures) {
-        rawCoefficients(i) *= { if (featuresStd(i) != 0.0) 1.0 / featuresStd(i) else 0.0 }
-        i += 1
-      }
+        /*
+           The coefficients are trained in the scaled space; we're converting them back to
+           the original space.
+           Note that the intercept in scaled space and original space is the same;
+           as a result, no scaling is needed.
+         */
+        val rawCoefficients = state.x.toArray.clone()
+        var i = 0
+        while (i < numFeatures) {
+          rawCoefficients(i) *= { if (featuresStd(i) != 0.0) 1.0 / featuresStd(i) else 0.0 }
+          i += 1
+        }
 
-      if ($(fitIntercept)) {
-        (Vectors.dense(rawCoefficients.dropRight(1)).compressed, rawCoefficients.last,
-          arrayBuilder.result())
-      } else {
-        (Vectors.dense(rawCoefficients).compressed, 0.0, arrayBuilder.result())
+        if ($(fitIntercept)) {
+          (Vectors.dense(rawCoefficients.dropRight(1)).compressed, rawCoefficients.last,
+            arrayBuilder.result())
+        } else {
+          (Vectors.dense(rawCoefficients).compressed, 0.0, arrayBuilder.result())
+        }
       }
     }
 
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index 8ac7bb7db0c1b..5f9a2371cd07f 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -884,7 +884,7 @@ class LogisticRegressionSuite
     assert(model1a0.intercept ~== model1b.intercept absTol 1E-3)
   }
 
-  test("logistic regression with all labels the same") {
+  test("logistic regression with fitIntercept=true and all labels the same") {
     val lr = new LogisticRegression()
       .setFitIntercept(true)
       .setMaxIter(3)
@@ -892,12 +892,17 @@ class LogisticRegressionSuite
       .withColumn("zeroLabel", lit(0.0))
       .withColumn("oneLabel", lit(1.0))
 
-    val model = lr
-      .setLabelCol("oneLabel")
+    val allZeroModel = lr
+      .setLabelCol("zeroLabel")
       .fit(sameLabels)
+    assert(allZeroModel.coefficients ~== Vectors.dense(0.0) absTol 1E-3)
+    assert(allZeroModel.intercept === Double.NegativeInfinity)
 
-    assert(model.coefficients ~== Vectors.dense(0.0) absTol 1E-3)
-    assert(model.intercept === Double.PositiveInfinity)
+    val allOneModel = lr
+      .setLabelCol("oneLabel")
+      .fit(sameLabels)
+    assert(allOneModel.coefficients ~== Vectors.dense(0.0) absTol 1E-3)
+    assert(allOneModel.intercept === Double.PositiveInfinity)
   }
 
   test("read/write") {

From 0f4824d9f358f451968aa2a6ab2b31afd85cda64 Mon Sep 17 00:00:00 2001
From: Feynman Liang <feynman.liang@gmail.com>
Date: Mon, 18 Jan 2016 15:15:41 +0000
Subject: [PATCH 5/8] Tests and cleanup from code review

---
 .../classification/LogisticRegression.scala   | 24 +++++------
 .../LogisticRegressionSuite.scala             | 41 ++++++++++++++-----
 2 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 10692506cd69b..0b3ba09bf698f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -295,7 +295,7 @@ class LogisticRegression @Since("1.2.0") (
           s"training is not needed.")
         (Vectors.sparse(numFeatures, Seq()), Double.PositiveInfinity, Array.empty[Double])
       } else if ($(fitIntercept) && numClasses == 1) {
-        logWarning(s"All labels are one and fitIntercept=true, so the coefficients will be " +
+        logWarning(s"All labels are zero and fitIntercept=true, so the coefficients will be " +
           s"zeros and the intercept will be negative infinity; as a result, " +
           s"training is not needed.")
         (Vectors.sparse(numFeatures, Seq()), Double.NegativeInfinity, Array.empty[Double])
@@ -337,17 +337,17 @@ class LogisticRegression @Since("1.2.0") (
 
         if ($(fitIntercept)) {
           /*
-          For binary logistic regression, when we initialize the coefficients as zeros,
-          it will converge faster if we initialize the intercept such that
-          it follows the distribution of the labels.
-
-          {{{
-            P(0) = 1 / (1 + \exp(b)), and
-            P(1) = \exp(b) / (1 + \exp(b))
-          }}}, hence
-          {{{
-            b = \log{P(1) / P(0)} = \log{count_1 / count_0}
-          }}}
+            For binary logistic regression, when we initialize the coefficients as zeros,
+            it will converge faster if we initialize the intercept such that
+            it follows the distribution of the labels.
+
+            {{{
+              P(0) = 1 / (1 + \exp(b)), and
+              P(1) = \exp(b) / (1 + \exp(b))
+            }}}, hence
+            {{{
+              b = \log{P(1) / P(0)} = \log{count_1 / count_0}
+            }}}
           */
           initialCoefficientsWithIntercept.toArray(numFeatures) = math.log(
             histogram(1) / histogram(0))
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index 5f9a2371cd07f..fb79a7c1f5f72 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -884,25 +884,46 @@ class LogisticRegressionSuite
     assert(model1a0.intercept ~== model1b.intercept absTol 1E-3)
   }
 
-  test("logistic regression with fitIntercept=true and all labels the same") {
-    val lr = new LogisticRegression()
-      .setFitIntercept(true)
-      .setMaxIter(3)
+  test("logistic regression with all labels the same") {
     val sameLabels = dataset
       .withColumn("zeroLabel", lit(0.0))
       .withColumn("oneLabel", lit(1.0))
 
-    val allZeroModel = lr
+    // fitIntercept=true
+    val lrIntercept = new LogisticRegression()
+      .setFitIntercept(true)
+      .setMaxIter(3)
+
+    val allZeroInterceptModel = lrIntercept
+      .setLabelCol("zeroLabel")
+      .fit(sameLabels)
+    assert(allZeroInterceptModel.coefficients ~== Vectors.dense(0.0) absTol 1E-3)
+    assert(allZeroInterceptModel.intercept === Double.NegativeInfinity)
+    assert(allZeroInterceptModel.summary.totalIterations === 0)
+
+    val allOneInterceptModel = lrIntercept
+      .setLabelCol("oneLabel")
+      .fit(sameLabels)
+    assert(allOneInterceptModel.coefficients ~== Vectors.dense(0.0) absTol 1E-3)
+    assert(allOneInterceptModel.intercept === Double.PositiveInfinity)
+    assert(allOneInterceptModel.summary.totalIterations === 0)
+
+    // fitIntercept=false
+    val lrNoIntercept= new LogisticRegression()
+      .setFitIntercept(false)
+      .setMaxIter(3)
+
+    val allZeroNoInterceptModel = lrNoIntercept
       .setLabelCol("zeroLabel")
       .fit(sameLabels)
-    assert(allZeroModel.coefficients ~== Vectors.dense(0.0) absTol 1E-3)
-    assert(allZeroModel.intercept === Double.NegativeInfinity)
+    assert(allZeroNoInterceptModel.intercept === 0.0)
+    println(allZeroNoInterceptModel.summary.totalIterations > 0)
 
-    val allOneModel = lr
+    val allOneNoInterceptModel = lrNoIntercept
       .setLabelCol("oneLabel")
       .fit(sameLabels)
-    assert(allOneModel.coefficients ~== Vectors.dense(0.0) absTol 1E-3)
-    assert(allOneModel.intercept === Double.PositiveInfinity)
+    assert(allOneNoInterceptModel.intercept === 0.0)
+    println(allOneNoInterceptModel.summary.totalIterations > 0)
   }
 
   test("read/write") {

From 3d227c0dd3e89ff286ae6ed6df94bad706e569af Mon Sep 17 00:00:00 2001
From: Feynman Liang <feynman.liang@gmail.com>
Date: Mon, 18 Jan 2016 16:57:04 +0000
Subject: [PATCH 6/8] Fix println

---
 .../spark/ml/classification/LogisticRegressionSuite.scala     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index fb79a7c1f5f72..66e5d0e28dad1 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -917,13 +917,13 @@ class LogisticRegressionSuite
       .setLabelCol("zeroLabel")
       .fit(sameLabels)
     assert(allZeroNoInterceptModel.intercept === 0.0)
-    println(allZeroNoInterceptModel.summary.totalIterations > 0)
+    assert(allZeroNoInterceptModel.summary.totalIterations > 0)
 
     val allOneNoInterceptModel = lrNoIntercept
       .setLabelCol("oneLabel")
       .fit(sameLabels)
     assert(allOneNoInterceptModel.intercept === 0.0)
-    println(allOneNoInterceptModel.summary.totalIterations > 0)
+    assert(allOneNoInterceptModel.summary.totalIterations > 0)
   }
 
   test("read/write") {

From c8c1586ffd5e5076c6aed564b41d99314099a5a0 Mon Sep 17 00:00:00 2001
From: Feynman Liang <feynman.liang@gmail.com>
Date: Mon, 18 Jan 2016 16:57:39 +0000
Subject: [PATCH 7/8] Style fix

---
 .../spark/ml/classification/LogisticRegressionSuite.scala       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index 66e5d0e28dad1..972c0868a454a 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -909,7 +909,7 @@ class LogisticRegressionSuite
     assert(allOneInterceptModel.summary.totalIterations === 0)
 
     // fitIntercept=false
-    val lrNoIntercept= new LogisticRegression()
+    val lrNoIntercept = new LogisticRegression()
       .setFitIntercept(false)
       .setMaxIter(3)
 

From 95816d4c158d7e11cab8ac7bf28b9bb84026d33a Mon Sep 17 00:00:00 2001
From: Feynman Liang <feynman.liang@gmail.com>
Date: Mon, 18 Jan 2016 20:30:27 +0000
Subject: [PATCH 8/8] Fix indentation

---
 .../classification/LogisticRegression.scala   | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 0b3ba09bf698f..dad8dfc84ec15 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -337,18 +337,18 @@ class LogisticRegression @Since("1.2.0") (
 
         if ($(fitIntercept)) {
           /*
-            For binary logistic regression, when we initialize the coefficients as zeros,
-            it will converge faster if we initialize the intercept such that
-            it follows the distribution of the labels.
-
-            {{{
-              P(0) = 1 / (1 + \exp(b)), and
-              P(1) = \exp(b) / (1 + \exp(b))
-            }}}, hence
-            {{{
-              b = \log{P(1) / P(0)} = \log{count_1 / count_0}
-            }}}
-          */
+             For binary logistic regression, when we initialize the coefficients as zeros,
+             it will converge faster if we initialize the intercept such that
+             it follows the distribution of the labels.
+
+             {{{
+               P(0) = 1 / (1 + \exp(b)), and
+               P(1) = \exp(b) / (1 + \exp(b))
+             }}}, hence
+             {{{
+               b = \log{P(1) / P(0)} = \log{count_1 / count_0}
+             }}}
+           */
           initialCoefficientsWithIntercept.toArray(numFeatures) = math.log(
             histogram(1) / histogram(0))
         }