From 52ec9cd1c9920650cb588d15e1301e998a036371 Mon Sep 17 00:00:00 2001 From: sethah Date: Wed, 24 Aug 2016 17:13:33 -0700 Subject: [PATCH 01/24] first pass at merging MLOR with LOR --- .../classification/LogisticRegression.scala | 422 ++++++++++++++---- .../classification/LogisticRegression.scala | 5 +- .../MultinomialLogisticRegressionSuite.scala | 210 ++++----- 3 files changed, 454 insertions(+), 183 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 757d52052d87f..c8c06a4d7752b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -50,6 +50,8 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas with HasRegParam with HasElasticNetParam with HasMaxIter with HasFitIntercept with HasTol with HasStandardization with HasWeightCol with HasThreshold with HasAggregationDepth { + import LogisticRegression._ + /** * Set threshold in binary classification, in range [0, 1]. * @@ -71,6 +73,25 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas set(threshold, value) } + + /** + * Param for the name of family which is a description of the error distribution + * to be used in the model. + * Supported options: "multinomial", "binomial". + * Default is "multinomial". + * + * @group param + */ + @Since("2.0.0") + final val family: Param[String] = new Param(this, "family", + "The name of family which is a description of the error distribution to be used in the " + + s"model. Supported options: ${supportedFamilyNames.mkString(", ")}.", + ParamValidators.inArray[String](supportedFamilyNames)) + + /** @group getParam */ + @Since("2.0.0") + def getFamily: String = $(family) + /** * Get threshold for binary classification. * @@ -220,6 +241,17 @@ class LogisticRegression @Since("1.2.0") ( def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) setDefault(fitIntercept -> true) + /** + * Sets the value of param [[family]]. + * Default is "multinomial". + * + * @group setParam + */ + // TODO: don't use strings? + @Since("2.0.0") + def setFamily(value: String): this.type = set(family, value) + setDefault(family -> "multinomial") + /** * Whether to standardize the training features before fitting the model. 
* The coefficients of models will be always returned on the original scale, @@ -311,8 +343,25 @@ class LogisticRegression @Since("1.2.0") ( val histogram = labelSummarizer.histogram val numInvalid = labelSummarizer.countInvalid - val numClasses = histogram.length val numFeatures = summarizer.mean.size + val numFeaturesPlusIntercept = if (getFitIntercept) numFeatures + 1 else numFeatures + + val numClasses = MetadataUtils.getNumClasses(dataset.schema($(labelCol))) match { + case Some(n: Int) => + require(n >= histogram.length, s"Specified number of classes $n was " + + s"less than the number of unique labels ${histogram.length}") + n + case None => histogram.length + } + val isBinaryClassification = numClasses == 1 || numClasses == 2 + val isMultinomial = !((!isSet(family) && isBinaryClassification) || $(family) == "binomial") + val numCoefficientSets = if (isMultinomial) numClasses else 1 + + // TODO: use enumeration or similar + if (!isMultinomial) { + require(isBinaryClassification, s"Binomial family only supports 1 or 2" + + s"outcome classes but found $numClasses") + } if (isDefined(thresholds)) { require($(thresholds).length == numClasses, this.getClass.getSimpleName + @@ -333,22 +382,18 @@ class LogisticRegression @Since("1.2.0") ( val isConstantLabel = histogram.count(_ != 0) == 1 - if (numClasses > 2) { - val msg = s"LogisticRegression with ElasticNet in ML package only supports " + - s"binary classification. Found $numClasses in the input dataset. Consider using " + - s"MultinomialLogisticRegression instead." - logError(msg) - throw new SparkException(msg) - } else if ($(fitIntercept) && numClasses == 2 && isConstantLabel) { - logWarning(s"All labels are one and fitIntercept=true, so the coefficients will be " + - s"zeros and the intercept will be positive infinity; as a result, " + - s"training is not needed.") - (Vectors.sparse(numFeatures, Seq()), Double.PositiveInfinity, Array.empty[Double]) - } else if ($(fitIntercept) && numClasses == 1) { - logWarning(s"All labels are zero and fitIntercept=true, so the coefficients will be " + - s"zeros and the intercept will be negative infinity; as a result, " + - s"training is not needed.") - (Vectors.sparse(numFeatures, Seq()), Double.NegativeInfinity, Array.empty[Double]) + if ($(fitIntercept) && isConstantLabel) { + logWarning(s"All labels are the same value and fitIntercept=true, so the coefficients " + + s"will be zeros. Training is not needed.") + val constantLabelIndex = Vectors.dense(histogram).argmax + val coefficientMatrix = Matrices.sparse(numCoefficientSets, numFeatures, + Array.fill(numFeatures + 1)(0), Array.empty[Int], Array.empty[Double]) + val interceptVector = if (isMultinomial) { + Vectors.sparse(numClasses, Seq((constantLabelIndex, Double.PositiveInfinity))) + } else { + Vectors.dense(if (numClasses == 2) Double.PositiveInfinity else Double.NegativeInfinity) + } + (coefficientMatrix, interceptVector, Array.empty[Double]) } else { if (!$(fitIntercept) && isConstantLabel) { logWarning(s"All labels belong to a single class and fitIntercept=false. 
It's a " + @@ -370,35 +415,52 @@ class LogisticRegression @Since("1.2.0") ( val bcFeaturesStd = instances.context.broadcast(featuresStd) val costFun = new LogisticCostFun(instances, numClasses, $(fitIntercept), - $(standardization), bcFeaturesStd, regParamL2, multinomial = false, $(aggregationDepth)) + $(standardization), bcFeaturesStd, regParamL2, multinomial = isMultinomial, + $(aggregationDepth)) val optimizer = if ($(elasticNetParam) == 0.0 || $(regParam) == 0.0) { new BreezeLBFGS[BDV[Double]]($(maxIter), 10, $(tol)) } else { val standardizationParam = $(standardization) + // TODO: check this works in both cases def regParamL1Fun = (index: Int) => { // Remove the L1 penalization on the intercept - if (index == numFeatures) { + val isIntercept = $(fitIntercept) && ((index + 1) % numFeaturesPlusIntercept == 0) + if (isIntercept) { 0.0 } else { if (standardizationParam) { regParamL1 } else { + val featureIndex = if ($(fitIntercept)) { + index % numFeaturesPlusIntercept + } else { + index % numFeatures + } // If `standardization` is false, we still standardize the data // to improve the rate of convergence; as a result, we have to // perform this reverse standardization by penalizing each component // differently to get effectively the same objective function when // the training dataset is not standardized. - if (featuresStd(index) != 0.0) regParamL1 / featuresStd(index) else 0.0 + if (featuresStd(featureIndex) != 0.0) { + regParamL1 / featuresStd(featureIndex) + } else { + 0.0 + } } } } new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, regParamL1Fun, $(tol)) } - val initialCoefficientsWithIntercept = - Vectors.zeros(if ($(fitIntercept)) numFeatures + 1 else numFeatures) + // TODO: double check this + val initialCoefficientsWithIntercept = if (isMultinomial) { + Vectors.zeros(numClasses * numFeaturesPlusIntercept) + } else { + Vectors.zeros(numFeaturesPlusIntercept) + } + // TODO: need to add this for multinomial case if (optInitialModel.isDefined && optInitialModel.get.coefficients.size != numFeatures) { val vecSize = optInitialModel.get.coefficients.size logWarning( @@ -406,13 +468,46 @@ class LogisticRegression @Since("1.2.0") ( s"expected size $numFeatures") } - if (optInitialModel.isDefined && optInitialModel.get.coefficients.size == numFeatures) { - val initialCoefficientsWithInterceptArray = initialCoefficientsWithIntercept.toArray - optInitialModel.get.coefficients.foreachActive { case (index, value) => - initialCoefficientsWithInterceptArray(index) = value - } - if ($(fitIntercept)) { - initialCoefficientsWithInterceptArray(numFeatures) == optInitialModel.get.intercept + // TODO: removing initial model for now +// if (optInitialModel.isDefined && optInitialModel.get.coefficients.size == numFeatures) { +// val initialCoefficientsWithInterceptArray = initialCoefficientsWithIntercept.toArray +// optInitialModel.get.coefficients.foreachActive { case (index, value) => +// initialCoefficientsWithInterceptArray(index) = value +// } +// if ($(fitIntercept)) { +// initialCoefficientsWithInterceptArray(numFeatures) == optInitialModel.get.intercept +// } +// } + if ($(fitIntercept) && isMultinomial) { + // TODO: can we merge the logic or something here? + /* + For multinomial logistic regression, when we initialize the coefficients as zeros, + it will converge faster if we initialize the intercepts such that + it follows the distribution of the labels. + {{{ + P(1) = \exp(b_1) / Z + ... 
+ P(K) = \exp(b_K) / Z
+ where Z = \sum_{k=1}^{K} \exp(b_k)
+ }}}
+ Since this doesn't have a unique solution, one of the solutions that satisfies the
+ above equations is
+ {{{
+ \exp(b_k) = count_k * \exp(\lambda)
+ b_k = \log(count_k) + \lambda
+ }}}
+ \lambda is a free parameter, so choose the phase \lambda such that the
+ mean is centered. This yields
+ {{{
+ b_k = \log(count_k)
+ b_k' = b_k - \mean(b_k)
+ }}}
+ */
+ val rawIntercepts = histogram.map(c => math.log(c + 1)) // add 1 for smoothing
+ val rawMean = rawIntercepts.sum / rawIntercepts.length
+ rawIntercepts.indices.foreach { i =>
+ initialCoefficientsWithIntercept.toArray(i * numFeaturesPlusIntercept + numFeatures) =
+ rawIntercepts(i) - rawMean } } else if ($(fitIntercept)) { /*
@@ -452,6 +547,7 @@ class LogisticRegression @Since("1.2.0") ( logError(msg) throw new SparkException(msg) }
+ bcFeaturesStd.destroy(blocking = false)
/* The coefficients are trained in the scaled space; we're converting them back to
@@ -460,25 +556,62 @@ class LogisticRegression @Since("1.2.0") ( as a result, no scaling is needed. */ val rawCoefficients = state.x.toArray.clone()
- var i = 0
- while (i < numFeatures) {
- rawCoefficients(i) *= { if (featuresStd(i) != 0.0) 1.0 / featuresStd(i) else 0.0 }
- i += 1
+ // TODO: I think this will work for both binomial and multinomial
+ val coefficientArray = Array.tabulate(numCoefficientSets * numFeatures) { i =>
+ // flatIndex will loop through rawCoefficients, and skip the intercept terms.
+ val flatIndex = if ($(fitIntercept)) i + i / numFeatures else i
+ val featureIndex = i % numFeatures
+ if (featuresStd(featureIndex) != 0.0) {
+ rawCoefficients(flatIndex) / featuresStd(featureIndex)
+ } else {
+ 0.0
+ } }
- bcFeaturesStd.destroy(blocking = false)
+ val coefficientMatrix =
+ new DenseMatrix(numCoefficientSets, numFeatures, coefficientArray, isTransposed = true)
- if ($(fitIntercept)) {
- (Vectors.dense(rawCoefficients.dropRight(1)).compressed, rawCoefficients.last,
- arrayBuilder.result())
+ if ($(regParam) == 0.0 && isMultinomial) {
+ /*
+ When no regularization is applied, the coefficients lack identifiability because
+ we do not use a pivot class. We can add any constant value to the coefficients and
+ get the same likelihood. So here, we choose the mean centered coefficients for
+ reproducibility. This method follows the approach in glmnet, described here:
+
+ Friedman, et al. "Regularization Paths for Generalized Linear Models via
+ Coordinate Descent," https://core.ac.uk/download/files/153/6287975.pdf
+ */
+ val coefficientMean = coefficientMatrix.values.sum / coefficientMatrix.values.length
+ coefficientMatrix.update(_ - coefficientMean)
+ }
+
+ val interceptsArray: Array[Double] = if ($(fitIntercept)) {
+ Array.tabulate(numCoefficientSets) { i =>
+ val coefIndex = (i + 1) * numFeaturesPlusIntercept - 1
+ rawCoefficients(coefIndex)
+ }
+ } else {
+ Array[Double]()
+ }
+ /*
+ The intercepts are never regularized, so we always center the mean. 
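
To make the flattened layout used throughout this hunk concrete: with fitIntercept enabled, the solver vector stores each class k as a contiguous row [w_k0, ..., w_k(numFeatures-1), b_k]. A minimal self-contained sketch of the intercept warm start above, assuming a toy 3-class histogram (values illustrative only, not from the patch's tests):

    // Sketch only: toy label histogram, fitIntercept assumed true.
    val histogram = Array(100.0, 300.0, 600.0)              // class counts
    val numFeatures = 4
    val numFeaturesPlusIntercept = numFeatures + 1
    val flat = new Array[Double](histogram.length * numFeaturesPlusIntercept)
    val rawIntercepts = histogram.map(c => math.log(c + 1)) // add 1 for smoothing
    val rawMean = rawIntercepts.sum / rawIntercepts.length
    rawIntercepts.indices.foreach { k =>
      // class k's intercept slot sits at the end of its row in the flat vector
      flat(k * numFeaturesPlusIntercept + numFeatures) = rawIntercepts(k) - rawMean
    }
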
+ */
+ val interceptVector = if (interceptsArray.nonEmpty && isMultinomial) {
+ val interceptMean = interceptsArray.sum / numClasses
+ interceptsArray.indices.foreach { i => interceptsArray(i) -= interceptMean }
+ Vectors.dense(interceptsArray)
+ } else if (interceptsArray.nonEmpty) {
+ Vectors.dense(interceptsArray) } else {
- (Vectors.dense(rawCoefficients).compressed, 0.0, arrayBuilder.result())
+ Vectors.sparse(numClasses, Seq()) }
+ (coefficientMatrix, interceptVector, arrayBuilder.result()) } } if (handlePersistence) instances.unpersist()
- val model = copyValues(new LogisticRegressionModel(uid, coefficients, intercept))
+ val model = copyValues(new LogisticRegressionModel(uid, coefficients, intercept, numClasses,
+ isMultinomial)) val (summaryModel, probabilityColName) = model.findSummaryModelAndProbabilityCol() val logRegSummary = new BinaryLogisticRegressionTrainingSummary( summaryModel.transform(dataset),
@@ -500,6 +633,8 @@ object LogisticRegression extends DefaultParamsReadable[LogisticRegression] { @Since("1.6.0") override def load(path: String): LogisticRegression = super.load(path)
+
+ private[classification] lazy val supportedFamilyNames = Array("binomial", "multinomial") } /**
@@ -508,11 +643,34 @@ class LogisticRegressionModel private[spark] ( @Since("1.4.0") override val uid: String,
- @Since("2.0.0") val coefficients: Vector,
- @Since("1.3.0") val intercept: Double)
+ @Since("2.1.0") val coefficientMatrix: Matrix,
+ @Since("2.1.0") val interceptVector: Vector,
+ @Since("1.3.0") override val numClasses: Int,
+ private val isMultinomial: Boolean) extends ProbabilisticClassificationModel[Vector, LogisticRegressionModel] with LogisticRegressionParams with MLWritable {
+ def this(uid: String, coefficients: Vector, intercept: Double) {
+ this(uid,
+ new DenseMatrix(1, coefficients.size, coefficients.toArray, isTransposed = true),
+ Vectors.dense(intercept), 2, false)
+ }
+
+ @Since("2.0.0")
+ // TODO: this should convert sparse to sparse and dense to dense
+ val coefficients: Vector = Vectors.dense(coefficientMatrix.toArray)
+
+ @Since("1.3.0")
+ def intercept: Double = {
+ if (isMultinomial) {
+ logWarning("Multiclass model contains a vector of intercepts, use interceptVector instead. " +
+ "Returning 0.0 as placeholder.")
+ }
+ _intercept
+ }
+
+ private val _intercept = if (!isMultinomial) interceptVector.toArray.head else 0.0
+ @Since("1.5.0") override def setThreshold(value: Double): this.type = super.setThreshold(value)
@@ -527,7 +685,14 @@ class LogisticRegressionModel private[spark] ( /** Margin (rawPrediction) for class label 1. For binary classification only. */ private val margin: Vector => Double = (features) => {
- BLAS.dot(features, coefficients) + intercept
+ BLAS.dot(features, coefficients) + _intercept
+ }
+
+ /** Margin (rawPrediction) for each class label. */
+ private val margins: Vector => Vector = (features) => {
+ val m = interceptVector.toDense.copy
+ BLAS.gemv(1.0, coefficientMatrix, features, 1.0, m)
+ m } /** Score (probability) for class label 1. For binary classification only. */
@@ -536,11 +701,36 @@ class LogisticRegressionModel private[spark] ( 1.0 / (1.0 + math.exp(-m)) }
- @Since("1.6.0")
- override val numFeatures: Int = coefficients.size
+ /** Score (probability) for each class label. 
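
The mean-centering above (and the coefficient centering in the unregularized multinomial case) is safe because softmax probabilities are invariant to adding one constant to every margin; a tiny worked check with illustrative values:

    // Shift-invariance check; values illustrative only.
    def softmax(z: Array[Double]): Array[Double] = {
      val e = z.map(math.exp)
      val s = e.sum
      e.map(_ / s)
    }
    val b = Array(1.0, 2.0, 3.0)
    val centered = b.map(_ - b.sum / b.length) // Array(-1.0, 0.0, 1.0)
    // softmax(b) and softmax(centered) agree element-wise up to floating point.
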
*/ + private val scores: Vector => Vector = (features) => { + val m = margins(features) + val maxMarginIndex = m.argmax + val marginArray = m.toArray + val maxMargin = marginArray(maxMarginIndex) - @Since("1.3.0") - override val numClasses: Int = 2 + // adjust margins for overflow + val sum = { + var temp = 0.0 + var k = 0 + while (k < numClasses) { + marginArray(k) = if (maxMargin > 0) { + math.exp(marginArray(k) - maxMargin) + } else { + math.exp(marginArray(k)) + } + temp += marginArray(k) + k += 1 + } + temp + } + + val scores = Vectors.dense(marginArray) + BLAS.scal(1 / sum, scores) + scores + } + + @Since("1.6.0") + override val numFeatures: Int = coefficientMatrix.numCols private var trainingSummary: Option[LogisticRegressionTrainingSummary] = None @@ -597,19 +787,80 @@ class LogisticRegressionModel private[spark] ( */ override protected def predict(features: Vector): Double = { // Note: We should use getThreshold instead of $(threshold) since getThreshold is overridden. - if (score(features) > getThreshold) 1 else 0 + if (isMultinomial) { + if (isDefined(thresholds)) { + val thresholds: Array[Double] = getThresholds + val probabilities = scores(features).toArray + var argMax = 0 + var max = Double.NegativeInfinity + var i = 0 + while (i < numClasses) { + if (thresholds(i) == 0.0) { + max = Double.PositiveInfinity + argMax = i + } else { + val scaled = probabilities(i) / thresholds(i) + if (scaled > max) { + max = scaled + argMax = i + } + } + i += 1 + } + argMax + } else { + scores(features).argmax + } + } + else { + if (score(features) > getThreshold) 1 else 0 + } } override protected def raw2probabilityInPlace(rawPrediction: Vector): Vector = { rawPrediction match { case dv: DenseVector => - var i = 0 - val size = dv.size - while (i < size) { - dv.values(i) = 1.0 / (1.0 + math.exp(-dv.values(i))) - i += 1 + if (isMultinomial) { + val size = dv.size + val values = dv.values + + // get the maximum margin + val maxMarginIndex = rawPrediction.argmax + val maxMargin = rawPrediction(maxMarginIndex) + + if (maxMargin == Double.PositiveInfinity) { + var k = 0 + while (k < size) { + values(k) = if (k == maxMarginIndex) 1.0 else 0.0 + k += 1 + } + } else { + val sum = { + var temp = 0.0 + var k = 0 + while (k < numClasses) { + values(k) = if (maxMargin > 0) { + math.exp(values(k) - maxMargin) + } else { + math.exp(values(k)) + } + temp += values(k) + k += 1 + } + temp + } + BLAS.scal(1 / sum, dv) + } + dv + } else { + var i = 0 + val size = dv.size + while (i < size) { + dv.values(i) = 1.0 / (1.0 + math.exp(-dv.values(i))) + i += 1 + } + dv } - dv case sv: SparseVector => throw new RuntimeException("Unexpected error in LogisticRegressionModel:" + " raw2probabilitiesInPlace encountered SparseVector") @@ -617,33 +868,46 @@ class LogisticRegressionModel private[spark] ( } override protected def predictRaw(features: Vector): Vector = { - val m = margin(features) - Vectors.dense(-m, m) + if (isMultinomial) { + margins(features) + } else { + val m = margin(features) + Vectors.dense(-m, m) + } } @Since("1.4.0") override def copy(extra: ParamMap): LogisticRegressionModel = { - val newModel = copyValues(new LogisticRegressionModel(uid, coefficients, intercept), extra) + val newModel = copyValues(new LogisticRegressionModel(uid, coefficientMatrix, interceptVector, + numClasses, isMultinomial), extra) if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get) newModel.setParent(parent) } - + // TODO: basically check all these methods override protected def raw2prediction(rawPrediction: 
Vector): Double = { - // Note: We should use getThreshold instead of $(threshold) since getThreshold is overridden. - val t = getThreshold - val rawThreshold = if (t == 0.0) { - Double.NegativeInfinity - } else if (t == 1.0) { - Double.PositiveInfinity + if (isMultinomial) { + super.raw2prediction(rawPrediction) } else { - math.log(t / (1.0 - t)) + // Note: We should use getThreshold instead of $(threshold) since getThreshold is overridden. + val t = getThreshold + val rawThreshold = if (t == 0.0) { + Double.NegativeInfinity + } else if (t == 1.0) { + Double.PositiveInfinity + } else { + math.log(t / (1.0 - t)) + } + if (rawPrediction(1) > rawThreshold) 1 else 0 } - if (rawPrediction(1) > rawThreshold) 1 else 0 } override protected def probability2prediction(probability: Vector): Double = { // Note: We should use getThreshold instead of $(threshold) since getThreshold is overridden. - if (probability(1) > getThreshold) 1 else 0 + if (isMultinomial) { + super.probability2prediction(probability) + } else { + if (probability(1) > getThreshold) 1 else 0 + } } /** @@ -676,15 +940,16 @@ object LogisticRegressionModel extends MLReadable[LogisticRegressionModel] { private case class Data( numClasses: Int, numFeatures: Int, - intercept: Double, - coefficients: Vector) + interceptVector: Vector, + coefficientMatrix: Matrix, + isMultinomial: Boolean) override protected def saveImpl(path: String): Unit = { // Save metadata and Params DefaultParamsWriter.saveMetadata(instance, path, sc) // Save model data: numClasses, numFeatures, intercept, coefficients - val data = Data(instance.numClasses, instance.numFeatures, instance.intercept, - instance.coefficients) + val data = Data(instance.numClasses, instance.numFeatures, instance.interceptVector, + instance.coefficientMatrix, instance.isMultinomial) val dataPath = new Path(path, "data").toString sparkSession.createDataFrame(Seq(data)).repartition(1).write.parquet(dataPath) } @@ -702,13 +967,15 @@ object LogisticRegressionModel extends MLReadable[LogisticRegressionModel] { val dataPath = new Path(path, "data").toString val data = sparkSession.read.format("parquet").load(dataPath) - // We will need numClasses, numFeatures in the future for multinomial logreg support. - // TODO: remove numClasses and numFeatures fields? - val Row(numClasses: Int, numFeatures: Int, intercept: Double, coefficients: Vector) = - MLUtils.convertVectorColumnsToML(data, "coefficients") - .select("numClasses", "numFeatures", "intercept", "coefficients") - .head() - val model = new LogisticRegressionModel(metadata.uid, coefficients, intercept) + val convertedCoefs = MLUtils.convertMatrixColumnsToML(data, "coefficientMatrix") + val converted = MLUtils.convertVectorColumnsToML(convertedCoefs, "interceptVector") + .select("numClasses", "numFeatures", "interceptVector", "coefficientMatrix", + "isMultinomial") + // TODO: numFeatures not needed? + val Row(numClasses: Int, numFeatures: Int, interceptVector: Vector, + coefficientMatrix: Matrix, isMultinomial: Boolean) = converted.head() + val model = new LogisticRegressionModel(metadata.uid, coefficientMatrix, interceptVector, + numClasses, isMultinomial) DefaultParamsReader.getAndSetParams(model, metadata) model @@ -1103,6 +1370,7 @@ class BinaryLogisticRegressionSummary private[classification] ( * $$ *

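
Both `scores` and `raw2probabilityInPlace` above guard against overflow by shifting the margins by their maximum before exponentiating; a standalone sketch of the same computation, simplified to an array:

    // Overflow-safe softmax sketch: math.exp(1000.0) is Infinity, but shifting
    // by the max margin first keeps every exponent <= 0. The shift is only
    // applied when the max margin is positive, mirroring the code above.
    def stableSoftmax(margins: Array[Double]): Array[Double] = {
      val maxMargin = margins.max
      val exps = margins.map { m =>
        if (maxMargin > 0) math.exp(m - maxMargin) else math.exp(m)
      }
      val sum = exps.sum
      exps.map(_ / sum)
    }
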
* + * * @param bcCoefficients The broadcast coefficients corresponding to the features. * @param bcFeaturesStd The broadcast standard deviation values of the features. * @param numClasses the number of possible outcomes for k classes classification problem in diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index e4cbf5acbc11d..ad3dab33d2909 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -19,6 +19,7 @@ package org.apache.spark.mllib.classification import org.apache.spark.SparkContext import org.apache.spark.annotation.Since +import org.apache.spark.ml.linalg.DenseMatrix import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.classification.impl.GLMClassificationModel import org.apache.spark.mllib.linalg.{DenseVector, Vector, Vectors} @@ -429,9 +430,11 @@ class LogisticRegressionWithLBFGS lr.setElasticNetParam(elasticNetParam) lr.setStandardization(useFeatureScaling) if (userSuppliedWeights) { + // TODO: check this val uid = Identifiable.randomUID("logreg-static") lr.setInitialModel(new org.apache.spark.ml.classification.LogisticRegressionModel( - uid, initialWeights.asML, 1.0)) + uid, new DenseMatrix(1, initialWeights.size, initialWeights.toArray, isTransposed=true), + Vectors.dense(0.0).asML, 2, false)) } lr.setFitIntercept(addIntercept) lr.setMaxIter(optimizer.getNumIterations()) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala index 0913fe559c562..9c7e08820d93b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala @@ -87,14 +87,14 @@ class MultinomialLogisticRegressionSuite } test("params") { - ParamsSuite.checkParams(new MultinomialLogisticRegression) - val model = new MultinomialLogisticRegressionModel("mLogReg", - Matrices.dense(2, 1, Array(0.0, 0.0)), Vectors.dense(0.0, 0.0), 2) + ParamsSuite.checkParams(new LogisticRegression) + val model = new LogisticRegressionModel("mLogReg", + Matrices.dense(2, 1, Array(0.0, 0.0)), Vectors.dense(0.0, 0.0), 2, true) ParamsSuite.checkParams(model) } test("multinomial logistic regression: default params") { - val mlr = new MultinomialLogisticRegression + val mlr = new LogisticRegression assert(mlr.getLabelCol === "label") assert(mlr.getFeaturesCol === "features") assert(mlr.getPredictionCol === "prediction") @@ -112,15 +112,15 @@ class MultinomialLogisticRegressionSuite assert(model.getPredictionCol === "prediction") assert(model.getRawPredictionCol === "rawPrediction") assert(model.getProbabilityCol === "probability") - assert(model.intercepts !== Vectors.dense(0.0, 0.0)) + assert(model.interceptVector !== Vectors.dense(0.0, 0.0)) assert(model.hasParent) } test("multinomial logistic regression with intercept without regularization") { - val trainer1 = (new MultinomialLogisticRegression).setFitIntercept(true) + val trainer1 = (new LogisticRegression).setFitIntercept(true) .setElasticNetParam(0.0).setRegParam(0.0).setStandardization(true).setMaxIter(100) - val trainer2 = (new MultinomialLogisticRegression).setFitIntercept(true) + val trainer2 = 
(new LogisticRegression).setFitIntercept(true) .setElasticNetParam(0.0).setRegParam(0.0).setStandardization(false) val model1 = trainer1.fit(multinomialDataset) @@ -166,21 +166,21 @@ class MultinomialLogisticRegressionSuite 0.0817812, -0.8502072, 0.0830284, 0.0761248), isTransposed = true) val interceptsR = Vectors.dense(-2.2449338, 0.3778931, 1.8670407) - assert(model1.coefficients ~== coefficientsR relTol 0.05) - assert(model1.coefficients.toArray.sum ~== 0.0 absTol eps) - assert(model1.intercepts ~== interceptsR relTol 0.05) - assert(model1.intercepts.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficients ~== coefficientsR relTol 0.05) - assert(model2.coefficients.toArray.sum ~== 0.0 absTol eps) - assert(model2.intercepts ~== interceptsR relTol 0.05) - assert(model2.intercepts.toArray.sum ~== 0.0 absTol eps) + assert(model1.coefficientMatrix ~== coefficientsR relTol 0.05) + assert(model1.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) + assert(model1.interceptVector ~== interceptsR relTol 0.05) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR relTol 0.05) + assert(model2.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) + assert(model2.interceptVector ~== interceptsR relTol 0.05) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) } test("multinomial logistic regression without intercept without regularization") { - val trainer1 = (new MultinomialLogisticRegression).setFitIntercept(false) + val trainer1 = (new LogisticRegression).setFitIntercept(false) .setElasticNetParam(0.0).setRegParam(0.0).setStandardization(true) - val trainer2 = (new MultinomialLogisticRegression).setFitIntercept(false) + val trainer2 = (new LogisticRegression).setFitIntercept(false) .setElasticNetParam(0.0).setRegParam(0.0).setStandardization(false) val model1 = trainer1.fit(multinomialDataset) @@ -226,23 +226,23 @@ class MultinomialLogisticRegressionSuite -0.3036269, 0.9449630, -0.2271038, -0.4364839, 0.2337022, -0.5793351, 0.1056770, 0.1159618), isTransposed = true) - assert(model1.coefficients ~== coefficientsR relTol 0.05) - assert(model1.coefficients.toArray.sum ~== 0.0 absTol eps) - assert(model1.intercepts.toArray === Array.fill(3)(0.0)) - assert(model1.intercepts.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficients ~== coefficientsR relTol 0.05) - assert(model2.coefficients.toArray.sum ~== 0.0 absTol eps) - assert(model2.intercepts.toArray === Array.fill(3)(0.0)) - assert(model2.intercepts.toArray.sum ~== 0.0 absTol eps) + assert(model1.coefficientMatrix ~== coefficientsR relTol 0.05) + assert(model1.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) + assert(model1.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR relTol 0.05) + assert(model2.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) + assert(model2.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) } test("multinomial logistic regression with intercept with L1 regularization") { // use tighter constraints because OWL-QN solver takes longer to converge - val trainer1 = (new MultinomialLogisticRegression).setFitIntercept(true) + val trainer1 = (new LogisticRegression).setFitIntercept(true) .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true) .setMaxIter(300).setTol(1e-10) - val trainer2 = (new MultinomialLogisticRegression).setFitIntercept(true) + val trainer2 = (new 
LogisticRegression).setFitIntercept(true) .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(false) .setMaxIter(300).setTol(1e-10) @@ -328,18 +328,18 @@ class MultinomialLogisticRegressionSuite 0.0, 0.0, 0.0, 0.0), isTransposed = true) val interceptsR = Vectors.dense(-0.44893320, 0.7376760, -0.2887428) - assert(model1.coefficients ~== coefficientsRStd absTol 0.02) - assert(model1.intercepts ~== interceptsRStd relTol 0.1) - assert(model1.intercepts.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficients ~== coefficientsR absTol 0.02) - assert(model2.intercepts ~== interceptsR relTol 0.1) - assert(model2.intercepts.toArray.sum ~== 0.0 absTol eps) + assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.02) + assert(model1.interceptVector ~== interceptsRStd relTol 0.1) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR absTol 0.02) + assert(model2.interceptVector ~== interceptsR relTol 0.1) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) } test("multinomial logistic regression without intercept with L1 regularization") { - val trainer1 = (new MultinomialLogisticRegression).setFitIntercept(false) + val trainer1 = (new LogisticRegression).setFitIntercept(false) .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true) - val trainer2 = (new MultinomialLogisticRegression).setFitIntercept(false) + val trainer2 = (new LogisticRegression).setFitIntercept(false) .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(false) val model1 = trainer1.fit(multinomialDataset) @@ -421,18 +421,18 @@ class MultinomialLogisticRegressionSuite 0.0, 0.1943624, -0.1902577, -0.1028789, 0.0, 0.0, 0.0, 0.0), isTransposed = true) - assert(model1.coefficients ~== coefficientsRStd absTol 0.01) - assert(model1.intercepts.toArray === Array.fill(3)(0.0)) - assert(model1.intercepts.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficients ~== coefficientsR absTol 0.01) - assert(model2.intercepts.toArray === Array.fill(3)(0.0)) - assert(model2.intercepts.toArray.sum ~== 0.0 absTol eps) + assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.01) + assert(model1.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR absTol 0.01) + assert(model2.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) } test("multinomial logistic regression with intercept with L2 regularization") { - val trainer1 = (new MultinomialLogisticRegression).setFitIntercept(true) + val trainer1 = (new LogisticRegression).setFitIntercept(true) .setElasticNetParam(0.0).setRegParam(0.1).setStandardization(true) - val trainer2 = (new MultinomialLogisticRegression).setFitIntercept(true) + val trainer2 = (new LogisticRegression).setFitIntercept(true) .setElasticNetParam(0.0).setRegParam(0.1).setStandardization(false) val model1 = trainer1.fit(multinomialDataset) @@ -516,18 +516,18 @@ class MultinomialLogisticRegressionSuite 0.04032627, -0.29756637, 0.06265594, 0.02972883), isTransposed = true) val interceptsR = Vectors.dense(-1.65488543, 1.1297533, 0.52513212) - assert(model1.coefficients ~== coefficientsRStd relTol 0.05) - assert(model1.intercepts ~== interceptsRStd relTol 0.05) - assert(model1.intercepts.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficients ~== coefficientsR relTol 0.05) - assert(model2.intercepts ~== interceptsR relTol 0.05) - 
assert(model2.intercepts.toArray.sum ~== 0.0 absTol eps) + assert(model1.coefficientMatrix ~== coefficientsRStd relTol 0.05) + assert(model1.interceptVector ~== interceptsRStd relTol 0.05) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR relTol 0.05) + assert(model2.interceptVector ~== interceptsR relTol 0.05) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) } test("multinomial logistic regression without intercept with L2 regularization") { - val trainer1 = (new MultinomialLogisticRegression).setFitIntercept(false) + val trainer1 = (new LogisticRegression).setFitIntercept(false) .setElasticNetParam(0.0).setRegParam(0.1).setStandardization(true) - val trainer2 = (new MultinomialLogisticRegression).setFitIntercept(false) + val trainer2 = (new LogisticRegression).setFitIntercept(false) .setElasticNetParam(0.0).setRegParam(0.1).setStandardization(false) val model1 = trainer1.fit(multinomialDataset) @@ -607,19 +607,19 @@ class MultinomialLogisticRegressionSuite -0.08469036, 0.38996748, -0.16468436, -0.22522976, 0.0903949, -0.24550107, 0.07260362, 0.0423021), isTransposed = true) - assert(model1.coefficients ~== coefficientsRStd absTol 0.01) - assert(model1.intercepts.toArray === Array.fill(3)(0.0)) - assert(model1.intercepts.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficients ~== coefficientsR absTol 0.01) - assert(model2.intercepts.toArray === Array.fill(3)(0.0)) - assert(model2.intercepts.toArray.sum ~== 0.0 absTol eps) + assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.01) + assert(model1.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR absTol 0.01) + assert(model2.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) } test("multinomial logistic regression with intercept with elasticnet regularization") { - val trainer1 = (new MultinomialLogisticRegression).setFitIntercept(true) + val trainer1 = (new LogisticRegression).setFitIntercept(true) .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(true) .setMaxIter(300).setTol(1e-10) - val trainer2 = (new MultinomialLogisticRegression).setFitIntercept(true) + val trainer2 = (new LogisticRegression).setFitIntercept(true) .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(false) .setMaxIter(300).setTol(1e-10) @@ -704,19 +704,19 @@ class MultinomialLogisticRegressionSuite 0.0, 0.0, 0.0, 0.0), isTransposed = true) val interceptsR = Vectors.dense(-0.39876213, 0.61089869, -0.2121366) - assert(model1.coefficients ~== coefficientsRStd absTol 0.01) - assert(model1.intercepts ~== interceptsRStd absTol 0.01) - assert(model1.intercepts.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficients ~== coefficientsR absTol 0.01) - assert(model2.intercepts ~== interceptsR absTol 0.01) - assert(model2.intercepts.toArray.sum ~== 0.0 absTol eps) + assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.01) + assert(model1.interceptVector ~== interceptsRStd absTol 0.01) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR absTol 0.01) + assert(model2.interceptVector ~== interceptsR absTol 0.01) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) } test("multinomial logistic regression without intercept with elasticnet regularization") { - val trainer1 = (new 
MultinomialLogisticRegression).setFitIntercept(false) + val trainer1 = (new LogisticRegression).setFitIntercept(false) .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(true) .setMaxIter(300).setTol(1e-10) - val trainer2 = (new MultinomialLogisticRegression).setFitIntercept(false) + val trainer2 = (new LogisticRegression).setFitIntercept(false) .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(false) .setMaxIter(300).setTol(1e-10) @@ -798,12 +798,12 @@ class MultinomialLogisticRegressionSuite 0.0, 0.14666497, -0.16570638, -0.05982875, 0.0, 0.0, 0.0, 0.0), isTransposed = true) - assert(model1.coefficients ~== coefficientsRStd absTol 0.01) - assert(model1.intercepts.toArray === Array.fill(3)(0.0)) - assert(model1.intercepts.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficients ~== coefficientsR absTol 0.01) - assert(model2.intercepts.toArray === Array.fill(3)(0.0)) - assert(model2.intercepts.toArray.sum ~== 0.0 absTol eps) + assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.01) + assert(model1.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR absTol 0.01) + assert(model2.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) } /* @@ -814,9 +814,9 @@ class MultinomialLogisticRegressionSuite */ test("prediction") { - val model = new MultinomialLogisticRegressionModel("mLogReg", + val model = new LogisticRegressionModel("mLogReg", Matrices.dense(3, 2, Array(0.0, 0.0, 0.0, 1.0, 2.0, 3.0)), - Vectors.dense(0.0, 0.0, 0.0), 3) + Vectors.dense(0.0, 0.0, 0.0), 3, true) val overFlowData = spark.createDataFrame(Seq( LabeledPoint(1.0, Vectors.dense(0.0, 1000.0)), LabeledPoint(1.0, Vectors.dense(0.0, -1.0)) @@ -837,7 +837,7 @@ class MultinomialLogisticRegressionSuite } test("multinomial logistic regression: Predictor, Classifier methods") { - val mlr = new MultinomialLogisticRegression + val mlr = new LogisticRegression val model = mlr.fit(dataset) assert(model.numClasses === 3) @@ -852,9 +852,9 @@ class MultinomialLogisticRegressionSuite val margins = Array.tabulate(3) { k => var margin = 0.0 features.foreachActive { (index, value) => - margin += value * model.coefficients(k, index) + margin += value * model.coefficientMatrix(k, index) } - margin += model.intercepts(k) + margin += model.interceptVector(k) margin } assert(raw ~== Vectors.dense(margins) relTol eps) @@ -884,21 +884,21 @@ class MultinomialLogisticRegressionSuite } test("multinomial logistic regression coefficients should be centered") { - val mlr = new MultinomialLogisticRegression().setMaxIter(1) + val mlr = new LogisticRegression().setMaxIter(1) val model = mlr.fit(dataset) - assert(model.intercepts.toArray.sum ~== 0.0 absTol 1e-6) - assert(model.coefficients.toArray.sum ~== 0.0 absTol 1e-6) + assert(model.interceptVector.toArray.sum ~== 0.0 absTol 1e-6) + assert(model.coefficientMatrix.toArray.sum ~== 0.0 absTol 1e-6) } test("numClasses specified in metadata/inferred") { - val mlr = new MultinomialLogisticRegression().setMaxIter(1) + val mlr = new LogisticRegression().setMaxIter(1) // specify more classes than unique label values val labelMeta = NominalAttribute.defaultAttr.withName("label").withNumValues(4).toMetadata() val df = dataset.select(dataset("label").as("label", labelMeta), dataset("features")) val model1 = mlr.fit(df) assert(model1.numClasses === 4) - assert(model1.intercepts.size === 4) + 
assert(model1.interceptVector.size === 4) // specify two classes when there are really three val labelMeta1 = NominalAttribute.defaultAttr.withName("label").withNumValues(2).toMetadata() @@ -919,7 +919,7 @@ class MultinomialLogisticRegressionSuite LabeledPoint(4.0, Vectors.dense(1.0)), LabeledPoint(4.0, Vectors.dense(2.0))) ) - val mlr = new MultinomialLogisticRegression + val mlr = new LogisticRegression().setFamily("multinomial") val model = mlr.fit(constantData) val results = model.transform(constantData) results.select("rawPrediction", "probability", "prediction").collect().foreach { @@ -966,7 +966,7 @@ class MultinomialLogisticRegressionSuite val testData = spark.createDataFrame(Array.tabulate[LabeledPoint](numClasses) { i => LabeledPoint(i.toDouble, Vectors.dense(i.toDouble)) }) - val mlr = new MultinomialLogisticRegression().setWeightCol("weight") + val mlr = new LogisticRegression().setWeightCol("weight") val model = mlr.fit(outlierData) val results = model.transform(testData).select("label", "prediction").collect() @@ -979,11 +979,11 @@ class MultinomialLogisticRegressionSuite 42L) val weightedModel = mlr.fit(weightedData) val overSampledModel = mlr.setWeightCol("").fit(overSampledData) - assert(weightedModel.coefficients ~== overSampledModel.coefficients relTol 0.01) + assert(weightedModel.coefficientMatrix ~== overSampledModel.coefficientMatrix relTol 0.01) } test("thresholds prediction") { - val mlr = new MultinomialLogisticRegression + val mlr = new LogisticRegression val model = mlr.fit(dataset) val basePredictions = model.transform(dataset).select("prediction").collect() @@ -1010,28 +1010,28 @@ class MultinomialLogisticRegressionSuite }) } - test("read/write") { - def checkModelData( - model: MultinomialLogisticRegressionModel, - model2: MultinomialLogisticRegressionModel): Unit = { - assert(model.intercepts === model2.intercepts) - assert(model.coefficients.toArray === model2.coefficients.toArray) - assert(model.numClasses === model2.numClasses) - assert(model.numFeatures === model2.numFeatures) - } - val mlr = new MultinomialLogisticRegression() - testEstimatorAndModelReadWrite(mlr, dataset, - MultinomialLogisticRegressionSuite.allParamSettings, - checkModelData) - } +// test("read/write") { +// def checkModelData( +// model: LogisticRegressionModel, +// model2: LogisticRegressionModel): Unit = { +// assert(model.interceptVector === model2.interceptVector) +// assert(model.coefficientMatrix.toArray === model2.coefficients.toArray) +// assert(model.numClasses === model2.numClasses) +// assert(model.numFeatures === model2.numFeatures) +// } +// val mlr = new LogisticRegression() +// testEstimatorAndModelReadWrite(mlr, dataset, +// MultinomialLogisticRegressionSuite.allParamSettings, +// checkModelData) +// } test("should support all NumericType labels and not support other types") { - val mlr = new MultinomialLogisticRegression().setMaxIter(1) + val mlr = new LogisticRegression().setMaxIter(1) MLTestingUtils - .checkNumericTypes[MultinomialLogisticRegressionModel, MultinomialLogisticRegression]( + .checkNumericTypes[LogisticRegressionModel, LogisticRegression]( mlr, spark) { (expected, actual) => - assert(expected.intercepts === actual.intercepts) - assert(expected.coefficients.toArray === actual.coefficients.toArray) + assert(expected.interceptVector === actual.interceptVector) + assert(expected.coefficientMatrix.toArray === actual.coefficients.toArray) } } } From d4675bea0c531a786381adb9c4763f97ae8bcb9e Mon Sep 17 00:00:00 2001 From: sethah Date: Wed, 24 Aug 2016 22:05:46 
-0700 Subject: [PATCH 02/24] add initial model --- .../classification/LogisticRegression.scala | 46 +++++++++++-------- .../LogisticRegressionSuite.scala | 36 ++++++++++++++- 2 files changed, 61 insertions(+), 21 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index c8c06a4d7752b..15a2450f464de 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -354,10 +354,10 @@ class LogisticRegression @Since("1.2.0") ( case None => histogram.length } val isBinaryClassification = numClasses == 1 || numClasses == 2 + // TODO: use enumeration or similar val isMultinomial = !((!isSet(family) && isBinaryClassification) || $(family) == "binomial") val numCoefficientSets = if (isMultinomial) numClasses else 1 - // TODO: use enumeration or similar if (!isMultinomial) { require(isBinaryClassification, s"Binomial family only supports 1 or 2" + s"outcome classes but found $numClasses") @@ -461,25 +461,33 @@ class LogisticRegression @Since("1.2.0") ( } // TODO: need to add this for multinomial case - if (optInitialModel.isDefined && optInitialModel.get.coefficients.size != numFeatures) { - val vecSize = optInitialModel.get.coefficients.size - logWarning( - s"Initial coefficients will be ignored!! As its size $vecSize did not match the " + - s"expected size $numFeatures") + val initialModelIsValid = optInitialModel.exists { model => + val providedCoefs = model.coefficientMatrix + val modelValid = (providedCoefs.numRows == numCoefficientSets) && + (providedCoefs.numCols == numFeatures) && + (model.interceptVector.size == numCoefficientSets) + if (!modelValid) { + logWarning(s"Initial coefficients will be ignored! Its dimensions " + + s"(${providedCoefs.numRows}, ${providedCoefs.numCols}) did not match the expected " + + s"size ($numCoefficientSets, $numFeatures)") + } + modelValid } - // TODO: removing initial model for now -// if (optInitialModel.isDefined && optInitialModel.get.coefficients.size == numFeatures) { -// val initialCoefficientsWithInterceptArray = initialCoefficientsWithIntercept.toArray -// optInitialModel.get.coefficients.foreachActive { case (index, value) => -// initialCoefficientsWithInterceptArray(index) = value -// } -// if ($(fitIntercept)) { -// initialCoefficientsWithInterceptArray(numFeatures) == optInitialModel.get.intercept -// } -// } - if ($(fitIntercept) && isMultinomial) { - // TODO: can we merge the logic or something here? + if (initialModelIsValid) { + val initialCoefArray = initialCoefficientsWithIntercept.toArray + val providedCoefArray = optInitialModel.get.coefficientMatrix.toArray + providedCoefArray.indices.foreach { i => + val flatIndex = if ($(fitIntercept)) i + i / numFeatures else i + initialCoefArray(flatIndex) = providedCoefArray(i) + } + if ($(fitIntercept)) { + optInitialModel.get.interceptVector.foreachActive { (index, value) => + val coefIndex = (index + 1) * numFeaturesPlusIntercept - 1 + initialCoefArray(coefIndex) = value + } + } + } else if ($(fitIntercept) && isMultinomial) { /* For multinomial logistic regression, when we initialize the coefficients as zeros, it will converge faster if we initialize the intercepts such that @@ -556,7 +564,6 @@ class LogisticRegression @Since("1.2.0") ( as a result, no scaling is needed. 
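
Concretely, the conversion back to the original scale divides each coefficient by its feature's sample standard deviation, with constant (zero-variance) features pinned to zero; a minimal sketch with illustrative numbers:

    // w_orig(j) = w_scaled(j) / sigma(j); zero-variance features forced to 0.0.
    // Values illustrative only.
    val featuresStd = Array(2.0, 0.5, 0.0)
    val scaled = Array(0.8, -1.2, 3.4)
    val original = scaled.indices.map { j =>
      if (featuresStd(j) != 0.0) scaled(j) / featuresStd(j) else 0.0
    }
    // original == Vector(0.4, -2.4, 0.0)
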
*/ val rawCoefficients = state.x.toArray.clone() - // TODO: I think this will work for both binomial and multinomial val coefficientArray = Array.tabulate(numCoefficientSets * numFeatures) { i => // flatIndex will loop though rawCoefficients, and skip the intercept terms. val flatIndex = if ($(fitIntercept)) i + i / numFeatures else i @@ -612,6 +619,7 @@ class LogisticRegression @Since("1.2.0") ( val model = copyValues(new LogisticRegressionModel(uid, coefficients, intercept, numClasses, isMultinomial)) + // TODO: need to implement model summary for MLOR... probably best to do it in another JIRA val (summaryModel, probabilityColName) = model.findSummaryModelAndProbabilityCol() val logRegSummary = new BinaryLogisticRegressionTrainingSummary( summaryModel.transform(dataset), diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index a1b48539c46e0..a0af82c2ea42c 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -25,7 +25,7 @@ import scala.util.control.Breaks._ import org.apache.spark.SparkFunSuite import org.apache.spark.ml.classification.LogisticRegressionSuite._ import org.apache.spark.ml.feature.{Instance, LabeledPoint} -import org.apache.spark.ml.linalg.{Vector, Vectors} +import org.apache.spark.ml.linalg.{DenseMatrix, Vector, Vectors} import org.apache.spark.ml.param.ParamsSuite import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils} import org.apache.spark.ml.util.TestingUtils._ @@ -37,7 +37,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest { @transient var dataset: Dataset[_] = _ - @transient var binaryDataset: DataFrame = _ + @transient var binaryDataset: Dataset[_] = _ + @transient var multinomialDataset: Dataset[_] = _ private val eps: Double = 1e-5 override def beforeAll(): Unit = { @@ -57,6 +58,23 @@ class LogisticRegressionSuite spark.createDataFrame(sc.parallelize(testData, 4)) } + + multinomialDataset = { + val nPoints = 10000 + val coefficients = Array( + -0.57997, 0.912083, -0.371077, -0.819866, 2.688191, + -0.16624, -0.84355, -0.048509, -0.301789, 4.170682) + + val xMean = Array(5.843, 3.057, 3.758, 1.199) + val xVariance = Array(0.6856, 0.1899, 3.116, 0.581) + + val testData = generateMultinomialLogisticInput( + coefficients, xMean, xVariance, addIntercept = true, nPoints, 42) + + val df = spark.createDataFrame(sc.parallelize(testData, 4)) + df.cache() + df + } } /** @@ -886,6 +904,20 @@ class LogisticRegressionSuite assert(model1a0.intercept ~== model1b.intercept absTol 1E-3) } + test("set initial model") { + // TODO: the binary one doesn't converge any faster + // TODO: should they converge after one or two iterations? 
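
The test below exercises the initial-model path added in this patch; the flat-index copy it relies on can be sketched in isolation (toy dimensions, fitIntercept assumed true):

    // Sketch of laying a provided (numClasses x numFeatures) coefficient matrix
    // into the flat solver vector, row-major, skipping one trailing intercept
    // slot per class row. Dimensions and values illustrative only.
    val numClasses = 3
    val numFeatures = 4
    val flat = new Array[Double](numClasses * (numFeatures + 1))
    val provided = Array.tabulate(numClasses * numFeatures)(_ * 0.1)
    provided.indices.foreach { i =>
      // i / numFeatures counts how many intercept slots have been skipped so far
      flat(i + i / numFeatures) = provided(i)
    }
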
+ val lr = new LogisticRegression() + val model1 = lr.fit(binaryDataset) + val lr2 = new LogisticRegression().setInitialModel(model1) + val model2 = lr2.fit(binaryDataset) + + val lr3 = new LogisticRegression() + val model3 = lr3.fit(multinomialDataset) + val lr4 = new LogisticRegression().setInitialModel(model3) + val model4 = lr4.fit(multinomialDataset) + } + test("logistic regression with all labels the same") { val sameLabels = dataset .withColumn("zeroLabel", lit(0.0)) From a399ef3ab4b9720f081b2e234f993eef61c5587b Mon Sep 17 00:00:00 2001 From: sethah Date: Thu, 25 Aug 2016 09:16:33 -0700 Subject: [PATCH 03/24] fixing some todos, added dual support for weighted tests --- .../classification/LogisticRegression.scala | 65 +++++---- .../LogisticRegressionSuite.scala | 138 +++++++++++------- 2 files changed, 120 insertions(+), 83 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 15a2450f464de..aca96aa3ba3a1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -75,16 +75,22 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas /** - * Param for the name of family which is a description of the error distribution + * Param for the name of family which is a description of the label distribution * to be used in the model. - * Supported options: "multinomial", "binomial". - * Default is "multinomial". + * Supported options: "auto", "multinomial", "binomial". + * Supported options: + * - "auto": Automatically select the family based on the number of classes: + * If numClasses == 1 || numClasses == 2, set to "binomial". + * Else, set to "multinomial" + * - "binomial": Binary logistic regression with pivoting. + * - "multinomial": Multinomial (softmax) regression without pivoting. + * Default is "auto". * * @group param */ @Since("2.0.0") final val family: Param[String] = new Param(this, "family", - "The name of family which is a description of the error distribution to be used in the " + + "The name of family which is a description of the label distribution to be used in the " + s"model. Supported options: ${supportedFamilyNames.mkString(", ")}.", ParamValidators.inArray[String](supportedFamilyNames)) @@ -243,14 +249,13 @@ class LogisticRegression @Since("1.2.0") ( /** * Sets the value of param [[family]]. - * Default is "multinomial". + * Default is "auto". * * @group setParam */ - // TODO: don't use strings? @Since("2.0.0") def setFamily(value: String): this.type = set(family, value) - setDefault(family -> "multinomial") + setDefault(family -> "auto") /** * Whether to standardize the training features before fitting the model. 
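
With the default now "auto", the family resolution computed a few hunks below reduces to the rule sketched here (an equivalent restatement, assuming the value has already passed the ParamValidators.inArray check):

    // Equivalent restatement of the isMultinomial decision; sketch only.
    def isMultinomialFamily(family: String, numClasses: Int): Boolean =
      family match {
        case "auto"        => numClasses > 2 // 1 or 2 classes resolve to binomial
        case "binomial"    => false          // numClasses <= 2 is required later
        case "multinomial" => true
      }
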
@@ -267,6 +272,7 @@ class LogisticRegression @Since("1.2.0") ( setDefault(standardization -> true) @Since("1.5.0") + // TODO: Check this behavior override def setThreshold(value: Double): this.type = super.setThreshold(value) @Since("1.5.0") @@ -354,12 +360,12 @@ class LogisticRegression @Since("1.2.0") ( case None => histogram.length } val isBinaryClassification = numClasses == 1 || numClasses == 2 - // TODO: use enumeration or similar - val isMultinomial = !((!isSet(family) && isBinaryClassification) || $(family) == "binomial") + val isMultinomial = ($(family) == LogisticRegression.auto && !isBinaryClassification) || + ($(family) == LogisticRegression.multinomial) val numCoefficientSets = if (isMultinomial) numClasses else 1 if (!isMultinomial) { - require(isBinaryClassification, s"Binomial family only supports 1 or 2" + + require(isBinaryClassification, s"Binomial family only supports 1 or 2 " + s"outcome classes but found $numClasses") } @@ -422,7 +428,6 @@ class LogisticRegression @Since("1.2.0") ( new BreezeLBFGS[BDV[Double]]($(maxIter), 10, $(tol)) } else { val standardizationParam = $(standardization) - // TODO: check this works in both cases def regParamL1Fun = (index: Int) => { // Remove the L1 penalization on the intercept val isIntercept = $(fitIntercept) && ((index + 1) % numFeaturesPlusIntercept == 0) @@ -453,14 +458,8 @@ class LogisticRegression @Since("1.2.0") ( new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, regParamL1Fun, $(tol)) } - // TODO: double check this - val initialCoefficientsWithIntercept = if (isMultinomial) { - Vectors.zeros(numClasses * numFeaturesPlusIntercept) - } else { - Vectors.zeros(numFeaturesPlusIntercept) - } + val initialCoefficientsWithIntercept = Vectors.zeros(numCoefficientSets * numFeatures) - // TODO: need to add this for multinomial case val initialModelIsValid = optInitialModel.exists { model => val providedCoefs = model.coefficientMatrix val modelValid = (providedCoefs.numRows == numCoefficientSets) && @@ -619,15 +618,19 @@ class LogisticRegression @Since("1.2.0") ( val model = copyValues(new LogisticRegressionModel(uid, coefficients, intercept, numClasses, isMultinomial)) - // TODO: need to implement model summary for MLOR... 
probably best to do it in another JIRA - val (summaryModel, probabilityColName) = model.findSummaryModelAndProbabilityCol() - val logRegSummary = new BinaryLogisticRegressionTrainingSummary( - summaryModel.transform(dataset), - probabilityColName, - $(labelCol), - $(featuresCol), - objectiveHistory) - val m = model.setSummary(logRegSummary) + // TODO: implement summary model for multinomial case + val m = if (!isMultinomial) { + val (summaryModel, probabilityColName) = model.findSummaryModelAndProbabilityCol() + val logRegSummary = new BinaryLogisticRegressionTrainingSummary( + summaryModel.transform(dataset), + probabilityColName, + $(labelCol), + $(featuresCol), + objectiveHistory) + model.setSummary(logRegSummary) + } else { + model + } instr.logSuccess(m) m } @@ -642,7 +645,11 @@ object LogisticRegression extends DefaultParamsReadable[LogisticRegression] { @Since("1.6.0") override def load(path: String): LogisticRegression = super.load(path) - private[classification] lazy val supportedFamilyNames = Array("binomial", "multinomial") + private val multinomial = "multinomial" + private val binomial = "binomial" + private val auto = "auto" + + private[classification] lazy val supportedFamilyNames = Array(auto, binomial, multinomial) } /** @@ -891,7 +898,7 @@ class LogisticRegressionModel private[spark] ( if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get) newModel.setParent(parent) } - // TODO: basically check all these methods + override protected def raw2prediction(rawPrediction: Vector): Double = { if (isMultinomial) { super.raw2prediction(rawPrediction) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index a0af82c2ea42c..899158e45954a 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -85,6 +85,9 @@ class LogisticRegressionSuite binaryDataset.rdd.map { case Row(label: Double, features: Vector) => label + "," + features.toArray.mkString(",") }.repartition(1).saveAsTextFile("target/tmp/LogisticRegressionSuite/binaryDataset") + multinomialDataset.rdd.map { case Row(label: Double, features: Vector) => + label + "," + features.toArray.mkString(",") + }.repartition(1).saveAsTextFile("target/tmp/LogisticRegressionSuite/multinomialDataset") } test("params") { @@ -100,6 +103,7 @@ class LogisticRegressionSuite assert(lr.getPredictionCol === "prediction") assert(lr.getRawPredictionCol === "rawPrediction") assert(lr.getProbabilityCol === "probability") + assert(lr.getFamily === "multinomial") assert(!lr.isDefined(lr.weightCol)) assert(lr.getFitIntercept) assert(lr.getStandardization) @@ -221,7 +225,6 @@ class LogisticRegressionSuite } test("logistic regression: Predictor, Classifier methods") { - val spark = this.spark val lr = new LogisticRegression val model = lr.fit(dataset) @@ -811,6 +814,7 @@ class LogisticRegressionSuite } test("evaluate on test set") { + // TODO: add for multiclass // Evaluate on test set should be same as that of the transformed training data. 
val lr = new LogisticRegression() .setMaxIter(10) @@ -845,63 +849,89 @@ class LogisticRegressionSuite } - test("binary logistic regression with weighted samples") { - val (dataset, weightedDataset) = { - val nPoints = 1000 - val coefficients = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191) - val xMean = Array(5.843, 3.057, 3.758, 1.199) - val xVariance = Array(0.6856, 0.1899, 3.116, 0.581) - val testData = - generateMultinomialLogisticInput(coefficients, xMean, xVariance, true, nPoints, 42) - - // Let's over-sample the positive samples twice. - val data1 = testData.flatMap { case labeledPoint: LabeledPoint => - if (labeledPoint.label == 1.0) { - Iterator(labeledPoint, labeledPoint) - } else { - Iterator(labeledPoint) - } - } + test("binary logistic regression with weighted data") { + val numClasses = 2 + val numPoints = 40 + val outlierData = MLTestingUtils.genClassificationInstancesWithWeightedOutliers(spark, + numClasses, numPoints) + val testData = spark.createDataFrame(Array.tabulate[LabeledPoint](numClasses) { i => + LabeledPoint(i.toDouble, Vectors.dense(i.toDouble)) + }) + val lr = new LogisticRegression().setWeightCol("weight") + val model = lr.fit(outlierData) + val results = model.transform(testData).select("label", "prediction").collect() + + // check that the predictions are the one to one mapping + results.foreach { case Row(label: Double, pred: Double) => + assert(label === pred) + } + val (overSampledData, weightedData) = + MLTestingUtils.genEquivalentOversampledAndWeightedInstances(outlierData, "label", "features", + 42L) + val weightedModel = lr.fit(weightedData) + val overSampledModel = lr.setWeightCol("").fit(overSampledData) + assert(weightedModel.coefficientMatrix ~== overSampledModel.coefficientMatrix relTol 0.01) + } - val rnd = new Random(8392) - val data2 = testData.flatMap { case LabeledPoint(label: Double, features: Vector) => - if (rnd.nextGaussian() > 0.0) { - if (label == 1.0) { - Iterator( - Instance(label, 1.2, features), - Instance(label, 0.8, features), - Instance(0.0, 0.0, features)) - } else { - Iterator( - Instance(label, 0.3, features), - Instance(1.0, 0.0, features), - Instance(label, 0.1, features), - Instance(label, 0.6, features)) - } - } else { - if (label == 1.0) { - Iterator(Instance(label, 2.0, features)) - } else { - Iterator(Instance(label, 1.0, features)) - } - } - } + test("multinomial logistic regression with weighted data") { + val numClasses = 5 + val numPoints = 40 + val outlierData = MLTestingUtils.genClassificationInstancesWithWeightedOutliers(spark, + numClasses, numPoints) + val testData = spark.createDataFrame(Array.tabulate[LabeledPoint](numClasses) { i => + LabeledPoint(i.toDouble, Vectors.dense(i.toDouble)) + }) + val mlr = new LogisticRegression().setWeightCol("weight") + val model = mlr.fit(outlierData) + val results = model.transform(testData).select("label", "prediction").collect() + + // check that the predictions are the one to one mapping + results.foreach { case Row(label: Double, pred: Double) => + assert(label === pred) + } + val (overSampledData, weightedData) = + MLTestingUtils.genEquivalentOversampledAndWeightedInstances(outlierData, "label", "features", + 42L) + val weightedModel = mlr.fit(weightedData) + val overSampledModel = mlr.setWeightCol("").fit(overSampledData) + assert(weightedModel.coefficientMatrix ~== overSampledModel.coefficientMatrix relTol 0.01) + } - (spark.createDataFrame(sc.parallelize(data1, 4)), - spark.createDataFrame(sc.parallelize(data2, 4))) + test("set family") { + val lr = new 
LogisticRegression().setMaxIter(1) + // don't set anything for binary classification + val model1 = lr.fit(binaryDataset) + assert(model1.coefficientMatrix.numRows === 1 && model1.coefficientMatrix.numCols === 4) + assert(model1.interceptVector.size === 1) + + // set to multinomial for binary classification + val model2 = lr.setFamily("multinomial").fit(binaryDataset) + assert(model2.coefficientMatrix.numRows === 2 && model2.coefficientMatrix.numCols === 4) + assert(model2.interceptVector.size === 2) + + // set to binary for binary classification + val model3 = lr.setFamily("binomial").fit(binaryDataset) + assert(model3.coefficientMatrix.numRows === 1 && model3.coefficientMatrix.numCols === 4) + assert(model3.interceptVector.size === 1) + + // don't set anything for multiclass classification + val mlr = new LogisticRegression().setMaxIter(1) + val model4 = mlr.fit(multinomialDataset) + assert(model4.coefficientMatrix.numRows === 3 && model4.coefficientMatrix.numCols === 4) + assert(model4.interceptVector.size === 3) + + // set to binary for multiclass classification + mlr.setFamily("binomial") + val thrown = intercept[IllegalArgumentException] { + mlr.fit(multinomialDataset) } + assert(thrown.getMessage.contains("Binomial family only supports 1 or 2 outcome classes")) - val trainer1a = (new LogisticRegression).setFitIntercept(true) - .setRegParam(0.0).setStandardization(true) - val trainer1b = (new LogisticRegression).setFitIntercept(true).setWeightCol("weight") - .setRegParam(0.0).setStandardization(true) - val model1a0 = trainer1a.fit(dataset) - val model1a1 = trainer1a.fit(weightedDataset) - val model1b = trainer1b.fit(weightedDataset) - assert(model1a0.coefficients !~= model1a1.coefficients absTol 1E-3) - assert(model1a0.intercept !~= model1a1.intercept absTol 1E-3) - assert(model1a0.coefficients ~== model1b.coefficients absTol 1E-3) - assert(model1a0.intercept ~== model1b.intercept absTol 1E-3) + // set to multinomial for multiclass + mlr.setFamily("multinomial") + val model5 = mlr.fit(multinomialDataset) + assert(model5.coefficientMatrix.numRows === 3 && model5.coefficientMatrix.numCols === 4) + assert(model5.interceptVector.size === 3) } test("set initial model") { From a35469019ba6ca0cb0fd9877c28ae02aba46d337 Mon Sep 17 00:00:00 2001 From: sethah Date: Thu, 25 Aug 2016 13:11:44 -0700 Subject: [PATCH 04/24] all auxiliary tests are merged to LOR, and added initial model test --- .../classification/LogisticRegression.scala | 6 +- .../LogisticRegressionSuite.scala | 315 ++++++++++++++++-- .../MultinomialLogisticRegressionSuite.scala | 264 ++------------- 3 files changed, 322 insertions(+), 263 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index aca96aa3ba3a1..9b1845eaef98a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -458,7 +458,8 @@ class LogisticRegression @Since("1.2.0") ( new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, regParamL1Fun, $(tol)) } - val initialCoefficientsWithIntercept = Vectors.zeros(numCoefficientSets * numFeatures) + val initialCoefficientsWithIntercept = + Vectors.zeros(numCoefficientSets * numFeaturesPlusIntercept) val initialModelIsValid = optInitialModel.exists { model => val providedCoefs = model.coefficientMatrix @@ -678,7 +679,7 @@ class LogisticRegressionModel private[spark] ( 
@Since("1.3.0") def intercept: Double = { if (isMultinomial) { - logWarning("Multiclass model contains an vector of intercepts, use interceptVector instead." + + logWarning("Multiclass model contains a vector of intercepts, use interceptVector instead." + "Returning 0.0 as placeholder.") } _intercept @@ -940,6 +941,7 @@ class LogisticRegressionModel private[spark] ( @Since("1.6.0") object LogisticRegressionModel extends MLReadable[LogisticRegressionModel] { + // TODO: we need to be able to load old models as well @Since("1.6.0") override def read: MLReader[LogisticRegressionModel] = new LogisticRegressionModelReader diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 899158e45954a..a8e94fafa50ed 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.ml.classification +import org.apache.spark.ml.attribute.NominalAttribute + import scala.collection.JavaConverters._ import scala.language.existentials import scala.util.Random @@ -25,7 +27,7 @@ import scala.util.control.Breaks._ import org.apache.spark.SparkFunSuite import org.apache.spark.ml.classification.LogisticRegressionSuite._ import org.apache.spark.ml.feature.{Instance, LabeledPoint} -import org.apache.spark.ml.linalg.{DenseMatrix, Vector, Vectors} +import org.apache.spark.ml.linalg.{Matrices, DenseMatrix, Vector, Vectors} import org.apache.spark.ml.param.ParamsSuite import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils} import org.apache.spark.ml.util.TestingUtils._ @@ -36,7 +38,8 @@ import org.apache.spark.sql.functions.lit class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest { - @transient var dataset: Dataset[_] = _ + @transient var smallBinaryDataset: Dataset[_] = _ + @transient var smallMultinomialDataset: Dataset[_] = _ @transient var binaryDataset: Dataset[_] = _ @transient var multinomialDataset: Dataset[_] = _ private val eps: Double = 1e-5 @@ -44,7 +47,25 @@ class LogisticRegressionSuite override def beforeAll(): Unit = { super.beforeAll() - dataset = spark.createDataFrame(generateLogisticInput(1.0, 1.0, nPoints = 100, seed = 42)) + smallBinaryDataset = + spark.createDataFrame(generateLogisticInput(1.0, 1.0, nPoints = 100, seed = 42)) + + smallMultinomialDataset = { + val nPoints = 100 + val coefficients = Array( + -0.57997, 0.912083, -0.371077, + -0.16624, -0.84355, -0.048509) + + val xMean = Array(5.843, 3.057) + val xVariance = Array(0.6856, 0.1899) + + val testData = generateMultinomialLogisticInput( + coefficients, xMean, xVariance, addIntercept = true, nPoints, 42) + + val df = spark.createDataFrame(sc.parallelize(testData, 4)) + df.cache() + df + } binaryDataset = { val nPoints = 10000 @@ -78,7 +99,7 @@ class LogisticRegressionSuite } /** - * Enable the ignored test to export the dataset into CSV format, + * Enable the ignored test to export the smallBinaryDataset into CSV format, * so we can validate the training accuracy compared with R's glmnet package. 
*/ ignore("export test data into CSV format") { @@ -103,12 +124,12 @@ class LogisticRegressionSuite assert(lr.getPredictionCol === "prediction") assert(lr.getRawPredictionCol === "rawPrediction") assert(lr.getProbabilityCol === "probability") - assert(lr.getFamily === "multinomial") + assert(lr.getFamily === "auto") assert(!lr.isDefined(lr.weightCol)) assert(lr.getFitIntercept) assert(lr.getStandardization) - val model = lr.fit(dataset) - model.transform(dataset) + val model = lr.fit(smallBinaryDataset) + model.transform(smallBinaryDataset) .select("label", "probability", "prediction", "rawPrediction") .collect() assert(model.getThreshold === 0.5) @@ -122,11 +143,11 @@ class LogisticRegressionSuite test("empty probabilityCol") { val lr = new LogisticRegression().setProbabilityCol("") - val model = lr.fit(dataset) + val model = lr.fit(smallBinaryDataset) assert(model.hasSummary) // Validate that we re-insert a probability column for evaluation val fieldNames = model.summary.predictions.schema.fieldNames - assert(dataset.schema.fieldNames.toSet.subsetOf( + assert(smallBinaryDataset.schema.fieldNames.toSet.subsetOf( fieldNames.toSet)) assert(fieldNames.exists(s => s.startsWith("probability_"))) } @@ -163,17 +184,59 @@ class LogisticRegressionSuite // thresholds and threshold must be consistent: values withClue("fit with ParamMap should throw error if threshold, thresholds do not match.") { intercept[IllegalArgumentException] { - val lr2model = lr2.fit(dataset, + val lr2model = lr2.fit(smallBinaryDataset, lr2.thresholds -> Array(0.3, 0.7), lr2.threshold -> (expectedThreshold / 2.0)) lr2model.getThreshold } } } + test("thresholds prediction") { + val blr = new LogisticRegression().setFamily("binomial") + val binaryModel = blr.fit(smallBinaryDataset) + + binaryModel.setThreshold(1.0) + val binaryZeroPredictions = + binaryModel.transform(smallBinaryDataset).select("prediction").collect() + assert(binaryZeroPredictions.forall(_.getDouble(0) === 0.0)) + + binaryModel.setThreshold(0.0) + val binaryOnePredictions = + binaryModel.transform(smallBinaryDataset).select("prediction").collect() + assert(binaryOnePredictions.forall(_.getDouble(0) === 1.0)) + + + val mlr = new LogisticRegression().setFamily("multinomial") + val model = mlr.fit(smallMultinomialDataset) + val basePredictions = model.transform(smallMultinomialDataset).select("prediction").collect() + + // should predict all zeros + model.setThresholds(Array(1, 1000, 1000)) + val zeroPredictions = model.transform(smallMultinomialDataset).select("prediction").collect() + assert(zeroPredictions.forall(_.getDouble(0) === 0.0)) + + // should predict all ones + model.setThresholds(Array(1000, 1, 1000)) + val onePredictions = model.transform(smallMultinomialDataset).select("prediction").collect() + assert(onePredictions.forall(_.getDouble(0) === 1.0)) + + // should predict all twos + model.setThresholds(Array(1000, 1000, 1)) + val twoPredictions = model.transform(smallMultinomialDataset).select("prediction").collect() + assert(twoPredictions.forall(_.getDouble(0) === 2.0)) + + // constant threshold scaling is the same as no thresholds + model.setThresholds(Array(1000, 1000, 1000)) + val scaledPredictions = model.transform(smallMultinomialDataset).select("prediction").collect() + assert(scaledPredictions.zip(basePredictions).forall { case (scaled, base) => + scaled.getDouble(0) === base.getDouble(0) + }) + } + test("logistic regression doesn't fit intercept when fitIntercept is off") { val lr = new LogisticRegression lr.setFitIntercept(false) - val 
model = lr.fit(dataset) + val model = lr.fit(smallBinaryDataset) assert(model.intercept === 0.0) // copied model must have the same parent. @@ -187,7 +250,7 @@ class LogisticRegressionSuite .setRegParam(1.0) .setThreshold(0.6) .setProbabilityCol("myProbability") - val model = lr.fit(dataset) + val model = lr.fit(smallBinaryDataset) val parent = model.parent.asInstanceOf[LogisticRegression] assert(parent.getMaxIter === 10) assert(parent.getRegParam === 1.0) @@ -196,16 +259,16 @@ class LogisticRegressionSuite // Modify model params, and check that the params worked. model.setThreshold(1.0) - val predAllZero = model.transform(dataset) + val predAllZero = model.transform(smallBinaryDataset) .select("prediction", "myProbability") .collect() .map { case Row(pred: Double, prob: Vector) => pred } assert(predAllZero.forall(_ === 0), s"With threshold=1.0, expected predictions to be all 0, but only" + - s" ${predAllZero.count(_ === 0)} of ${dataset.count()} were 0.") + s" ${predAllZero.count(_ === 0)} of ${smallBinaryDataset.count()} were 0.") // Call transform with params, and check that the params worked. val predNotAllZero = - model.transform(dataset, model.threshold -> 0.0, + model.transform(smallBinaryDataset, model.threshold -> 0.0, model.probabilityCol -> "myProb") .select("prediction", "myProb") .collect() @@ -214,7 +277,7 @@ class LogisticRegressionSuite // Call fit() with new params, and check as many params as we can. lr.setThresholds(Array(0.6, 0.4)) - val model2 = lr.fit(dataset, lr.maxIter -> 5, lr.regParam -> 0.1, + val model2 = lr.fit(smallBinaryDataset, lr.maxIter -> 5, lr.regParam -> 0.1, lr.probabilityCol -> "theProb") val parent2 = model2.parent.asInstanceOf[LogisticRegression] assert(parent2.getMaxIter === 5) @@ -224,16 +287,63 @@ class LogisticRegressionSuite assert(model2.getProbabilityCol === "theProb") } - test("logistic regression: Predictor, Classifier methods") { + test("multinomial logistic regression: Predictor, Classifier methods") { + val mlr = new LogisticRegression + + val model = mlr.fit(smallMultinomialDataset) + assert(model.numClasses === 3) + val numFeatures = smallMultinomialDataset.select("features").first().getAs[Vector](0).size + assert(model.numFeatures === numFeatures) + + val results = model.transform(smallMultinomialDataset) + // check that raw prediction is coefficients dot features + intercept + results.select("rawPrediction", "features").collect().foreach { + case Row(raw: Vector, features: Vector) => + assert(raw.size === 3) + val margins = Array.tabulate(3) { k => + var margin = 0.0 + features.foreachActive { (index, value) => + margin += value * model.coefficientMatrix(k, index) + } + margin += model.interceptVector(k) + margin + } + assert(raw ~== Vectors.dense(margins) relTol eps) + } + + // Compare rawPrediction with probability + results.select("rawPrediction", "probability").collect().foreach { + case Row(raw: Vector, prob: Vector) => + assert(raw.size === 3) + assert(prob.size === 3) + val max = raw.toArray.max + val subtract = if (max > 0) max else 0.0 + val sum = raw.toArray.map(x => math.exp(x - subtract)).sum + val probFromRaw0 = math.exp(raw(0) - subtract) / sum + val probFromRaw1 = math.exp(raw(1) - subtract) / sum + assert(prob(0) ~== probFromRaw0 relTol eps) + assert(prob(1) ~== probFromRaw1 relTol eps) + assert(prob(2) ~== 1.0 - probFromRaw1 - probFromRaw0 relTol eps) + } + + // Compare prediction with probability + results.select("prediction", "probability").collect().foreach { + case Row(pred: Double, prob: Vector) => + val 
predFromProb = prob.toArray.zipWithIndex.maxBy(_._1)._2 + assert(pred == predFromProb) + } + } + + test("binary logistic regression: Predictor, Classifier methods") { val lr = new LogisticRegression - val model = lr.fit(dataset) + val model = lr.fit(smallBinaryDataset) assert(model.numClasses === 2) - val numFeatures = dataset.select("features").first().getAs[Vector](0).size + val numFeatures = smallBinaryDataset.select("features").first().getAs[Vector](0).size assert(model.numFeatures === numFeatures) val threshold = model.getThreshold - val results = model.transform(dataset) + val results = model.transform(smallBinaryDataset) // Compare rawPrediction with probability results.select("rawPrediction", "probability").collect().foreach { @@ -253,6 +363,29 @@ class LogisticRegressionSuite } } + test("overflow prediction for multiclass") { + val model = new LogisticRegressionModel("mLogReg", + Matrices.dense(3, 2, Array(0.0, 0.0, 0.0, 1.0, 2.0, 3.0)), + Vectors.dense(0.0, 0.0, 0.0), 3, true) + val overFlowData = spark.createDataFrame(Seq( + LabeledPoint(1.0, Vectors.dense(0.0, 1000.0)), + LabeledPoint(1.0, Vectors.dense(0.0, -1.0)) + )) + val results = model.transform(overFlowData).select("rawPrediction", "probability").collect() + + // probabilities are correct when margins have to be adjusted + val raw1 = results(0).getAs[Vector](0) + val prob1 = results(0).getAs[Vector](1) + assert(raw1 === Vectors.dense(1000.0, 2000.0, 3000.0)) + assert(prob1 ~== Vectors.dense(0.0, 0.0, 1.0) absTol eps) + + // probabilities are correct when margins don't have to be adjusted + val raw2 = results(1).getAs[Vector](0) + val prob2 = results(1).getAs[Vector](1) + assert(raw2 === Vectors.dense(-1.0, -2.0, -3.0)) + assert(prob2 ~== Vectors.dense(0.66524096, 0.24472847, 0.09003057) relTol eps) + } + test("MultiClassSummarizer") { val summarizer1 = (new MultiClassSummarizer) .add(0.0).add(3.0).add(4.0).add(3.0).add(6.0) @@ -789,6 +922,7 @@ class LogisticRegressionSuite assert(model2.coefficients ~= coefficientsTheory absTol 1E-6) /* + TODO: why is this needed? The correctness of L1 regularization is already checked elsewhere Using the following R code to load the data and train the model using glmnet package. library("glmnet") @@ -813,17 +947,69 @@ class LogisticRegressionSuite assert(model1.coefficients ~== coefficientsR absTol 1E-6) } + test("multinomial logistic regression with intercept with strong L1 regularization") { + val trainer1 = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(1.0).setRegParam(6.0).setStandardization(true) + val trainer2 = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(1.0).setRegParam(6.0).setStandardization(false) + + val sqlContext = multinomialDataset.sqlContext + import sqlContext.implicits._ + val model1 = trainer1.fit(multinomialDataset) + val model2 = trainer2.fit(multinomialDataset) + + val histogram = multinomialDataset.as[LabeledPoint].rdd.map(_.label) + .treeAggregate(new MultiClassSummarizer)( + seqOp = (c, v) => (c, v) match { + case (classSummarizer: MultiClassSummarizer, label: Double) => classSummarizer.add(label) + }, + combOp = (c1, c2) => (c1, c2) match { + case (classSummarizer1: MultiClassSummarizer, classSummarizer2: MultiClassSummarizer) => + classSummarizer1.merge(classSummarizer2) + }).histogram + val numFeatures = multinomialDataset.as[LabeledPoint].first().features.size + val numClasses = histogram.length + + /* + For multinomial logistic regression with strong L1 regularization, all the coefficients + will be zeros. 
As a result, the intercepts will be proportional to the log counts in the
+       histogram.
+       {{{
+         \exp(b_k) = count_k * \exp(\lambda)
+         b_k = \log(count_k) + \lambda
+       }}}
+       \lambda is a free parameter, so choose \lambda such that the
+       mean of the intercepts is zero. This yields
+       {{{
+         b_k = \log(count_k)
+         b_k' = b_k - \mean(b_k)
+       }}}
+     */
+    val rawInterceptsTheory = histogram.map(c => math.log(c + 1)) // add 1 for smoothing
+    val rawMean = rawInterceptsTheory.sum / rawInterceptsTheory.length
+    val interceptsTheory = Vectors.dense(rawInterceptsTheory.map(_ - rawMean))
+    val coefficientsTheory = new DenseMatrix(numClasses, numFeatures,
+      Array.fill[Double](numClasses * numFeatures)(0.0), isTransposed = true)
+
+    assert(model1.interceptVector ~== interceptsTheory relTol 1E-3)
+    assert(model1.coefficientMatrix ~= coefficientsTheory absTol 1E-6)
+
+    assert(model2.interceptVector ~== interceptsTheory relTol 1E-3)
+    assert(model2.coefficientMatrix ~= coefficientsTheory absTol 1E-6)
+  }
+
   test("evaluate on test set") {
-    // TODO: add for multiclass
+    // TODO: add for multiclass when model summary becomes available
     // Evaluate on test set should be same as that of the transformed training data.
     val lr = new LogisticRegression()
       .setMaxIter(10)
       .setRegParam(1.0)
       .setThreshold(0.6)
-    val model = lr.fit(dataset)
+    val model = lr.fit(smallBinaryDataset)
     val summary = model.summary.asInstanceOf[BinaryLogisticRegressionSummary]
 
-    val sameSummary = model.evaluate(dataset).asInstanceOf[BinaryLogisticRegressionSummary]
+    val sameSummary =
+      model.evaluate(smallBinaryDataset).asInstanceOf[BinaryLogisticRegressionSummary]
     assert(summary.areaUnderROC === sameSummary.areaUnderROC)
     assert(summary.roc.collect() === sameSummary.roc.collect())
     assert(summary.pr.collect === sameSummary.pr.collect())
@@ -840,7 +1026,7 @@ class LogisticRegressionSuite
       .setMaxIter(10)
       .setRegParam(1.0)
       .setThreshold(0.6)
-    val model = lr.fit(dataset)
+    val model = lr.fit(smallBinaryDataset)
     assert(
       model.summary
         .objectiveHistory
@@ -934,9 +1120,16 @@ class LogisticRegressionSuite
     assert(model5.interceptVector.size === 3)
   }
 
+  test("intercept priors") {
+    // TODO
+    // Get coefficients from normal model with strong L1
+    // Set initial model with computed priors...
+  }
+
   test("set initial model") {
     // TODO: the binary one doesn't converge any faster
     // TODO: should they converge after one or two iterations?
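    // A minimal sketch of the warm start exercised below (the setMaxIter value is
    // illustrative only):
    //   val warm = new LogisticRegression().setInitialModel(model1).setMaxIter(1)
    //   warm.fit(binaryDataset)  // optimization starts from model1's coefficients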
+ // We can just run the other ones for a few iterations then check the predictions val lr = new LogisticRegression() val model1 = lr.fit(binaryDataset) val lr2 = new LogisticRegression().setInitialModel(model1) @@ -949,7 +1142,7 @@ class LogisticRegressionSuite } test("logistic regression with all labels the same") { - val sameLabels = dataset + val sameLabels = smallBinaryDataset .withColumn("zeroLabel", lit(0.0)) .withColumn("oneLabel", lit(1.0)) @@ -990,6 +1183,76 @@ class LogisticRegressionSuite assert(allOneNoInterceptModel.summary.totalIterations > 0) } + test("multiclass logistic regression with all labels the same") { + val constantData = spark.createDataFrame(Seq( + LabeledPoint(4.0, Vectors.dense(0.0)), + LabeledPoint(4.0, Vectors.dense(1.0)), + LabeledPoint(4.0, Vectors.dense(2.0))) + ) + val mlr = new LogisticRegression().setFamily("multinomial") + val model = mlr.fit(constantData) + val results = model.transform(constantData) + results.select("rawPrediction", "probability", "prediction").collect().foreach { + case Row(raw: Vector, prob: Vector, pred: Double) => + assert(raw === Vectors.dense(Array(0.0, 0.0, 0.0, 0.0, Double.PositiveInfinity))) + assert(prob === Vectors.dense(Array(0.0, 0.0, 0.0, 0.0, 1.0))) + assert(pred === 4.0) + } + + // force the model to be trained with only one class + val constantZeroData = spark.createDataFrame(Seq( + LabeledPoint(0.0, Vectors.dense(0.0)), + LabeledPoint(0.0, Vectors.dense(1.0)), + LabeledPoint(0.0, Vectors.dense(2.0))) + ) + val modelZeroLabel = mlr.setFitIntercept(false).fit(constantZeroData) + val resultsZero = modelZeroLabel.transform(constantZeroData) + resultsZero.select("rawPrediction", "probability", "prediction").collect().foreach { + case Row(raw: Vector, prob: Vector, pred: Double) => + assert(prob === Vectors.dense(Array(1.0))) + assert(pred === 0.0) + } + + // ensure that the correct value is predicted when numClasses passed through metadata + val labelMeta = NominalAttribute.defaultAttr.withName("label").withNumValues(6).toMetadata() + val constantDataWithMetadata = constantData + .select(constantData("label").as("label", labelMeta), constantData("features")) + val modelWithMetadata = mlr.setFitIntercept(true).fit(constantDataWithMetadata) + val resultsWithMetadata = modelWithMetadata.transform(constantDataWithMetadata) + resultsWithMetadata.select("rawPrediction", "probability", "prediction").collect().foreach { + case Row(raw: Vector, prob: Vector, pred: Double) => + assert(raw === Vectors.dense(Array(0.0, 0.0, 0.0, 0.0, Double.PositiveInfinity, 0.0))) + assert(prob === Vectors.dense(Array(0.0, 0.0, 0.0, 0.0, 1.0, 0.0))) + assert(pred === 4.0) + } + // TODO: check num iters is zero when it become available in the model + } + + test("numClasses specified in metadata/inferred") { + val lr = new LogisticRegression().setMaxIter(1) + + // specify more classes than unique label values + val labelMeta = NominalAttribute.defaultAttr.withName("label").withNumValues(4).toMetadata() + val df = smallMultinomialDataset.select(smallMultinomialDataset("label").as("label", labelMeta), + smallMultinomialDataset("features")) + val model1 = lr.fit(df) + assert(model1.numClasses === 4) + assert(model1.interceptVector.size === 4) + + // specify two classes when there are really three + val labelMeta1 = NominalAttribute.defaultAttr.withName("label").withNumValues(2).toMetadata() + val df1 = smallMultinomialDataset.select(smallMultinomialDataset("label").as("label", labelMeta1), + smallMultinomialDataset("features")) + val thrown = 
intercept[IllegalArgumentException] { + lr.fit(df1) + } + assert(thrown.getMessage.contains("less than the number of unique labels")) + + // lr should infer the number of classes if not specified + val model3 = lr.fit(smallMultinomialDataset) + assert(model3.numClasses === 3) + } + test("read/write") { def checkModelData(model: LogisticRegressionModel, model2: LogisticRegressionModel): Unit = { assert(model.intercept === model2.intercept) @@ -998,7 +1261,7 @@ class LogisticRegressionSuite assert(model.numFeatures === model2.numFeatures) } val lr = new LogisticRegression() - testEstimatorAndModelReadWrite(lr, dataset, LogisticRegressionSuite.allParamSettings, + testEstimatorAndModelReadWrite(lr, smallBinaryDataset, LogisticRegressionSuite.allParamSettings, checkModelData) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala index 9c7e08820d93b..9969bb02db04b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala @@ -86,35 +86,35 @@ class MultinomialLogisticRegressionSuite rdd.saveAsTextFile("target/tmp/MultinomialLogisticRegressionSuite/multinomialDataset") } - test("params") { - ParamsSuite.checkParams(new LogisticRegression) - val model = new LogisticRegressionModel("mLogReg", - Matrices.dense(2, 1, Array(0.0, 0.0)), Vectors.dense(0.0, 0.0), 2, true) - ParamsSuite.checkParams(model) - } - - test("multinomial logistic regression: default params") { - val mlr = new LogisticRegression - assert(mlr.getLabelCol === "label") - assert(mlr.getFeaturesCol === "features") - assert(mlr.getPredictionCol === "prediction") - assert(mlr.getRawPredictionCol === "rawPrediction") - assert(mlr.getProbabilityCol === "probability") - assert(!mlr.isDefined(mlr.weightCol)) - assert(!mlr.isDefined(mlr.thresholds)) - assert(mlr.getFitIntercept) - assert(mlr.getStandardization) - val model = mlr.fit(dataset) - model.transform(dataset) - .select("label", "probability", "prediction", "rawPrediction") - .collect() - assert(model.getFeaturesCol === "features") - assert(model.getPredictionCol === "prediction") - assert(model.getRawPredictionCol === "rawPrediction") - assert(model.getProbabilityCol === "probability") - assert(model.interceptVector !== Vectors.dense(0.0, 0.0)) - assert(model.hasParent) - } +// test("params") { +// ParamsSuite.checkParams(new LogisticRegression) +// val model = new LogisticRegressionModel("mLogReg", +// Matrices.dense(2, 1, Array(0.0, 0.0)), Vectors.dense(0.0, 0.0), 2, true) +// ParamsSuite.checkParams(model) +// } +// +// test("multinomial logistic regression: default params") { +// val mlr = new LogisticRegression +// assert(mlr.getLabelCol === "label") +// assert(mlr.getFeaturesCol === "features") +// assert(mlr.getPredictionCol === "prediction") +// assert(mlr.getRawPredictionCol === "rawPrediction") +// assert(mlr.getProbabilityCol === "probability") +// assert(!mlr.isDefined(mlr.weightCol)) +// assert(!mlr.isDefined(mlr.thresholds)) +// assert(mlr.getFitIntercept) +// assert(mlr.getStandardization) +// val model = mlr.fit(dataset) +// model.transform(dataset) +// .select("label", "probability", "prediction", "rawPrediction") +// .collect() +// assert(model.getFeaturesCol === "features") +// assert(model.getPredictionCol === "prediction") +// 
assert(model.getRawPredictionCol === "rawPrediction") +// assert(model.getProbabilityCol === "probability") +// assert(model.interceptVector !== Vectors.dense(0.0, 0.0)) +// assert(model.hasParent) +// } test("multinomial logistic regression with intercept without regularization") { @@ -813,202 +813,6 @@ class MultinomialLogisticRegressionSuite } */ - test("prediction") { - val model = new LogisticRegressionModel("mLogReg", - Matrices.dense(3, 2, Array(0.0, 0.0, 0.0, 1.0, 2.0, 3.0)), - Vectors.dense(0.0, 0.0, 0.0), 3, true) - val overFlowData = spark.createDataFrame(Seq( - LabeledPoint(1.0, Vectors.dense(0.0, 1000.0)), - LabeledPoint(1.0, Vectors.dense(0.0, -1.0)) - )) - val results = model.transform(overFlowData).select("rawPrediction", "probability").collect() - - // probabilities are correct when margins have to be adjusted - val raw1 = results(0).getAs[Vector](0) - val prob1 = results(0).getAs[Vector](1) - assert(raw1 === Vectors.dense(1000.0, 2000.0, 3000.0)) - assert(prob1 ~== Vectors.dense(0.0, 0.0, 1.0) absTol eps) - - // probabilities are correct when margins don't have to be adjusted - val raw2 = results(1).getAs[Vector](0) - val prob2 = results(1).getAs[Vector](1) - assert(raw2 === Vectors.dense(-1.0, -2.0, -3.0)) - assert(prob2 ~== Vectors.dense(0.66524096, 0.24472847, 0.09003057) relTol eps) - } - - test("multinomial logistic regression: Predictor, Classifier methods") { - val mlr = new LogisticRegression - - val model = mlr.fit(dataset) - assert(model.numClasses === 3) - val numFeatures = dataset.select("features").first().getAs[Vector](0).size - assert(model.numFeatures === numFeatures) - - val results = model.transform(dataset) - // check that raw prediction is coefficients dot features + intercept - results.select("rawPrediction", "features").collect().foreach { - case Row(raw: Vector, features: Vector) => - assert(raw.size === 3) - val margins = Array.tabulate(3) { k => - var margin = 0.0 - features.foreachActive { (index, value) => - margin += value * model.coefficientMatrix(k, index) - } - margin += model.interceptVector(k) - margin - } - assert(raw ~== Vectors.dense(margins) relTol eps) - } - - // Compare rawPrediction with probability - results.select("rawPrediction", "probability").collect().foreach { - case Row(raw: Vector, prob: Vector) => - assert(raw.size === 3) - assert(prob.size === 3) - val max = raw.toArray.max - val subtract = if (max > 0) max else 0.0 - val sum = raw.toArray.map(x => math.exp(x - subtract)).sum - val probFromRaw0 = math.exp(raw(0) - subtract) / sum - val probFromRaw1 = math.exp(raw(1) - subtract) / sum - assert(prob(0) ~== probFromRaw0 relTol eps) - assert(prob(1) ~== probFromRaw1 relTol eps) - assert(prob(2) ~== 1.0 - probFromRaw1 - probFromRaw0 relTol eps) - } - - // Compare prediction with probability - results.select("prediction", "probability").collect().foreach { - case Row(pred: Double, prob: Vector) => - val predFromProb = prob.toArray.zipWithIndex.maxBy(_._1)._2 - assert(pred == predFromProb) - } - } - - test("multinomial logistic regression coefficients should be centered") { - val mlr = new LogisticRegression().setMaxIter(1) - val model = mlr.fit(dataset) - assert(model.interceptVector.toArray.sum ~== 0.0 absTol 1e-6) - assert(model.coefficientMatrix.toArray.sum ~== 0.0 absTol 1e-6) - } - - test("numClasses specified in metadata/inferred") { - val mlr = new LogisticRegression().setMaxIter(1) - - // specify more classes than unique label values - val labelMeta = 
NominalAttribute.defaultAttr.withName("label").withNumValues(4).toMetadata() - val df = dataset.select(dataset("label").as("label", labelMeta), dataset("features")) - val model1 = mlr.fit(df) - assert(model1.numClasses === 4) - assert(model1.interceptVector.size === 4) - - // specify two classes when there are really three - val labelMeta1 = NominalAttribute.defaultAttr.withName("label").withNumValues(2).toMetadata() - val df1 = dataset.select(dataset("label").as("label", labelMeta1), dataset("features")) - val thrown = intercept[IllegalArgumentException] { - mlr.fit(df1) - } - assert(thrown.getMessage.contains("less than the number of unique labels")) - - // mlr should infer the number of classes if not specified - val model3 = mlr.fit(dataset) - assert(model3.numClasses === 3) - } - - test("all labels the same") { - val constantData = spark.createDataFrame(Seq( - LabeledPoint(4.0, Vectors.dense(0.0)), - LabeledPoint(4.0, Vectors.dense(1.0)), - LabeledPoint(4.0, Vectors.dense(2.0))) - ) - val mlr = new LogisticRegression().setFamily("multinomial") - val model = mlr.fit(constantData) - val results = model.transform(constantData) - results.select("rawPrediction", "probability", "prediction").collect().foreach { - case Row(raw: Vector, prob: Vector, pred: Double) => - assert(raw === Vectors.dense(Array(0.0, 0.0, 0.0, 0.0, Double.PositiveInfinity))) - assert(prob === Vectors.dense(Array(0.0, 0.0, 0.0, 0.0, 1.0))) - assert(pred === 4.0) - } - - // force the model to be trained with only one class - val constantZeroData = spark.createDataFrame(Seq( - LabeledPoint(0.0, Vectors.dense(0.0)), - LabeledPoint(0.0, Vectors.dense(1.0)), - LabeledPoint(0.0, Vectors.dense(2.0))) - ) - val modelZeroLabel = mlr.setFitIntercept(false).fit(constantZeroData) - val resultsZero = modelZeroLabel.transform(constantZeroData) - resultsZero.select("rawPrediction", "probability", "prediction").collect().foreach { - case Row(raw: Vector, prob: Vector, pred: Double) => - assert(prob === Vectors.dense(Array(1.0))) - assert(pred === 0.0) - } - - // ensure that the correct value is predicted when numClasses passed through metadata - val labelMeta = NominalAttribute.defaultAttr.withName("label").withNumValues(6).toMetadata() - val constantDataWithMetadata = constantData - .select(constantData("label").as("label", labelMeta), constantData("features")) - val modelWithMetadata = mlr.setFitIntercept(true).fit(constantDataWithMetadata) - val resultsWithMetadata = modelWithMetadata.transform(constantDataWithMetadata) - resultsWithMetadata.select("rawPrediction", "probability", "prediction").collect().foreach { - case Row(raw: Vector, prob: Vector, pred: Double) => - assert(raw === Vectors.dense(Array(0.0, 0.0, 0.0, 0.0, Double.PositiveInfinity, 0.0))) - assert(prob === Vectors.dense(Array(0.0, 0.0, 0.0, 0.0, 1.0, 0.0))) - assert(pred === 4.0) - } - // TODO: check num iters is zero when it become available in the model - } - - test("weighted data") { - val numClasses = 5 - val numPoints = 40 - val outlierData = MLTestingUtils.genClassificationInstancesWithWeightedOutliers(spark, - numClasses, numPoints) - val testData = spark.createDataFrame(Array.tabulate[LabeledPoint](numClasses) { i => - LabeledPoint(i.toDouble, Vectors.dense(i.toDouble)) - }) - val mlr = new LogisticRegression().setWeightCol("weight") - val model = mlr.fit(outlierData) - val results = model.transform(testData).select("label", "prediction").collect() - - // check that the predictions are the one to one mapping - results.foreach { case Row(label: Double, pred: 
Double) => - assert(label === pred) - } - val (overSampledData, weightedData) = - MLTestingUtils.genEquivalentOversampledAndWeightedInstances(outlierData, "label", "features", - 42L) - val weightedModel = mlr.fit(weightedData) - val overSampledModel = mlr.setWeightCol("").fit(overSampledData) - assert(weightedModel.coefficientMatrix ~== overSampledModel.coefficientMatrix relTol 0.01) - } - - test("thresholds prediction") { - val mlr = new LogisticRegression - val model = mlr.fit(dataset) - val basePredictions = model.transform(dataset).select("prediction").collect() - - // should predict all zeros - model.setThresholds(Array(1, 1000, 1000)) - val zeroPredictions = model.transform(dataset).select("prediction").collect() - assert(zeroPredictions.forall(_.getDouble(0) === 0.0)) - - // should predict all ones - model.setThresholds(Array(1000, 1, 1000)) - val onePredictions = model.transform(dataset).select("prediction").collect() - assert(onePredictions.forall(_.getDouble(0) === 1.0)) - - // should predict all twos - model.setThresholds(Array(1000, 1000, 1)) - val twoPredictions = model.transform(dataset).select("prediction").collect() - assert(twoPredictions.forall(_.getDouble(0) === 2.0)) - - // constant threshold scaling is the same as no thresholds - model.setThresholds(Array(1000, 1000, 1000)) - val scaledPredictions = model.transform(dataset).select("prediction").collect() - assert(scaledPredictions.zip(basePredictions).forall { case (scaled, base) => - scaled.getDouble(0) === base.getDouble(0) - }) - } // test("read/write") { // def checkModelData( @@ -1024,16 +828,6 @@ class MultinomialLogisticRegressionSuite // MultinomialLogisticRegressionSuite.allParamSettings, // checkModelData) // } - - test("should support all NumericType labels and not support other types") { - val mlr = new LogisticRegression().setMaxIter(1) - MLTestingUtils - .checkNumericTypes[LogisticRegressionModel, LogisticRegression]( - mlr, spark) { (expected, actual) => - assert(expected.interceptVector === actual.interceptVector) - assert(expected.coefficientMatrix.toArray === actual.coefficients.toArray) - } - } } object MultinomialLogisticRegressionSuite { From d95370b9d73cb123657e278c0e297bb13ef18331 Mon Sep 17 00:00:00 2001 From: sethah Date: Thu, 25 Aug 2016 14:33:34 -0700 Subject: [PATCH 05/24] model loading backward compat --- .../classification/LogisticRegression.scala | 38 +++++++++++++------ .../MultinomialLogisticRegressionSuite.scala | 8 ---- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 9b1845eaef98a..e15ebfe00bbac 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -602,12 +602,14 @@ class LogisticRegression @Since("1.2.0") ( /* The intercepts are never regularized, so we always center the mean. */ + // TODO: store model coefficients as multinomial representation? 
+ // If so, zero out one set of coefs or use the +/- representation val interceptVector = if (interceptsArray.nonEmpty && isMultinomial) { val interceptMean = interceptsArray.sum / numClasses interceptsArray.indices.foreach { i => interceptsArray(i) -= interceptMean } Vectors.dense(interceptsArray) - } else if (interceptsArray.nonEmpty) { - Vectors.dense(interceptsArray) + } else if (interceptsArray.length == 2) { + Vectors.dense(interceptsArray.head) } else { Vectors.sparse(numClasses, Seq()) } @@ -980,19 +982,33 @@ object LogisticRegressionModel extends MLReadable[LogisticRegressionModel] { override def load(path: String): LogisticRegressionModel = { val metadata = DefaultParamsReader.loadMetadata(path, sc, className) + val versionRegex = "([0-9]+)\\.([0-9]+)\\.(.+)".r + val versionRegex(major, minor, _) = metadata.sparkVersion val dataPath = new Path(path, "data").toString val data = sparkSession.read.format("parquet").load(dataPath) - val convertedCoefs = MLUtils.convertMatrixColumnsToML(data, "coefficientMatrix") - val converted = MLUtils.convertVectorColumnsToML(convertedCoefs, "interceptVector") - .select("numClasses", "numFeatures", "interceptVector", "coefficientMatrix", - "isMultinomial") - // TODO: numFeatures not needed? - val Row(numClasses: Int, numFeatures: Int, interceptVector: Vector, - coefficientMatrix: Matrix, isMultinomial: Boolean) = converted.head() - val model = new LogisticRegressionModel(metadata.uid, coefficientMatrix, interceptVector, - numClasses, isMultinomial) + val model = if (major.toInt < 2 || (major.toInt == 2 && minor.toInt == 0)) { + // 2.0 and before + val Row(numClasses: Int, numFeatures: Int, intercept: Double, coefficients: Vector) = + MLUtils.convertVectorColumnsToML(data, "coefficients") + .select("numClasses", "numFeatures", "intercept", "coefficients") + .head() + val coefficientMatrix = + new DenseMatrix(1, coefficients.size, coefficients.toArray, isTransposed = true) + val interceptVector = Vectors.dense(intercept) + new LogisticRegressionModel(metadata.uid, coefficientMatrix, + interceptVector, numClasses, isMultinomial = false) + } else { + // 2.1+ + val Row(numClasses: Int, numFeatures: Int, interceptVector: Vector, + coefficientMatrix: Matrix, isMultinomial: Boolean) = data + .select("numClasses", "numFeatures", "interceptVector", "coefficientMatrix", + "isMultinomial").head() + new LogisticRegressionModel(metadata.uid, coefficientMatrix, interceptVector, + numClasses, isMultinomial) + } + DefaultParamsReader.getAndSetParams(model, metadata) model diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala index 9969bb02db04b..5725a47dd8652 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala @@ -806,14 +806,6 @@ class MultinomialLogisticRegressionSuite assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) } - /* - test("multinomial logistic regression with intercept with strong L1 regularization") { - // TODO: implement this test to check that the priors on the intercepts are correct - // TODO: when initial model becomes available - } - */ - - // test("read/write") { // def checkModelData( // model: LogisticRegressionModel, From 942c3b7939879f360ce0a22c57cd6e31293fb044 Mon Sep 17 00:00:00 2001 From: sethah Date: Thu, 25 Aug 
2016 18:27:57 -0700 Subject: [PATCH 06/24] correcting initial model test and deleting multinomial --- .../classification/LogisticRegression.scala | 52 +++++++++++-------- .../LogisticRegressionSuite.scala | 36 +++++++------ 2 files changed, 49 insertions(+), 39 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index e15ebfe00bbac..ebaaa58065fa4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.classification import scala.collection.mutable -import breeze.linalg.{DenseVector => BDV} +import breeze.linalg.{DenseVector => BDV, View} import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN} import org.apache.hadoop.fs.Path @@ -83,7 +83,7 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas * If numClasses == 1 || numClasses == 2, set to "binomial". * Else, set to "multinomial" * - "binomial": Binary logistic regression with pivoting. - * - "multinomial": Multinomial (softmax) regression without pivoting. + * - "multinomial": Multinomial logistic (softmax) regression without pivoting. * Default is "auto". * * @group param @@ -181,9 +181,8 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas } /** - * Logistic regression. - * Currently, this class only supports binary classification. For multiclass classification, - * use [[MultinomialLogisticRegression]] + * Logistic regression. Supports multinomial logistic (softmax) regression and binomial logistic + * regression. 
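+ * A brief illustration of the two formulations (notation added here for clarity: \beta_k and
+ * b_k denote the k-th coefficient row and intercept):
+ * {{{
+ *   multinomial: P(y = k | x) = \exp(\beta_k^T x + b_k) / \sum_j \exp(\beta_j^T x + b_j)
+ *   binomial:    P(y = 1 | x) = 1 / (1 + \exp(-(\beta^T x + b)))
+ * }}}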
*/ @Since("1.2.0") class LogisticRegression @Since("1.2.0") ( @@ -476,10 +475,11 @@ class LogisticRegression @Since("1.2.0") ( if (initialModelIsValid) { val initialCoefArray = initialCoefficientsWithIntercept.toArray - val providedCoefArray = optInitialModel.get.coefficientMatrix.toArray - providedCoefArray.indices.foreach { i => - val flatIndex = if ($(fitIntercept)) i + i / numFeatures else i - initialCoefArray(flatIndex) = providedCoefArray(i) + val providedCoef = optInitialModel.get.coefficientMatrix + providedCoef.foreachActive { (row, col, value) => + val flatIndex = row * numFeaturesPlusIntercept + col + // We need to scale the coefficients since they will be trained in the scaled space + initialCoefArray(flatIndex) = value * featuresStd(col) } if ($(fitIntercept)) { optInitialModel.get.interceptVector.foreachActive { (index, value) => @@ -608,10 +608,10 @@ class LogisticRegression @Since("1.2.0") ( val interceptMean = interceptsArray.sum / numClasses interceptsArray.indices.foreach { i => interceptsArray(i) -= interceptMean } Vectors.dense(interceptsArray) - } else if (interceptsArray.length == 2) { - Vectors.dense(interceptsArray.head) + } else if (interceptsArray.length == 1) { + Vectors.dense(interceptsArray) } else { - Vectors.sparse(numClasses, Seq()) + Vectors.sparse(numCoefficientSets, Seq()) } (coefficientMatrix, interceptVector, arrayBuilder.result()) } @@ -668,6 +668,7 @@ class LogisticRegressionModel private[spark] ( extends ProbabilisticClassificationModel[Vector, LogisticRegressionModel] with LogisticRegressionParams with MLWritable { + // TODO: remove this def this(uid: String, coefficients: Vector, intercept: Double) { this(uid, new DenseMatrix(1, coefficients.size, coefficients.toArray, isTransposed = true), @@ -675,19 +676,28 @@ class LogisticRegressionModel private[spark] ( } @Since("2.0.0") - // TODO: this should convert sparse to sparse and dense to dense - val coefficients: Vector = Vectors.dense(coefficientMatrix.toArray) + def coefficients: Vector = if (isMultinomial) { + throw new SparkException("Multinomial models contain a matrix of coefficients, use" + + "coefficientMatrix instead.") + } else { + _coefficients + } + + // convert to appropriate vector representation without replicating data + private lazy val _coefficients: Vector = coefficientMatrix match { + case dm: DenseMatrix => Vectors.dense(dm.values) + case sm: SparseMatrix => Vectors.fromBreeze(sm.asBreeze.flatten(View.Require)) + } @Since("1.3.0") - def intercept: Double = { - if (isMultinomial) { - logWarning("Multiclass model contains a vector of intercepts, use interceptVector instead." + - "Returning 0.0 as placeholder.") - } + def intercept: Double = if (isMultinomial) { + throw new SparkException("Multiclass model contains a vector of intercepts, use " + + "interceptVector instead. 
Returning 0.0 as placeholder.") + } else { _intercept } - private val _intercept = if (!isMultinomial) interceptVector.toArray.head else 0.0 + private lazy val _intercept = interceptVector.toArray.head @Since("1.5.0") override def setThreshold(value: Double): this.type = super.setThreshold(value) @@ -943,7 +953,6 @@ class LogisticRegressionModel private[spark] ( @Since("1.6.0") object LogisticRegressionModel extends MLReadable[LogisticRegressionModel] { - // TODO: we need to be able to load old models as well @Since("1.6.0") override def read: MLReader[LogisticRegressionModel] = new LogisticRegressionModelReader @@ -1009,7 +1018,6 @@ object LogisticRegressionModel extends MLReadable[LogisticRegressionModel] { numClasses, isMultinomial) } - DefaultParamsReader.getAndSetParams(model, metadata) model } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index a8e94fafa50ed..3a9e0b4f856ca 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -1120,25 +1120,26 @@ class LogisticRegressionSuite assert(model5.interceptVector.size === 3) } - test("intercept priors") { - // TODO - // Get coefficients from normal model with strong L1 - // Set initial model with computed priors... - } - test("set initial model") { - // TODO: the binary one doesn't converge any faster - // TODO: should they converge after one or two iterations? - // We can just run the other ones for a few iterations then check the predictions val lr = new LogisticRegression() - val model1 = lr.fit(binaryDataset) - val lr2 = new LogisticRegression().setInitialModel(model1) - val model2 = lr2.fit(binaryDataset) + val model1 = lr.fit(smallBinaryDataset) + val lr2 = new LogisticRegression().setInitialModel(model1).setMaxIter(5) + val model2 = lr2.fit(smallBinaryDataset) + val predictions1 = model1.transform(smallBinaryDataset).select("prediction").collect() + val predictions2 = model2.transform(smallBinaryDataset).select("prediction").collect() + predictions1.zip(predictions2).foreach { case (Row(p1: Double), Row(p2: Double)) => + assert(p1 === p2) + } val lr3 = new LogisticRegression() - val model3 = lr3.fit(multinomialDataset) - val lr4 = new LogisticRegression().setInitialModel(model3) - val model4 = lr4.fit(multinomialDataset) + val model3 = lr3.fit(smallMultinomialDataset) + val lr4 = new LogisticRegression().setInitialModel(model3).setMaxIter(5) + val model4 = lr4.fit(smallMultinomialDataset) + val predictions3 = model3.transform(smallMultinomialDataset).select("prediction").collect() + val predictions4 = model4.transform(smallMultinomialDataset).select("prediction").collect() + predictions3.zip(predictions4).foreach { case (Row(p1: Double), Row(p2: Double)) => + assert(p1 === p2) + } } test("logistic regression with all labels the same") { @@ -1241,8 +1242,9 @@ class LogisticRegressionSuite // specify two classes when there are really three val labelMeta1 = NominalAttribute.defaultAttr.withName("label").withNumValues(2).toMetadata() - val df1 = smallMultinomialDataset.select(smallMultinomialDataset("label").as("label", labelMeta1), - smallMultinomialDataset("features")) + val df1 = smallMultinomialDataset + .select(smallMultinomialDataset("label").as("label", labelMeta1), + smallMultinomialDataset("features")) val thrown = intercept[IllegalArgumentException] { 
lr.fit(df1) } From ae6150c33b7e93e5c2b6a7b292953150239d9c25 Mon Sep 17 00:00:00 2001 From: sethah Date: Thu, 25 Aug 2016 21:20:22 -0700 Subject: [PATCH 07/24] small fixes, remove temp constructor --- .../classification/LogisticRegression.scala | 55 ++++--------------- .../ProbabilisticClassifier.scala | 27 +++++++-- .../classification/LogisticRegression.scala | 5 +- .../LogisticRegressionSuite.scala | 3 +- .../ml/classification/OneVsRestSuite.scala | 5 +- .../spark/ml/tuning/CrossValidatorSuite.scala | 5 +- .../ml/tuning/TrainValidationSplitSuite.scala | 5 +- 7 files changed, 47 insertions(+), 58 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index ebaaa58065fa4..2b3cdc5473529 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -365,7 +365,7 @@ class LogisticRegression @Since("1.2.0") ( if (!isMultinomial) { require(isBinaryClassification, s"Binomial family only supports 1 or 2 " + - s"outcome classes but found $numClasses") + s"outcome classes but found $numClasses") } if (isDefined(thresholds)) { @@ -602,8 +602,6 @@ class LogisticRegression @Since("1.2.0") ( /* The intercepts are never regularized, so we always center the mean. */ - // TODO: store model coefficients as multinomial representation? - // If so, zero out one set of coefs or use the +/- representation val interceptVector = if (interceptsArray.nonEmpty && isMultinomial) { val interceptMean = interceptsArray.sum / numClasses interceptsArray.indices.foreach { i => interceptsArray(i) -= interceptMean } @@ -668,13 +666,6 @@ class LogisticRegressionModel private[spark] ( extends ProbabilisticClassificationModel[Vector, LogisticRegressionModel] with LogisticRegressionParams with MLWritable { - // TODO: remove this - def this(uid: String, coefficients: Vector, intercept: Double) { - this(uid, - new DenseMatrix(1, coefficients.size, coefficients.toArray, isTransposed = true), - Vectors.dense(intercept), 2, false) - } - @Since("2.0.0") def coefficients: Vector = if (isMultinomial) { throw new SparkException("Multinomial models contain a matrix of coefficients, use" + @@ -686,13 +677,14 @@ class LogisticRegressionModel private[spark] ( // convert to appropriate vector representation without replicating data private lazy val _coefficients: Vector = coefficientMatrix match { case dm: DenseMatrix => Vectors.dense(dm.values) + // TODO: better way to flatten sparse matrix? case sm: SparseMatrix => Vectors.fromBreeze(sm.asBreeze.flatten(View.Require)) } @Since("1.3.0") def intercept: Double = if (isMultinomial) { - throw new SparkException("Multiclass model contains a vector of intercepts, use " + - "interceptVector instead. Returning 0.0 as placeholder.") + throw new SparkException("Multinomial models contain a vector of intercepts, use " + + "interceptVector instead.") } else { _intercept } @@ -730,6 +722,7 @@ class LogisticRegressionModel private[spark] ( } /** Score (probability) for each class label. */ + // TODO: do we need this anymore? private val scores: Vector => Vector = (features) => { val m = margins(features) val maxMarginIndex = m.argmax @@ -813,36 +806,11 @@ class LogisticRegressionModel private[spark] ( * Predict label for the given feature vector. * The behavior of this can be adjusted using [[thresholds]]. 
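   * A short sketch of that rule, mirroring the implementation in
   * ProbabilisticClassificationModel (a zero threshold wins outright):
   * {{{
   *   prediction = argmax_k probability(k) / thresholds(k)
   * }}}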
*/ - override protected def predict(features: Vector): Double = { + override protected def predict(features: Vector): Double = if (isMultinomial) { + super.predict(features) + } else { // Note: We should use getThreshold instead of $(threshold) since getThreshold is overridden. - if (isMultinomial) { - if (isDefined(thresholds)) { - val thresholds: Array[Double] = getThresholds - val probabilities = scores(features).toArray - var argMax = 0 - var max = Double.NegativeInfinity - var i = 0 - while (i < numClasses) { - if (thresholds(i) == 0.0) { - max = Double.PositiveInfinity - argMax = i - } else { - val scaled = probabilities(i) / thresholds(i) - if (scaled > max) { - max = scaled - argMax = i - } - } - i += 1 - } - argMax - } else { - scores(features).argmax - } - } - else { - if (score(features) > getThreshold) 1 else 0 - } + if (score(features) > getThreshold) 1 else 0 } override protected def raw2probabilityInPlace(rawPrediction: Vector): Vector = { @@ -930,10 +898,10 @@ class LogisticRegressionModel private[spark] ( } override protected def probability2prediction(probability: Vector): Double = { - // Note: We should use getThreshold instead of $(threshold) since getThreshold is overridden. if (isMultinomial) { super.probability2prediction(probability) } else { + // Note: We should use getThreshold instead of $(threshold) since getThreshold is overridden. if (probability(1) > getThreshold) 1 else 0 } } @@ -983,8 +951,7 @@ object LogisticRegressionModel extends MLReadable[LogisticRegressionModel] { } } - private class LogisticRegressionModelReader - extends MLReader[LogisticRegressionModel] { + private class LogisticRegressionModelReader extends MLReader[LogisticRegressionModel] { /** Checked against metadata when loading model */ private val className = classOf[LogisticRegressionModel].getName diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala index 19df8f7edd43c..989bd19528a97 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala @@ -201,11 +201,30 @@ abstract class ProbabilisticClassificationModel[ probability.argmax } else { val thresholds: Array[Double] = getThresholds - val scaledProbability: Array[Double] = - probability.toArray.zip(thresholds).map { case (p, t) => - if (t == 0.0) Double.PositiveInfinity else p / t + val probabilities = probability.toArray + var argMax = 0 + var max = Double.NegativeInfinity + var i = 0 + while (i < probability.size) { + if (thresholds(i) == 0.0) { + max = Double.PositiveInfinity + argMax = i + } else { + val scaled = probabilities(i) / thresholds(i) + if (scaled > max) { + max = scaled + argMax = i + } } - Vectors.dense(scaledProbability).argmax + i += 1 + } + argMax +// val thresholds: Array[Double] = getThresholds +// val scaledProbability: Array[Double] = +// probability.toArray.zip(thresholds).map { case (p, t) => +// if (t == 0.0) Double.PositiveInfinity else p / t +// } +// Vectors.dense(scaledProbability).argmax } } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index ad3dab33d2909..c3770dd0a12df 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ 
b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -430,10 +430,9 @@ class LogisticRegressionWithLBFGS lr.setElasticNetParam(elasticNetParam) lr.setStandardization(useFeatureScaling) if (userSuppliedWeights) { - // TODO: check this val uid = Identifiable.randomUID("logreg-static") - lr.setInitialModel(new org.apache.spark.ml.classification.LogisticRegressionModel( - uid, new DenseMatrix(1, initialWeights.size, initialWeights.toArray, isTransposed=true), + lr.setInitialModel(new org.apache.spark.ml.classification.LogisticRegressionModel(uid, + new DenseMatrix(1, initialWeights.size, initialWeights.toArray, isTransposed = true), Vectors.dense(0.0).asML, 2, false)) } lr.setFitIntercept(addIntercept) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 3a9e0b4f856ca..f04d73f979509 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -113,7 +113,8 @@ class LogisticRegressionSuite test("params") { ParamsSuite.checkParams(new LogisticRegression) - val model = new LogisticRegressionModel("logReg", Vectors.dense(0.0), 0.0) + val model = new LogisticRegressionModel("logReg", + new DenseMatrix(1, 1, Array(0.0), isTransposed = false), Vectors.dense(0.0), 2, false) ParamsSuite.checkParams(model) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala index 361dd74cb082e..09e38786aa002 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.ml.attribute.NominalAttribute import org.apache.spark.ml.classification.LogisticRegressionSuite._ import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.feature.StringIndexer -import org.apache.spark.ml.linalg.Vectors +import org.apache.spark.ml.linalg.{DenseMatrix, Vectors} import org.apache.spark.ml.param.{ParamMap, ParamsSuite} import org.apache.spark.ml.util.{DefaultReadWriteTest, MetadataUtils, MLTestingUtils} import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS @@ -60,7 +60,8 @@ class OneVsRestSuite extends SparkFunSuite with MLlibTestSparkContext with Defau test("params") { ParamsSuite.checkParams(new OneVsRest) - val lrModel = new LogisticRegressionModel("lr", Vectors.dense(0.0), 0.0) + val lrModel = new LogisticRegressionModel("logReg", + new DenseMatrix(1, 1, Array(0.0), isTransposed = false), Vectors.dense(0.0), 2, false) val model = new OneVsRestModel("ovr", Metadata.empty, Array(lrModel)) ParamsSuite.checkParams(model) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala index 30bd390381e97..0fb26f26e7792 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressio import org.apache.spark.ml.classification.LogisticRegressionSuite.generateLogisticInput import org.apache.spark.ml.evaluation.{BinaryClassificationEvaluator, 
Evaluator, RegressionEvaluator} import org.apache.spark.ml.feature.HashingTF -import org.apache.spark.ml.linalg.Vectors +import org.apache.spark.ml.linalg.{DenseMatrix, Vectors} import org.apache.spark.ml.param.{ParamMap, ParamPair} import org.apache.spark.ml.param.shared.HasInputCol import org.apache.spark.ml.regression.LinearRegression @@ -244,7 +244,8 @@ class CrossValidatorSuite test("read/write: CrossValidatorModel") { val lr = new LogisticRegression() .setThreshold(0.6) - val lrModel = new LogisticRegressionModel(lr.uid, Vectors.dense(1.0, 2.0), 1.2) + val lrModel = new LogisticRegressionModel(lr.uid, + new DenseMatrix(1, 1, Array(0.0), isTransposed = false), Vectors.dense(0.0), 2, false) .setThreshold(0.6) val evaluator = new BinaryClassificationEvaluator() .setMetricName("areaUnderPR") // not default metric diff --git a/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala index c1e9c2fc1dc11..a05a1d641f1bb 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel} import org.apache.spark.ml.classification.LogisticRegressionSuite.generateLogisticInput import org.apache.spark.ml.evaluation.{BinaryClassificationEvaluator, Evaluator, RegressionEvaluator} -import org.apache.spark.ml.linalg.Vectors +import org.apache.spark.ml.linalg.{DenseMatrix, Vectors} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.param.shared.HasInputCol import org.apache.spark.ml.regression.LinearRegression @@ -133,7 +133,8 @@ class TrainValidationSplitSuite test("read/write: TrainValidationSplitModel") { val lr = new LogisticRegression() .setThreshold(0.6) - val lrModel = new LogisticRegressionModel(lr.uid, Vectors.dense(1.0, 2.0), 1.2) + val lrModel = new LogisticRegressionModel(lr.uid, + new DenseMatrix(1, 1, Array(0.0), isTransposed = false), Vectors.dense(0.0), 2, false) .setThreshold(0.6) val evaluator = new BinaryClassificationEvaluator() val paramMaps = new ParamGridBuilder() From 47fa5fde7a0f4ab17042989fb631cf772ff41069 Mon Sep 17 00:00:00 2001 From: sethah Date: Thu, 25 Aug 2016 21:24:46 -0700 Subject: [PATCH 08/24] rebase --- .../MultinomialLogisticRegression.scala | 632 ------------------ 1 file changed, 632 deletions(-) delete mode 100644 mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala deleted file mode 100644 index 006f57c0ce260..0000000000000 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala +++ /dev/null @@ -1,632 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.ml.classification - -import scala.collection.mutable - -import breeze.linalg.{DenseVector => BDV} -import breeze.optimize.{CachedDiffFunction, LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN} -import org.apache.hadoop.fs.Path - -import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} -import org.apache.spark.internal.Logging -import org.apache.spark.ml.feature.Instance -import org.apache.spark.ml.linalg._ -import org.apache.spark.ml.param._ -import org.apache.spark.ml.param.shared._ -import org.apache.spark.ml.util._ -import org.apache.spark.mllib.linalg.VectorImplicits._ -import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.functions.{col, lit} -import org.apache.spark.sql.types.DoubleType -import org.apache.spark.storage.StorageLevel - -/** - * Params for multinomial logistic (softmax) regression. - */ -private[classification] trait MultinomialLogisticRegressionParams - extends ProbabilisticClassifierParams with HasRegParam with HasElasticNetParam with HasMaxIter - with HasFitIntercept with HasTol with HasStandardization with HasWeightCol - with HasAggregationDepth { - - /** - * Set thresholds in multiclass (or binary) classification to adjust the probability of - * predicting each class. Array must have length equal to the number of classes, with values >= 0. - * The class with largest value p/t is predicted, where p is the original probability of that - * class and t is the class' threshold. - * - * @group setParam - */ - def setThresholds(value: Array[Double]): this.type = { - set(thresholds, value) - } - - /** - * Get thresholds for binary or multiclass classification. - * - * @group getParam - */ - override def getThresholds: Array[Double] = { - $(thresholds) - } -} - -/** - * :: Experimental :: - * Multinomial Logistic (softmax) regression. - */ -@Since("2.1.0") -@Experimental -class MultinomialLogisticRegression @Since("2.1.0") ( - @Since("2.1.0") override val uid: String) - extends ProbabilisticClassifier[Vector, - MultinomialLogisticRegression, MultinomialLogisticRegressionModel] - with MultinomialLogisticRegressionParams with DefaultParamsWritable with Logging { - - @Since("2.1.0") - def this() = this(Identifiable.randomUID("mlogreg")) - - /** - * Set the regularization parameter. - * Default is 0.0. - * - * @group setParam - */ - @Since("2.1.0") - def setRegParam(value: Double): this.type = set(regParam, value) - setDefault(regParam -> 0.0) - - /** - * Set the ElasticNet mixing parameter. - * For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty. - * For 0 < alpha < 1, the penalty is a combination of L1 and L2. - * Default is 0.0 which is an L2 penalty. - * - * @group setParam - */ - @Since("2.1.0") - def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value) - setDefault(elasticNetParam -> 0.0) - - /** - * Set the maximum number of iterations. - * Default is 100. 
- * - * @group setParam - */ - @Since("2.1.0") - def setMaxIter(value: Int): this.type = set(maxIter, value) - setDefault(maxIter -> 100) - - /** - * Set the convergence tolerance of iterations. - * Smaller value will lead to higher accuracy with the cost of more iterations. - * Default is 1E-6. - * - * @group setParam - */ - @Since("2.1.0") - def setTol(value: Double): this.type = set(tol, value) - setDefault(tol -> 1E-6) - - /** - * Whether to fit an intercept term. - * Default is true. - * - * @group setParam - */ - @Since("2.1.0") - def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) - setDefault(fitIntercept -> true) - - /** - * Whether to standardize the training features before fitting the model. - * The coefficients of models will be always returned on the original scale, - * so it will be transparent for users. Note that with/without standardization, - * the models should always converge to the same solution when no regularization - * is applied. In R's GLMNET package, the default behavior is true as well. - * Default is true. - * - * @group setParam - */ - @Since("2.1.0") - def setStandardization(value: Boolean): this.type = set(standardization, value) - setDefault(standardization -> true) - - /** - * Sets the value of param [[weightCol]]. - * If this is not set or empty, we treat all instance weights as 1.0. - * Default is not set, so all instances have weight one. - * - * @group setParam - */ - @Since("2.1.0") - def setWeightCol(value: String): this.type = set(weightCol, value) - - @Since("2.1.0") - override def setThresholds(value: Array[Double]): this.type = super.setThresholds(value) - - /** - * Suggested depth for treeAggregate (>= 2). - * If the dimensions of features or the number of partitions are large, - * this param could be adjusted to a larger size. - * Default is 2. 
- * @group expertSetParam - */ - @Since("2.1.0") - def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) - setDefault(aggregationDepth -> 2) - - override protected[spark] def train(dataset: Dataset[_]): MultinomialLogisticRegressionModel = { - val w = if (!isDefined(weightCol) || $(weightCol).isEmpty) lit(1.0) else col($(weightCol)) - val instances: RDD[Instance] = - dataset.select(col($(labelCol)).cast(DoubleType), w, col($(featuresCol))).rdd.map { - case Row(label: Double, weight: Double, features: Vector) => - Instance(label, weight, features) - } - - val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE - if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) - - val instr = Instrumentation.create(this, instances) - instr.logParams(regParam, elasticNetParam, standardization, thresholds, - maxIter, tol, fitIntercept) - - val (summarizer, labelSummarizer) = { - val seqOp = (c: (MultivariateOnlineSummarizer, MultiClassSummarizer), - instance: Instance) => - (c._1.add(instance.features, instance.weight), c._2.add(instance.label, instance.weight)) - - val combOp = (c1: (MultivariateOnlineSummarizer, MultiClassSummarizer), - c2: (MultivariateOnlineSummarizer, MultiClassSummarizer)) => - (c1._1.merge(c2._1), c1._2.merge(c2._2)) - - instances.treeAggregate( - new MultivariateOnlineSummarizer, new MultiClassSummarizer)(seqOp, combOp) - } - - val histogram = labelSummarizer.histogram - val numInvalid = labelSummarizer.countInvalid - val numFeatures = summarizer.mean.size - val numFeaturesPlusIntercept = if (getFitIntercept) numFeatures + 1 else numFeatures - - val numClasses = MetadataUtils.getNumClasses(dataset.schema($(labelCol))) match { - case Some(n: Int) => - require(n >= histogram.length, s"Specified number of classes $n was " + - s"less than the number of unique labels ${histogram.length}") - n - case None => histogram.length - } - - instr.logNumClasses(numClasses) - instr.logNumFeatures(numFeatures) - - val (coefficients, intercepts, objectiveHistory) = { - if (numInvalid != 0) { - val msg = s"Classification labels should be in {0 to ${numClasses - 1} " + - s"Found $numInvalid invalid labels." - logError(msg) - throw new SparkException(msg) - } - - val isConstantLabel = histogram.count(_ != 0) == 1 - - if ($(fitIntercept) && isConstantLabel) { - // we want to produce a model that will always predict the constant label so all the - // coefficients will be zero, and the constant label class intercept will be +inf - val constantLabelIndex = Vectors.dense(histogram).argmax - (Matrices.sparse(numClasses, numFeatures, Array.fill(numFeatures + 1)(0), - Array.empty[Int], Array.empty[Double]), - Vectors.sparse(numClasses, Seq((constantLabelIndex, Double.PositiveInfinity))), - Array.empty[Double]) - } else { - if (!$(fitIntercept) && isConstantLabel) { - logWarning(s"All labels belong to a single class and fitIntercept=false. It's" + - s"a dangerous ground, so the algorithm may not converge.") - } - - val featuresStd = summarizer.variance.toArray.map(math.sqrt) - val featuresMean = summarizer.mean.toArray - if (!$(fitIntercept) && (0 until numFeatures).exists { i => - featuresStd(i) == 0.0 && featuresMean(i) != 0.0 }) { - logWarning("Fitting MultinomialLogisticRegressionModel without intercept on dataset " + - "with constant nonzero column, Spark MLlib outputs zero coefficients for constant " + - "nonzero columns. 
This behavior is the same as R glmnet but different from LIBSVM.") - } - - val regParamL1 = $(elasticNetParam) * $(regParam) - val regParamL2 = (1.0 - $(elasticNetParam)) * $(regParam) - - val bcFeaturesStd = instances.context.broadcast(featuresStd) - val costFun = new LogisticCostFun(instances, numClasses, $(fitIntercept), - $(standardization), bcFeaturesStd, regParamL2, multinomial = true, $(aggregationDepth)) - - val optimizer = if ($(elasticNetParam) == 0.0 || $(regParam) == 0.0) { - new BreezeLBFGS[BDV[Double]]($(maxIter), 10, $(tol)) - } else { - val standardizationParam = $(standardization) - def regParamL1Fun = (index: Int) => { - // Remove the L1 penalization on the intercept - val isIntercept = $(fitIntercept) && ((index + 1) % numFeaturesPlusIntercept == 0) - if (isIntercept) { - 0.0 - } else { - if (standardizationParam) { - regParamL1 - } else { - val featureIndex = if ($(fitIntercept)) { - index % numFeaturesPlusIntercept - } else { - index % numFeatures - } - // If `standardization` is false, we still standardize the data - // to improve the rate of convergence; as a result, we have to - // perform this reverse standardization by penalizing each component - // differently to get effectively the same objective function when - // the training dataset is not standardized. - if (featuresStd(featureIndex) != 0.0) { - regParamL1 / featuresStd(featureIndex) - } else { - 0.0 - } - } - } - } - new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, regParamL1Fun, $(tol)) - } - - val initialCoefficientsWithIntercept = Vectors.zeros(numClasses * numFeaturesPlusIntercept) - - if ($(fitIntercept)) { - /* - For multinomial logistic regression, when we initialize the coefficients as zeros, - it will converge faster if we initialize the intercepts such that - it follows the distribution of the labels. - {{{ - P(1) = \exp(b_1) / Z - ... - P(K) = \exp(b_K) / Z - where Z = \sum_{k=1}^{K} \exp(b_k) - }}} - Since this doesn't have a unique solution, one of the solutions that satisfies the - above equations is - {{{ - \exp(b_k) = count_k * \exp(\lambda) - b_k = \log(count_k) * \lambda - }}} - \lambda is a free parameter, so choose the phase \lambda such that the - mean is centered. This yields - {{{ - b_k = \log(count_k) - b_k' = b_k - \mean(b_k) - }}} - */ - val rawIntercepts = histogram.map(c => math.log(c + 1)) // add 1 for smoothing - val rawMean = rawIntercepts.sum / rawIntercepts.length - rawIntercepts.indices.foreach { i => - initialCoefficientsWithIntercept.toArray(i * numFeaturesPlusIntercept + numFeatures) = - rawIntercepts(i) - rawMean - } - } - - val states = optimizer.iterations(new CachedDiffFunction(costFun), - initialCoefficientsWithIntercept.asBreeze.toDenseVector) - - /* - Note that in Multinomial Logistic Regression, the objective history - (loss + regularization) is log-likelihood which is invariant under feature - standardization. As a result, the objective history from optimizer is the same as the - one in the original space. - */ - val arrayBuilder = mutable.ArrayBuilder.make[Double] - var state: optimizer.State = null - while (states.hasNext) { - state = states.next() - arrayBuilder += state.adjustedValue - } - - if (state == null) { - val msg = s"${optimizer.getClass.getName} failed." - logError(msg) - throw new SparkException(msg) - } - bcFeaturesStd.destroy(blocking = false) - - /* - The coefficients are trained in the scaled space; we're converting them back to - the original space. 
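Aside: a standalone sketch (plain Scala, hypothetical label counts) of the intercept initialization derived in the comment above: each intercept starts at the smoothed log count of its label, mean-centered to pin down the free shift parameter lambda:

val histogram = Array(30.0, 50.0, 20.0)                  // hypothetical label counts
val rawIntercepts = histogram.map(c => math.log(c + 1))  // add 1 for smoothing
val rawMean = rawIntercepts.sum / rawIntercepts.length
val initialIntercepts = rawIntercepts.map(_ - rawMean)   // b_k' = b_k - mean(b_k)
// Under softmax these intercepts reproduce the (smoothed) empirical class
// proportions, which tends to reduce the number of iterations to converge.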
- Note that the intercept in scaled space and original space is the same; - as a result, no scaling is needed. - */ - val rawCoefficients = state.x.toArray - val interceptsArray: Array[Double] = if ($(fitIntercept)) { - Array.tabulate(numClasses) { i => - val coefIndex = (i + 1) * numFeaturesPlusIntercept - 1 - rawCoefficients(coefIndex) - } - } else { - Array.empty - } - - val coefficientArray: Array[Double] = Array.tabulate(numClasses * numFeatures) { i => - // flatIndex will loop though rawCoefficients, and skip the intercept terms. - val flatIndex = if ($(fitIntercept)) i + i / numFeatures else i - val featureIndex = i % numFeatures - if (featuresStd(featureIndex) != 0.0) { - rawCoefficients(flatIndex) / featuresStd(featureIndex) - } else { - 0.0 - } - } - val coefficientMatrix = - new DenseMatrix(numClasses, numFeatures, coefficientArray, isTransposed = true) - - /* - When no regularization is applied, the coefficients lack identifiability because - we do not use a pivot class. We can add any constant value to the coefficients and - get the same likelihood. So here, we choose the mean centered coefficients for - reproducibility. This method follows the approach in glmnet, described here: - - Friedman, et al. "Regularization Paths for Generalized Linear Models via - Coordinate Descent," https://core.ac.uk/download/files/153/6287975.pdf - */ - if ($(regParam) == 0.0) { - val coefficientMean = coefficientMatrix.values.sum / (numClasses * numFeatures) - coefficientMatrix.update(_ - coefficientMean) - } - /* - The intercepts are never regularized, so we always center the mean. - */ - val interceptVector = if (interceptsArray.nonEmpty) { - val interceptMean = interceptsArray.sum / numClasses - interceptsArray.indices.foreach { i => interceptsArray(i) -= interceptMean } - Vectors.dense(interceptsArray) - } else { - Vectors.sparse(numClasses, Seq()) - } - - (coefficientMatrix, interceptVector, arrayBuilder.result()) - } - } - - if (handlePersistence) instances.unpersist() - - val model = copyValues( - new MultinomialLogisticRegressionModel(uid, coefficients, intercepts, numClasses)) - instr.logSuccess(model) - model - } - - @Since("2.1.0") - override def copy(extra: ParamMap): MultinomialLogisticRegression = defaultCopy(extra) -} - -@Since("2.1.0") -object MultinomialLogisticRegression extends DefaultParamsReadable[MultinomialLogisticRegression] { - - @Since("2.1.0") - override def load(path: String): MultinomialLogisticRegression = super.load(path) -} - -/** - * :: Experimental :: - * Model produced by [[MultinomialLogisticRegression]]. - */ -@Since("2.1.0") -@Experimental -class MultinomialLogisticRegressionModel private[spark] ( - @Since("2.1.0") override val uid: String, - @Since("2.1.0") val coefficients: Matrix, - @Since("2.1.0") val intercepts: Vector, - @Since("2.1.0") val numClasses: Int) - extends ProbabilisticClassificationModel[Vector, MultinomialLogisticRegressionModel] - with MultinomialLogisticRegressionParams with MLWritable { - - @Since("2.1.0") - override def setThresholds(value: Array[Double]): this.type = super.setThresholds(value) - - @Since("2.1.0") - override def getThresholds: Array[Double] = super.getThresholds - - @Since("2.1.0") - override val numFeatures: Int = coefficients.numCols - - /** Margin (rawPrediction) for each class label. */ - private val margins: Vector => Vector = (features) => { - val m = intercepts.toDense.copy - BLAS.gemv(1.0, coefficients, features, 1.0, m) - m - } - - /** Score (probability) for each class label. 
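Aside: why the mean-centering above is safe when regParam == 0. Softmax probabilities are invariant to adding one constant to every class's margin, so subtracting the mean coefficient (and, just below, the mean intercept) merely picks a canonical representative from a family of equivalent solutions. A self-contained check, assuming only the Scala standard library:

def softmax(margins: Array[Double]): Array[Double] = {
  val max = margins.max                          // overflow guard
  val exps = margins.map(m => math.exp(m - max))
  val sum = exps.sum
  exps.map(_ / sum)
}
val margins = Array(1.0, 2.0, 4.0)
val mean = margins.sum / margins.length
val centered = margins.map(_ - mean)
// Identical probabilities before and after centering.
assert(softmax(margins).zip(softmax(centered)).forall { case (a, b) => math.abs(a - b) < 1e-12 })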
*/ - private val scores: Vector => Vector = (features) => { - val m = margins(features) - val maxMarginIndex = m.argmax - val marginArray = m.toArray - val maxMargin = marginArray(maxMarginIndex) - - // adjust margins for overflow - val sum = { - var temp = 0.0 - var k = 0 - while (k < numClasses) { - marginArray(k) = if (maxMargin > 0) { - math.exp(marginArray(k) - maxMargin) - } else { - math.exp(marginArray(k)) - } - temp += marginArray(k) - k += 1 - } - temp - } - - val scores = Vectors.dense(marginArray) - BLAS.scal(1 / sum, scores) - scores - } - - /** - * Predict label for the given feature vector. - * The behavior of this can be adjusted using [[thresholds]]. - */ - override protected def predict(features: Vector): Double = { - if (isDefined(thresholds)) { - val thresholds: Array[Double] = getThresholds - val probabilities = scores(features).toArray - var argMax = 0 - var max = Double.NegativeInfinity - var i = 0 - while (i < numClasses) { - if (thresholds(i) == 0.0) { - max = Double.PositiveInfinity - argMax = i - } else { - val scaled = probabilities(i) / thresholds(i) - if (scaled > max) { - max = scaled - argMax = i - } - } - i += 1 - } - argMax - } else { - scores(features).argmax - } - } - - override protected def raw2probabilityInPlace(rawPrediction: Vector): Vector = { - rawPrediction match { - case dv: DenseVector => - val size = dv.size - val values = dv.values - - // get the maximum margin - val maxMarginIndex = rawPrediction.argmax - val maxMargin = rawPrediction(maxMarginIndex) - - if (maxMargin == Double.PositiveInfinity) { - var k = 0 - while (k < size) { - values(k) = if (k == maxMarginIndex) 1.0 else 0.0 - k += 1 - } - } else { - val sum = { - var temp = 0.0 - var k = 0 - while (k < numClasses) { - values(k) = if (maxMargin > 0) { - math.exp(values(k) - maxMargin) - } else { - math.exp(values(k)) - } - temp += values(k) - k += 1 - } - temp - } - BLAS.scal(1 / sum, dv) - } - dv - case sv: SparseVector => - throw new RuntimeException("Unexpected error in MultinomialLogisticRegressionModel:" + - " raw2probabilitiesInPlace encountered SparseVector") - } - } - - override protected def predictRaw(features: Vector): Vector = margins(features) - - @Since("2.1.0") - override def copy(extra: ParamMap): MultinomialLogisticRegressionModel = { - val newModel = - copyValues( - new MultinomialLogisticRegressionModel(uid, coefficients, intercepts, numClasses), extra) - newModel.setParent(parent) - } - - /** - * Returns a [[org.apache.spark.ml.util.MLWriter]] instance for this ML instance. - * - * This does not save the [[parent]] currently. 
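Aside: the maxMargin == Double.PositiveInfinity branch in raw2probabilityInPlace above matters because a constant-label model stores an infinite intercept, and exp(inf - inf) is NaN, so the code emits a one-hot distribution instead of normalizing. A tiny illustration of the hazard (plain Scala, hypothetical margins):

val margins = Array(0.0, Double.PositiveInfinity)
val naive = margins.map(m => math.exp(m - margins.max))
assert(naive(1).isNaN)                           // exp(inf - inf) = NaN
// Guarded version: put all probability mass on the infinite-margin class.
val maxIdx = margins.indexOf(margins.max)
val probs = margins.indices.map(i => if (i == maxIdx) 1.0 else 0.0)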
- */ - @Since("2.1.0") - override def write: MLWriter = - new MultinomialLogisticRegressionModel.MultinomialLogisticRegressionModelWriter(this) -} - - -@Since("2.1.0") -object MultinomialLogisticRegressionModel extends MLReadable[MultinomialLogisticRegressionModel] { - - @Since("2.1.0") - override def read: MLReader[MultinomialLogisticRegressionModel] = - new MultinomialLogisticRegressionModelReader - - @Since("2.1.0") - override def load(path: String): MultinomialLogisticRegressionModel = super.load(path) - - /** [[MLWriter]] instance for [[MultinomialLogisticRegressionModel]] */ - private[MultinomialLogisticRegressionModel] - class MultinomialLogisticRegressionModelWriter(instance: MultinomialLogisticRegressionModel) - extends MLWriter with Logging { - - private case class Data( - numClasses: Int, - numFeatures: Int, - intercepts: Vector, - coefficients: Matrix) - - override protected def saveImpl(path: String): Unit = { - // Save metadata and Params - DefaultParamsWriter.saveMetadata(instance, path, sc) - // Save model data: numClasses, numFeatures, intercept, coefficients - val data = Data(instance.numClasses, instance.numFeatures, instance.intercepts, - instance.coefficients) - val dataPath = new Path(path, "data").toString - sqlContext.createDataFrame(Seq(data)).repartition(1).write.parquet(dataPath) - } - } - - private class MultinomialLogisticRegressionModelReader - extends MLReader[MultinomialLogisticRegressionModel] { - - /** Checked against metadata when loading model */ - private val className = classOf[MultinomialLogisticRegressionModel].getName - - override def load(path: String): MultinomialLogisticRegressionModel = { - val metadata = DefaultParamsReader.loadMetadata(path, sc, className) - - val dataPath = new Path(path, "data").toString - val data = sqlContext.read.format("parquet").load(dataPath) - .select("numClasses", "numFeatures", "intercepts", "coefficients").head() - val numClasses = data.getAs[Int](data.fieldIndex("numClasses")) - val intercepts = data.getAs[Vector](data.fieldIndex("intercepts")) - val coefficients = data.getAs[Matrix](data.fieldIndex("coefficients")) - val model = - new MultinomialLogisticRegressionModel(metadata.uid, coefficients, intercepts, numClasses) - - DefaultParamsReader.getAndSetParams(model, metadata) - model - } - } -} From 79273f7be4234de0d97347df02518b690fef7119 Mon Sep 17 00:00:00 2001 From: sethah Date: Fri, 26 Aug 2016 08:21:56 -0700 Subject: [PATCH 09/24] removing old test suite --- .../classification/LogisticRegression.scala | 3 +- .../LogisticRegressionSuite.scala | 710 ++++++++++++++- .../MultinomialLogisticRegressionSuite.scala | 842 ------------------ 3 files changed, 710 insertions(+), 845 deletions(-) delete mode 100644 mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 2b3cdc5473529..80426fc019e83 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -668,7 +668,7 @@ class LogisticRegressionModel private[spark] ( @Since("2.0.0") def coefficients: Vector = if (isMultinomial) { - throw new SparkException("Multinomial models contain a matrix of coefficients, use" + + throw new SparkException("Multinomial models contain a matrix of coefficients, use " + "coefficientMatrix 
instead.") } else { _coefficients @@ -1378,7 +1378,6 @@ class BinaryLogisticRegressionSummary private[classification] ( * $$ *
* - * * @param bcCoefficients The broadcast coefficients corresponding to the features. * @param bcFeaturesStd The broadcast standard deviation values of the features. * @param numClasses the number of possible outcomes for k classes classification problem in diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index f04d73f979509..47c1a7218fcbd 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -24,7 +24,7 @@ import scala.language.existentials import scala.util.Random import scala.util.control.Breaks._ -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.ml.classification.LogisticRegressionSuite._ import org.apache.spark.ml.feature.{Instance, LabeledPoint} import org.apache.spark.ml.linalg.{Matrices, DenseMatrix, Vector, Vectors} @@ -364,6 +364,24 @@ class LogisticRegressionSuite } } + test("coefficients and intercept methods") { + val mlr = new LogisticRegression().setMaxIter(1) + val mlrModel = mlr.fit(smallMultinomialDataset) + val thrownCoef = intercept[SparkException] { + mlrModel.coefficients + } + val thrownIntercept = intercept[SparkException] { + mlrModel.intercept + } + assert(thrownCoef.getMessage().contains("use coefficientMatrix instead")) + assert(thrownIntercept.getMessage().contains("use interceptVector instead")) + + val blr = new LogisticRegression().setMaxIter(1) + val blrModel = blr.fit(smallBinaryDataset) + assert(blrModel.coefficients.size === 1) + assert(blrModel.intercept !== 0.0) + } + test("overflow prediction for multiclass") { val model = new LogisticRegressionModel("mLogReg", Matrices.dense(3, 2, Array(0.0, 0.0, 0.0, 1.0, 2.0, 3.0)), @@ -999,6 +1017,696 @@ class LogisticRegressionSuite assert(model2.coefficientMatrix ~= coefficientsTheory absTol 1E-6) } + test("multinomial logistic regression with intercept without regularization") { + + val trainer1 = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(0.0).setRegParam(0.0).setStandardization(true).setMaxIter(100) + val trainer2 = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(0.0).setRegParam(0.0).setStandardization(false) + + val model1 = trainer1.fit(multinomialDataset) + val model2 = trainer2.fit(multinomialDataset) + + /* + Using the following R code to load the data and train the model using glmnet package. 
+ > library("glmnet") + > data <- read.csv("path", header=FALSE) + > label = as.factor(data$V1) + > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + > coefficients = coef(glmnet(features, label, family="multinomial", alpha = 0, lambda = 0)) + > coefficients + $`0` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + -2.24493379 + V2 0.25096771 + V3 -0.03915938 + V4 0.14766639 + V5 0.36810817 + $`1` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + 0.3778931 + V2 -0.3327489 + V3 0.8893666 + V4 -0.2306948 + V5 -0.4442330 + $`2` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + 1.86704066 + V2 0.08178121 + V3 -0.85020722 + V4 0.08302840 + V5 0.07612480 + */ + + val coefficientsR = new DenseMatrix(3, 4, Array( + 0.2509677, -0.0391594, 0.1476664, 0.3681082, + -0.3327489, 0.8893666, -0.2306948, -0.4442330, + 0.0817812, -0.8502072, 0.0830284, 0.0761248), isTransposed = true) + val interceptsR = Vectors.dense(-2.2449338, 0.3778931, 1.8670407) + + assert(model1.coefficientMatrix ~== coefficientsR relTol 0.05) + assert(model1.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) + assert(model1.interceptVector ~== interceptsR relTol 0.05) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR relTol 0.05) + assert(model2.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) + assert(model2.interceptVector ~== interceptsR relTol 0.05) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) + } + + test("multinomial logistic regression without intercept without regularization") { + + val trainer1 = (new LogisticRegression).setFitIntercept(false) + .setElasticNetParam(0.0).setRegParam(0.0).setStandardization(true) + val trainer2 = (new LogisticRegression).setFitIntercept(false) + .setElasticNetParam(0.0).setRegParam(0.0).setStandardization(false) + + val model1 = trainer1.fit(multinomialDataset) + val model2 = trainer2.fit(multinomialDataset) + + /* + Using the following R code to load the data and train the model using glmnet package. + library("glmnet") + data <- read.csv("path", header=FALSE) + label = as.factor(data$V1) + features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + coefficients = coef(glmnet(features, label, family="multinomial", alpha = 0, lambda = 0, + intercept=F)) + > coefficients + $`0` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 0.06992464 + V3 -0.36562784 + V4 0.12142680 + V5 0.32052211 + $`1` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 -0.3036269 + V3 0.9449630 + V4 -0.2271038 + V5 -0.4364839 + $`2` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . 
+ V2 0.2337022 + V3 -0.5793351 + V4 0.1056770 + V5 0.1159618 + */ + + val coefficientsR = new DenseMatrix(3, 4, Array( + 0.0699246, -0.3656278, 0.1214268, 0.3205221, + -0.3036269, 0.9449630, -0.2271038, -0.4364839, + 0.2337022, -0.5793351, 0.1056770, 0.1159618), isTransposed = true) + + assert(model1.coefficientMatrix ~== coefficientsR relTol 0.05) + assert(model1.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) + assert(model1.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR relTol 0.05) + assert(model2.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) + assert(model2.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) + } + + test("multinomial logistic regression with intercept with L1 regularization") { + + // use tighter constraints because OWL-QN solver takes longer to converge + val trainer1 = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true) + .setMaxIter(300).setTol(1e-10) + val trainer2 = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(false) + .setMaxIter(300).setTol(1e-10) + + val model1 = trainer1.fit(multinomialDataset) + val model2 = trainer2.fit(multinomialDataset) + + /* + Use the following R code to load the data and train the model using glmnet package. + library("glmnet") + data <- read.csv("path", header=FALSE) + label = as.factor(data$V1) + features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + coefficientsStd = coef(glmnet(features, label, family="multinomial", alpha = 1, + lambda = 0.05, standardization=T)) + coefficients = coef(glmnet(features, label, family="multinomial", alpha = 1, lambda = 0.05, + standardization=F)) + > coefficientsStd + $`0` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + -0.68988825 + V2 . + V3 . + V4 . + V5 0.09404023 + + $`1` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + -0.2303499 + V2 -0.1232443 + V3 0.3258380 + V4 -0.1564688 + V5 -0.2053965 + + $`2` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + 0.9202381 + V2 . + V3 -0.4803856 + V4 . + V5 . + + > coefficients + $`0` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + -0.44893320 + V2 . + V3 . + V4 0.01933812 + V5 0.03666044 + + $`1` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + 0.7376760 + V2 -0.0577182 + V3 . + V4 -0.2081718 + V5 -0.1304592 + + $`2` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + -0.2887428 + V2 . + V3 . + V4 . + V5 . 
+ */ + + val coefficientsRStd = new DenseMatrix(3, 4, Array( + 0.0, 0.0, 0.0, 0.09404023, + -0.1232443, 0.3258380, -0.1564688, -0.2053965, + 0.0, -0.4803856, 0.0, 0.0), isTransposed = true) + val interceptsRStd = Vectors.dense(-0.68988825, -0.2303499, 0.9202381) + + val coefficientsR = new DenseMatrix(3, 4, Array( + 0.0, 0.0, 0.01933812, 0.03666044, + -0.0577182, 0.0, -0.2081718, -0.1304592, + 0.0, 0.0, 0.0, 0.0), isTransposed = true) + val interceptsR = Vectors.dense(-0.44893320, 0.7376760, -0.2887428) + + assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.02) + assert(model1.interceptVector ~== interceptsRStd relTol 0.1) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR absTol 0.02) + assert(model2.interceptVector ~== interceptsR relTol 0.1) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) + } + + test("multinomial logistic regression without intercept with L1 regularization") { + val trainer1 = (new LogisticRegression).setFitIntercept(false) + .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true) + val trainer2 = (new LogisticRegression).setFitIntercept(false) + .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(false) + + val model1 = trainer1.fit(multinomialDataset) + val model2 = trainer2.fit(multinomialDataset) + /* + Use the following R code to load the data and train the model using glmnet package. + library("glmnet") + data <- read.csv("path", header=FALSE) + label = as.factor(data$V1) + features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + coefficientsStd = coef(glmnet(features, label, family="multinomial", alpha = 1, + lambda = 0.05, intercept=F, standardization=T)) + coefficients = coef(glmnet(features, label, family="multinomial", alpha = 1, lambda = 0.05, + intercept=F, standardization=F)) + > coefficientsStd + $`0` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 . + V3 . + V4 . + V5 0.01525105 + + $`1` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 -0.1502410 + V3 0.5134658 + V4 -0.1601146 + V5 -0.2500232 + + $`2` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 0.003301875 + V3 . + V4 . + V5 . + + > coefficients + $`0` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 . + V3 . + V4 . + V5 . + + $`1` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 . + V3 0.1943624 + V4 -0.1902577 + V5 -0.1028789 + + $`2` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 . + V3 . + V4 . + V5 . 
+ */ + + val coefficientsRStd = new DenseMatrix(3, 4, Array( + 0.0, 0.0, 0.0, 0.01525105, + -0.1502410, 0.5134658, -0.1601146, -0.2500232, + 0.003301875, 0.0, 0.0, 0.0), isTransposed = true) + + val coefficientsR = new DenseMatrix(3, 4, Array( + 0.0, 0.0, 0.0, 0.0, + 0.0, 0.1943624, -0.1902577, -0.1028789, + 0.0, 0.0, 0.0, 0.0), isTransposed = true) + + assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.01) + assert(model1.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR absTol 0.01) + assert(model2.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) + } + + test("multinomial logistic regression with intercept with L2 regularization") { + val trainer1 = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(0.0).setRegParam(0.1).setStandardization(true) + val trainer2 = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(0.0).setRegParam(0.1).setStandardization(false) + + val model1 = trainer1.fit(multinomialDataset) + val model2 = trainer2.fit(multinomialDataset) + /* + Use the following R code to load the data and train the model using glmnet package. + library("glmnet") + data <- read.csv("path", header=FALSE) + label = as.factor(data$V1) + features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + coefficientsStd = coef(glmnet(features, label, family="multinomial", alpha = 0, + lambda = 0.1, intercept=T, standardization=T)) + coefficients = coef(glmnet(features, label, family="multinomial", alpha = 0, + lambda = 0.1, intercept=T, standardization=F)) + > coefficientsStd + $`0` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + -1.70040424 + V2 0.17576070 + V3 0.01527894 + V4 0.10216108 + V5 0.26099531 + + $`1` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + 0.2438590 + V2 -0.2238875 + V3 0.5967610 + V4 -0.1555496 + V5 -0.3010479 + + $`2` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + 1.45654525 + V2 0.04812679 + V3 -0.61203992 + V4 0.05338850 + V5 0.04005258 + + > coefficients + $`0` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + -1.65488543 + V2 0.15715048 + V3 0.01992903 + V4 0.12428858 + V5 0.22130317 + + $`1` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + 1.1297533 + V2 -0.1974768 + V3 0.2776373 + V4 -0.1869445 + V5 -0.2510320 + + $`2` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + 0.52513212 + V2 0.04032627 + V3 -0.29756637 + V4 0.06265594 + V5 0.02972883 + */ + + val coefficientsRStd = new DenseMatrix(3, 4, Array( + 0.17576070, 0.01527894, 0.10216108, 0.26099531, + -0.2238875, 0.5967610, -0.1555496, -0.3010479, + 0.04812679, -0.61203992, 0.05338850, 0.04005258), isTransposed = true) + val interceptsRStd = Vectors.dense(-1.70040424, 0.2438590, 1.45654525) + + val coefficientsR = new DenseMatrix(3, 4, Array( + 0.15715048, 0.01992903, 0.12428858, 0.22130317, + -0.1974768, 0.2776373, -0.1869445, -0.2510320, + 0.04032627, -0.29756637, 0.06265594, 0.02972883), isTransposed = true) + val interceptsR = Vectors.dense(-1.65488543, 1.1297533, 0.52513212) + + assert(model1.coefficientMatrix ~== coefficientsRStd relTol 0.05) + assert(model1.interceptVector ~== interceptsRStd relTol 0.05) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR relTol 0.05) + assert(model2.interceptVector ~== interceptsR relTol 0.05) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) + } + + 
test("multinomial logistic regression without intercept with L2 regularization") { + val trainer1 = (new LogisticRegression).setFitIntercept(false) + .setElasticNetParam(0.0).setRegParam(0.1).setStandardization(true) + val trainer2 = (new LogisticRegression).setFitIntercept(false) + .setElasticNetParam(0.0).setRegParam(0.1).setStandardization(false) + + val model1 = trainer1.fit(multinomialDataset) + val model2 = trainer2.fit(multinomialDataset) + /* + Use the following R code to load the data and train the model using glmnet package. + library("glmnet") + data <- read.csv("path", header=FALSE) + label = as.factor(data$V1) + features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + coefficientsStd = coef(glmnet(features, label, family="multinomial", alpha = 0, + lambda = 0.1, intercept=F, standardization=T)) + coefficients = coef(glmnet(features, label, family="multinomial", alpha = 0, + lambda = 0.1, intercept=F, standardization=F)) + > coefficientsStd + $`0` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 0.03904171 + V3 -0.23354322 + V4 0.08288096 + V5 0.22706393 + + $`1` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 -0.2061848 + V3 0.6341398 + V4 -0.1530059 + V5 -0.2958455 + + $`2` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 0.16714312 + V3 -0.40059658 + V4 0.07012496 + V5 0.06878158 + > coefficients + $`0` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 -0.005704542 + V3 -0.144466409 + V4 0.092080736 + V5 0.182927657 + + $`1` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 -0.08469036 + V3 0.38996748 + V4 -0.16468436 + V5 -0.22522976 + + $`2` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 0.09039490 + V3 -0.24550107 + V4 0.07260362 + V5 0.04230210 + */ + val coefficientsRStd = new DenseMatrix(3, 4, Array( + 0.03904171, -0.23354322, 0.08288096, 0.2270639, + -0.2061848, 0.6341398, -0.1530059, -0.2958455, + 0.16714312, -0.40059658, 0.07012496, 0.06878158), isTransposed = true) + + val coefficientsR = new DenseMatrix(3, 4, Array( + -0.005704542, -0.144466409, 0.092080736, 0.182927657, + -0.08469036, 0.38996748, -0.16468436, -0.22522976, + 0.0903949, -0.24550107, 0.07260362, 0.0423021), isTransposed = true) + + assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.01) + assert(model1.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR absTol 0.01) + assert(model2.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) + } + + test("multinomial logistic regression with intercept with elasticnet regularization") { + val trainer1 = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(true) + .setMaxIter(300).setTol(1e-10) + val trainer2 = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(false) + .setMaxIter(300).setTol(1e-10) + + val model1 = trainer1.fit(multinomialDataset) + val model2 = trainer2.fit(multinomialDataset) + /* + Use the following R code to load the data and train the model using glmnet package. 
+ library("glmnet") + data <- read.csv("path", header=FALSE) + label = as.factor(data$V1) + features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + coefficientsStd = coef(glmnet(features, label, family="multinomial", alpha = 0.5, + lambda = 0.1, intercept=T, standardization=T)) + coefficients = coef(glmnet(features, label, family="multinomial", alpha = 0.5, + lambda = 0.1, intercept=T, standardization=F)) + > coefficientsStd + $`0` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + -0.5521819483 + V2 0.0003092611 + V3 . + V4 . + V5 0.0913818490 + + $`1` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + -0.27531989 + V2 -0.09790029 + V3 0.28502034 + V4 -0.12416487 + V5 -0.16513373 + + $`2` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + 0.8275018 + V2 . + V3 -0.4044859 + V4 . + V5 . + + > coefficients + $`0` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + -0.39876213 + V2 . + V3 . + V4 0.02547520 + V5 0.03893991 + + $`1` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + 0.61089869 + V2 -0.04224269 + V3 . + V4 -0.18923970 + V5 -0.09104249 + + $`2` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + -0.2121366 + V2 . + V3 . + V4 . + V5 . + */ + + val coefficientsRStd = new DenseMatrix(3, 4, Array( + 0.0003092611, 0.0, 0.0, 0.091381849, + -0.09790029, 0.28502034, -0.12416487, -0.16513373, + 0.0, -0.4044859, 0.0, 0.0), isTransposed = true) + val interceptsRStd = Vectors.dense(-0.5521819483, -0.27531989, 0.8275018) + + val coefficientsR = new DenseMatrix(3, 4, Array( + 0.0, 0.0, 0.0254752, 0.03893991, + -0.04224269, 0.0, -0.1892397, -0.09104249, + 0.0, 0.0, 0.0, 0.0), isTransposed = true) + val interceptsR = Vectors.dense(-0.39876213, 0.61089869, -0.2121366) + + assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.01) + assert(model1.interceptVector ~== interceptsRStd absTol 0.01) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR absTol 0.01) + assert(model2.interceptVector ~== interceptsR absTol 0.01) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) + } + + test("multinomial logistic regression without intercept with elasticnet regularization") { + val trainer1 = (new LogisticRegression).setFitIntercept(false) + .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(true) + .setMaxIter(300).setTol(1e-10) + val trainer2 = (new LogisticRegression).setFitIntercept(false) + .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(false) + .setMaxIter(300).setTol(1e-10) + + val model1 = trainer1.fit(multinomialDataset) + val model2 = trainer2.fit(multinomialDataset) + /* + Use the following R code to load the data and train the model using glmnet package. + library("glmnet") + data <- read.csv("path", header=FALSE) + label = as.factor(data$V1) + features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + coefficientsStd = coef(glmnet(features, label, family="multinomial", alpha = 0.5, + lambda = 0.1, intercept=F, standardization=T)) + coefficients = coef(glmnet(features, label, family="multinomial", alpha = 0.5, + lambda = 0.1, intercept=F, standardization=F)) + > coefficientsStd + $`0` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 . + V3 . + V4 . + V5 0.03543706 + + $`1` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 -0.1187387 + V3 0.4025482 + V4 -0.1270969 + V5 -0.1918386 + + $`2` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 0.00774365 + V3 . + V4 . + V5 . + + > coefficients + $`0` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 . 
+ V3 . + V4 . + V5 . + + $`1` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 . + V3 0.14666497 + V4 -0.16570638 + V5 -0.05982875 + + $`2` + 5 x 1 sparse Matrix of class "dgCMatrix" + s0 + . + V2 . + V3 . + V4 . + V5 . + */ + val coefficientsRStd = new DenseMatrix(3, 4, Array( + 0.0, 0.0, 0.0, 0.03543706, + -0.1187387, 0.4025482, -0.1270969, -0.1918386, + 0.0, 0.0, 0.0, 0.00774365), isTransposed = true) + + val coefficientsR = new DenseMatrix(3, 4, Array( + 0.0, 0.0, 0.0, 0.0, + 0.0, 0.14666497, -0.16570638, -0.05982875, + 0.0, 0.0, 0.0, 0.0), isTransposed = true) + + assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.01) + assert(model1.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) + assert(model2.coefficientMatrix ~== coefficientsR absTol 0.01) + assert(model2.interceptVector.toArray === Array.fill(3)(0.0)) + assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) + } + test("evaluate on test set") { // TODO: add for multiclass when model summary becomes available // Evaluate on test set should be same as that of the transformed training data. diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala deleted file mode 100644 index 5725a47dd8652..0000000000000 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/MultinomialLogisticRegressionSuite.scala +++ /dev/null @@ -1,842 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.ml.classification - -import scala.language.existentials - -import org.apache.spark.SparkFunSuite -import org.apache.spark.ml.attribute.NominalAttribute -import org.apache.spark.ml.classification.LogisticRegressionSuite._ -import org.apache.spark.ml.feature.LabeledPoint -import org.apache.spark.ml.linalg._ -import org.apache.spark.ml.param.ParamsSuite -import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils} -import org.apache.spark.ml.util.TestingUtils._ -import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.{DataFrame, Dataset, Row} - -class MultinomialLogisticRegressionSuite - extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest { - - @transient var dataset: Dataset[_] = _ - @transient var multinomialDataset: DataFrame = _ - private val eps: Double = 1e-5 - - override def beforeAll(): Unit = { - super.beforeAll() - - dataset = { - val nPoints = 100 - val coefficients = Array( - -0.57997, 0.912083, -0.371077, - -0.16624, -0.84355, -0.048509) - - val xMean = Array(5.843, 3.057) - val xVariance = Array(0.6856, 0.1899) - - val testData = generateMultinomialLogisticInput( - coefficients, xMean, xVariance, addIntercept = true, nPoints, 42) - - val df = spark.createDataFrame(sc.parallelize(testData, 4)) - df.cache() - df - } - - multinomialDataset = { - val nPoints = 10000 - val coefficients = Array( - -0.57997, 0.912083, -0.371077, -0.819866, 2.688191, - -0.16624, -0.84355, -0.048509, -0.301789, 4.170682) - - val xMean = Array(5.843, 3.057, 3.758, 1.199) - val xVariance = Array(0.6856, 0.1899, 3.116, 0.581) - - val testData = generateMultinomialLogisticInput( - coefficients, xMean, xVariance, addIntercept = true, nPoints, 42) - - val df = spark.createDataFrame(sc.parallelize(testData, 4)) - df.cache() - df - } - } - - /** - * Enable the ignored test to export the dataset into CSV format, - * so we can validate the training accuracy compared with R's glmnet package. 
- */ - ignore("export test data into CSV format") { - val rdd = multinomialDataset.rdd.map { case Row(label: Double, features: Vector) => - label + "," + features.toArray.mkString(",") - }.repartition(1) - rdd.saveAsTextFile("target/tmp/MultinomialLogisticRegressionSuite/multinomialDataset") - } - -// test("params") { -// ParamsSuite.checkParams(new LogisticRegression) -// val model = new LogisticRegressionModel("mLogReg", -// Matrices.dense(2, 1, Array(0.0, 0.0)), Vectors.dense(0.0, 0.0), 2, true) -// ParamsSuite.checkParams(model) -// } -// -// test("multinomial logistic regression: default params") { -// val mlr = new LogisticRegression -// assert(mlr.getLabelCol === "label") -// assert(mlr.getFeaturesCol === "features") -// assert(mlr.getPredictionCol === "prediction") -// assert(mlr.getRawPredictionCol === "rawPrediction") -// assert(mlr.getProbabilityCol === "probability") -// assert(!mlr.isDefined(mlr.weightCol)) -// assert(!mlr.isDefined(mlr.thresholds)) -// assert(mlr.getFitIntercept) -// assert(mlr.getStandardization) -// val model = mlr.fit(dataset) -// model.transform(dataset) -// .select("label", "probability", "prediction", "rawPrediction") -// .collect() -// assert(model.getFeaturesCol === "features") -// assert(model.getPredictionCol === "prediction") -// assert(model.getRawPredictionCol === "rawPrediction") -// assert(model.getProbabilityCol === "probability") -// assert(model.interceptVector !== Vectors.dense(0.0, 0.0)) -// assert(model.hasParent) -// } - - test("multinomial logistic regression with intercept without regularization") { - - val trainer1 = (new LogisticRegression).setFitIntercept(true) - .setElasticNetParam(0.0).setRegParam(0.0).setStandardization(true).setMaxIter(100) - val trainer2 = (new LogisticRegression).setFitIntercept(true) - .setElasticNetParam(0.0).setRegParam(0.0).setStandardization(false) - - val model1 = trainer1.fit(multinomialDataset) - val model2 = trainer2.fit(multinomialDataset) - - /* - Using the following R code to load the data and train the model using glmnet package. 
- > library("glmnet") - > data <- read.csv("path", header=FALSE) - > label = as.factor(data$V1) - > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) - > coefficients = coef(glmnet(features, label, family="multinomial", alpha = 0, lambda = 0)) - > coefficients - $`0` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - -2.24493379 - V2 0.25096771 - V3 -0.03915938 - V4 0.14766639 - V5 0.36810817 - $`1` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - 0.3778931 - V2 -0.3327489 - V3 0.8893666 - V4 -0.2306948 - V5 -0.4442330 - $`2` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - 1.86704066 - V2 0.08178121 - V3 -0.85020722 - V4 0.08302840 - V5 0.07612480 - */ - - val coefficientsR = new DenseMatrix(3, 4, Array( - 0.2509677, -0.0391594, 0.1476664, 0.3681082, - -0.3327489, 0.8893666, -0.2306948, -0.4442330, - 0.0817812, -0.8502072, 0.0830284, 0.0761248), isTransposed = true) - val interceptsR = Vectors.dense(-2.2449338, 0.3778931, 1.8670407) - - assert(model1.coefficientMatrix ~== coefficientsR relTol 0.05) - assert(model1.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) - assert(model1.interceptVector ~== interceptsR relTol 0.05) - assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficientMatrix ~== coefficientsR relTol 0.05) - assert(model2.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) - assert(model2.interceptVector ~== interceptsR relTol 0.05) - assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) - } - - test("multinomial logistic regression without intercept without regularization") { - - val trainer1 = (new LogisticRegression).setFitIntercept(false) - .setElasticNetParam(0.0).setRegParam(0.0).setStandardization(true) - val trainer2 = (new LogisticRegression).setFitIntercept(false) - .setElasticNetParam(0.0).setRegParam(0.0).setStandardization(false) - - val model1 = trainer1.fit(multinomialDataset) - val model2 = trainer2.fit(multinomialDataset) - - /* - Using the following R code to load the data and train the model using glmnet package. - library("glmnet") - data <- read.csv("path", header=FALSE) - label = as.factor(data$V1) - features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) - coefficients = coef(glmnet(features, label, family="multinomial", alpha = 0, lambda = 0, - intercept=F)) - > coefficients - $`0` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 0.06992464 - V3 -0.36562784 - V4 0.12142680 - V5 0.32052211 - $`1` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 -0.3036269 - V3 0.9449630 - V4 -0.2271038 - V5 -0.4364839 - $`2` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . 
- V2 0.2337022 - V3 -0.5793351 - V4 0.1056770 - V5 0.1159618 - */ - - val coefficientsR = new DenseMatrix(3, 4, Array( - 0.0699246, -0.3656278, 0.1214268, 0.3205221, - -0.3036269, 0.9449630, -0.2271038, -0.4364839, - 0.2337022, -0.5793351, 0.1056770, 0.1159618), isTransposed = true) - - assert(model1.coefficientMatrix ~== coefficientsR relTol 0.05) - assert(model1.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) - assert(model1.interceptVector.toArray === Array.fill(3)(0.0)) - assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficientMatrix ~== coefficientsR relTol 0.05) - assert(model2.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) - assert(model2.interceptVector.toArray === Array.fill(3)(0.0)) - assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) - } - - test("multinomial logistic regression with intercept with L1 regularization") { - - // use tighter constraints because OWL-QN solver takes longer to converge - val trainer1 = (new LogisticRegression).setFitIntercept(true) - .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true) - .setMaxIter(300).setTol(1e-10) - val trainer2 = (new LogisticRegression).setFitIntercept(true) - .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(false) - .setMaxIter(300).setTol(1e-10) - - val model1 = trainer1.fit(multinomialDataset) - val model2 = trainer2.fit(multinomialDataset) - - /* - Use the following R code to load the data and train the model using glmnet package. - library("glmnet") - data <- read.csv("path", header=FALSE) - label = as.factor(data$V1) - features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) - coefficientsStd = coef(glmnet(features, label, family="multinomial", alpha = 1, - lambda = 0.05, standardization=T)) - coefficients = coef(glmnet(features, label, family="multinomial", alpha = 1, lambda = 0.05, - standardization=F)) - > coefficientsStd - $`0` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - -0.68988825 - V2 . - V3 . - V4 . - V5 0.09404023 - - $`1` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - -0.2303499 - V2 -0.1232443 - V3 0.3258380 - V4 -0.1564688 - V5 -0.2053965 - - $`2` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - 0.9202381 - V2 . - V3 -0.4803856 - V4 . - V5 . - - > coefficients - $`0` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - -0.44893320 - V2 . - V3 . - V4 0.01933812 - V5 0.03666044 - - $`1` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - 0.7376760 - V2 -0.0577182 - V3 . - V4 -0.2081718 - V5 -0.1304592 - - $`2` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - -0.2887428 - V2 . - V3 . - V4 . - V5 . 
- */ - - val coefficientsRStd = new DenseMatrix(3, 4, Array( - 0.0, 0.0, 0.0, 0.09404023, - -0.1232443, 0.3258380, -0.1564688, -0.2053965, - 0.0, -0.4803856, 0.0, 0.0), isTransposed = true) - val interceptsRStd = Vectors.dense(-0.68988825, -0.2303499, 0.9202381) - - val coefficientsR = new DenseMatrix(3, 4, Array( - 0.0, 0.0, 0.01933812, 0.03666044, - -0.0577182, 0.0, -0.2081718, -0.1304592, - 0.0, 0.0, 0.0, 0.0), isTransposed = true) - val interceptsR = Vectors.dense(-0.44893320, 0.7376760, -0.2887428) - - assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.02) - assert(model1.interceptVector ~== interceptsRStd relTol 0.1) - assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficientMatrix ~== coefficientsR absTol 0.02) - assert(model2.interceptVector ~== interceptsR relTol 0.1) - assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) - } - - test("multinomial logistic regression without intercept with L1 regularization") { - val trainer1 = (new LogisticRegression).setFitIntercept(false) - .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true) - val trainer2 = (new LogisticRegression).setFitIntercept(false) - .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(false) - - val model1 = trainer1.fit(multinomialDataset) - val model2 = trainer2.fit(multinomialDataset) - /* - Use the following R code to load the data and train the model using glmnet package. - library("glmnet") - data <- read.csv("path", header=FALSE) - label = as.factor(data$V1) - features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) - coefficientsStd = coef(glmnet(features, label, family="multinomial", alpha = 1, - lambda = 0.05, intercept=F, standardization=T)) - coefficients = coef(glmnet(features, label, family="multinomial", alpha = 1, lambda = 0.05, - intercept=F, standardization=F)) - > coefficientsStd - $`0` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 . - V3 . - V4 . - V5 0.01525105 - - $`1` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 -0.1502410 - V3 0.5134658 - V4 -0.1601146 - V5 -0.2500232 - - $`2` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 0.003301875 - V3 . - V4 . - V5 . - - > coefficients - $`0` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 . - V3 . - V4 . - V5 . - - $`1` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 . - V3 0.1943624 - V4 -0.1902577 - V5 -0.1028789 - - $`2` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 . - V3 . - V4 . - V5 . 
- */ - - val coefficientsRStd = new DenseMatrix(3, 4, Array( - 0.0, 0.0, 0.0, 0.01525105, - -0.1502410, 0.5134658, -0.1601146, -0.2500232, - 0.003301875, 0.0, 0.0, 0.0), isTransposed = true) - - val coefficientsR = new DenseMatrix(3, 4, Array( - 0.0, 0.0, 0.0, 0.0, - 0.0, 0.1943624, -0.1902577, -0.1028789, - 0.0, 0.0, 0.0, 0.0), isTransposed = true) - - assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.01) - assert(model1.interceptVector.toArray === Array.fill(3)(0.0)) - assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficientMatrix ~== coefficientsR absTol 0.01) - assert(model2.interceptVector.toArray === Array.fill(3)(0.0)) - assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) - } - - test("multinomial logistic regression with intercept with L2 regularization") { - val trainer1 = (new LogisticRegression).setFitIntercept(true) - .setElasticNetParam(0.0).setRegParam(0.1).setStandardization(true) - val trainer2 = (new LogisticRegression).setFitIntercept(true) - .setElasticNetParam(0.0).setRegParam(0.1).setStandardization(false) - - val model1 = trainer1.fit(multinomialDataset) - val model2 = trainer2.fit(multinomialDataset) - /* - Use the following R code to load the data and train the model using glmnet package. - library("glmnet") - data <- read.csv("path", header=FALSE) - label = as.factor(data$V1) - features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) - coefficientsStd = coef(glmnet(features, label, family="multinomial", alpha = 0, - lambda = 0.1, intercept=T, standardization=T)) - coefficients = coef(glmnet(features, label, family="multinomial", alpha = 0, - lambda = 0.1, intercept=T, standardization=F)) - > coefficientsStd - $`0` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - -1.70040424 - V2 0.17576070 - V3 0.01527894 - V4 0.10216108 - V5 0.26099531 - - $`1` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - 0.2438590 - V2 -0.2238875 - V3 0.5967610 - V4 -0.1555496 - V5 -0.3010479 - - $`2` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - 1.45654525 - V2 0.04812679 - V3 -0.61203992 - V4 0.05338850 - V5 0.04005258 - - > coefficients - $`0` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - -1.65488543 - V2 0.15715048 - V3 0.01992903 - V4 0.12428858 - V5 0.22130317 - - $`1` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - 1.1297533 - V2 -0.1974768 - V3 0.2776373 - V4 -0.1869445 - V5 -0.2510320 - - $`2` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - 0.52513212 - V2 0.04032627 - V3 -0.29756637 - V4 0.06265594 - V5 0.02972883 - */ - - val coefficientsRStd = new DenseMatrix(3, 4, Array( - 0.17576070, 0.01527894, 0.10216108, 0.26099531, - -0.2238875, 0.5967610, -0.1555496, -0.3010479, - 0.04812679, -0.61203992, 0.05338850, 0.04005258), isTransposed = true) - val interceptsRStd = Vectors.dense(-1.70040424, 0.2438590, 1.45654525) - - val coefficientsR = new DenseMatrix(3, 4, Array( - 0.15715048, 0.01992903, 0.12428858, 0.22130317, - -0.1974768, 0.2776373, -0.1869445, -0.2510320, - 0.04032627, -0.29756637, 0.06265594, 0.02972883), isTransposed = true) - val interceptsR = Vectors.dense(-1.65488543, 1.1297533, 0.52513212) - - assert(model1.coefficientMatrix ~== coefficientsRStd relTol 0.05) - assert(model1.interceptVector ~== interceptsRStd relTol 0.05) - assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficientMatrix ~== coefficientsR relTol 0.05) - assert(model2.interceptVector ~== interceptsR relTol 0.05) - assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) - } - - 
test("multinomial logistic regression without intercept with L2 regularization") { - val trainer1 = (new LogisticRegression).setFitIntercept(false) - .setElasticNetParam(0.0).setRegParam(0.1).setStandardization(true) - val trainer2 = (new LogisticRegression).setFitIntercept(false) - .setElasticNetParam(0.0).setRegParam(0.1).setStandardization(false) - - val model1 = trainer1.fit(multinomialDataset) - val model2 = trainer2.fit(multinomialDataset) - /* - Use the following R code to load the data and train the model using glmnet package. - library("glmnet") - data <- read.csv("path", header=FALSE) - label = as.factor(data$V1) - features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) - coefficientsStd = coef(glmnet(features, label, family="multinomial", alpha = 0, - lambda = 0.1, intercept=F, standardization=T)) - coefficients = coef(glmnet(features, label, family="multinomial", alpha = 0, - lambda = 0.1, intercept=F, standardization=F)) - > coefficientsStd - $`0` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 0.03904171 - V3 -0.23354322 - V4 0.08288096 - V5 0.22706393 - - $`1` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 -0.2061848 - V3 0.6341398 - V4 -0.1530059 - V5 -0.2958455 - - $`2` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 0.16714312 - V3 -0.40059658 - V4 0.07012496 - V5 0.06878158 - > coefficients - $`0` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 -0.005704542 - V3 -0.144466409 - V4 0.092080736 - V5 0.182927657 - - $`1` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 -0.08469036 - V3 0.38996748 - V4 -0.16468436 - V5 -0.22522976 - - $`2` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 0.09039490 - V3 -0.24550107 - V4 0.07260362 - V5 0.04230210 - */ - val coefficientsRStd = new DenseMatrix(3, 4, Array( - 0.03904171, -0.23354322, 0.08288096, 0.2270639, - -0.2061848, 0.6341398, -0.1530059, -0.2958455, - 0.16714312, -0.40059658, 0.07012496, 0.06878158), isTransposed = true) - - val coefficientsR = new DenseMatrix(3, 4, Array( - -0.005704542, -0.144466409, 0.092080736, 0.182927657, - -0.08469036, 0.38996748, -0.16468436, -0.22522976, - 0.0903949, -0.24550107, 0.07260362, 0.0423021), isTransposed = true) - - assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.01) - assert(model1.interceptVector.toArray === Array.fill(3)(0.0)) - assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficientMatrix ~== coefficientsR absTol 0.01) - assert(model2.interceptVector.toArray === Array.fill(3)(0.0)) - assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) - } - - test("multinomial logistic regression with intercept with elasticnet regularization") { - val trainer1 = (new LogisticRegression).setFitIntercept(true) - .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(true) - .setMaxIter(300).setTol(1e-10) - val trainer2 = (new LogisticRegression).setFitIntercept(true) - .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(false) - .setMaxIter(300).setTol(1e-10) - - val model1 = trainer1.fit(multinomialDataset) - val model2 = trainer2.fit(multinomialDataset) - /* - Use the following R code to load the data and train the model using glmnet package. 
- library("glmnet") - data <- read.csv("path", header=FALSE) - label = as.factor(data$V1) - features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) - coefficientsStd = coef(glmnet(features, label, family="multinomial", alpha = 0.5, - lambda = 0.1, intercept=T, standardization=T)) - coefficients = coef(glmnet(features, label, family="multinomial", alpha = 0.5, - lambda = 0.1, intercept=T, standardization=F)) - > coefficientsStd - $`0` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - -0.5521819483 - V2 0.0003092611 - V3 . - V4 . - V5 0.0913818490 - - $`1` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - -0.27531989 - V2 -0.09790029 - V3 0.28502034 - V4 -0.12416487 - V5 -0.16513373 - - $`2` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - 0.8275018 - V2 . - V3 -0.4044859 - V4 . - V5 . - - > coefficients - $`0` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - -0.39876213 - V2 . - V3 . - V4 0.02547520 - V5 0.03893991 - - $`1` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - 0.61089869 - V2 -0.04224269 - V3 . - V4 -0.18923970 - V5 -0.09104249 - - $`2` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - -0.2121366 - V2 . - V3 . - V4 . - V5 . - */ - - val coefficientsRStd = new DenseMatrix(3, 4, Array( - 0.0003092611, 0.0, 0.0, 0.091381849, - -0.09790029, 0.28502034, -0.12416487, -0.16513373, - 0.0, -0.4044859, 0.0, 0.0), isTransposed = true) - val interceptsRStd = Vectors.dense(-0.5521819483, -0.27531989, 0.8275018) - - val coefficientsR = new DenseMatrix(3, 4, Array( - 0.0, 0.0, 0.0254752, 0.03893991, - -0.04224269, 0.0, -0.1892397, -0.09104249, - 0.0, 0.0, 0.0, 0.0), isTransposed = true) - val interceptsR = Vectors.dense(-0.39876213, 0.61089869, -0.2121366) - - assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.01) - assert(model1.interceptVector ~== interceptsRStd absTol 0.01) - assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficientMatrix ~== coefficientsR absTol 0.01) - assert(model2.interceptVector ~== interceptsR absTol 0.01) - assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) - } - - test("multinomial logistic regression without intercept with elasticnet regularization") { - val trainer1 = (new LogisticRegression).setFitIntercept(false) - .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(true) - .setMaxIter(300).setTol(1e-10) - val trainer2 = (new LogisticRegression).setFitIntercept(false) - .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(false) - .setMaxIter(300).setTol(1e-10) - - val model1 = trainer1.fit(multinomialDataset) - val model2 = trainer2.fit(multinomialDataset) - /* - Use the following R code to load the data and train the model using glmnet package. - library("glmnet") - data <- read.csv("path", header=FALSE) - label = as.factor(data$V1) - features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) - coefficientsStd = coef(glmnet(features, label, family="multinomial", alpha = 0.5, - lambda = 0.1, intercept=F, standardization=T)) - coefficients = coef(glmnet(features, label, family="multinomial", alpha = 0.5, - lambda = 0.1, intercept=F, standardization=F)) - > coefficientsStd - $`0` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 . - V3 . - V4 . - V5 0.03543706 - - $`1` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 -0.1187387 - V3 0.4025482 - V4 -0.1270969 - V5 -0.1918386 - - $`2` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 0.00774365 - V3 . - V4 . - V5 . - - > coefficients - $`0` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 . 
- V3 . - V4 . - V5 . - - $`1` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 . - V3 0.14666497 - V4 -0.16570638 - V5 -0.05982875 - - $`2` - 5 x 1 sparse Matrix of class "dgCMatrix" - s0 - . - V2 . - V3 . - V4 . - V5 . - */ - val coefficientsRStd = new DenseMatrix(3, 4, Array( - 0.0, 0.0, 0.0, 0.03543706, - -0.1187387, 0.4025482, -0.1270969, -0.1918386, - 0.0, 0.0, 0.0, 0.00774365), isTransposed = true) - - val coefficientsR = new DenseMatrix(3, 4, Array( - 0.0, 0.0, 0.0, 0.0, - 0.0, 0.14666497, -0.16570638, -0.05982875, - 0.0, 0.0, 0.0, 0.0), isTransposed = true) - - assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.01) - assert(model1.interceptVector.toArray === Array.fill(3)(0.0)) - assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps) - assert(model2.coefficientMatrix ~== coefficientsR absTol 0.01) - assert(model2.interceptVector.toArray === Array.fill(3)(0.0)) - assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) - } - -// test("read/write") { -// def checkModelData( -// model: LogisticRegressionModel, -// model2: LogisticRegressionModel): Unit = { -// assert(model.interceptVector === model2.interceptVector) -// assert(model.coefficientMatrix.toArray === model2.coefficients.toArray) -// assert(model.numClasses === model2.numClasses) -// assert(model.numFeatures === model2.numFeatures) -// } -// val mlr = new LogisticRegression() -// testEstimatorAndModelReadWrite(mlr, dataset, -// MultinomialLogisticRegressionSuite.allParamSettings, -// checkModelData) -// } -} - -object MultinomialLogisticRegressionSuite { - - /** - * Mapping from all Params to valid settings which differ from the defaults. - * This is useful for tests which need to exercise all Params, such as save/load. - * This excludes input columns to simplify some tests. - */ - val allParamSettings: Map[String, Any] = ProbabilisticClassifierSuite.allParamSettings ++ Map( - "probabilityCol" -> "myProbability", - "thresholds" -> Array(0.4, 0.6), - "regParam" -> 0.01, - "elasticNetParam" -> 0.1, - "maxIter" -> 2, // intentionally small - "fitIntercept" -> true, - "tol" -> 0.8, - "standardization" -> false - ) -} From 262bc996063f4d07b9440d6164be01f497d180ef Mon Sep 17 00:00:00 2001 From: sethah Date: Fri, 26 Aug 2016 09:36:05 -0700 Subject: [PATCH 10/24] some small fixes --- .../classification/LogisticRegression.scala | 24 +++++++++---------- .../ProbabilisticClassifier.scala | 6 ----- .../classification/LogisticRegression.scala | 4 ++-- .../LogisticRegressionSuite.scala | 15 ++++++------ .../ml/classification/OneVsRestSuite.scala | 4 ++-- .../spark/ml/tuning/CrossValidatorSuite.scala | 4 ++-- .../ml/tuning/TrainValidationSplitSuite.scala | 4 ++-- 7 files changed, 28 insertions(+), 33 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 80426fc019e83..1a7d6a2aa68a5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -68,6 +68,7 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas * * @group setParam */ + // TODO: Implement SPARK-11543? 
def setThreshold(value: Double): this.type = { if (isSet(thresholds)) clear(thresholds) set(threshold, value) @@ -88,14 +89,14 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas * * @group param */ - @Since("2.0.0") + @Since("2.1.0") final val family: Param[String] = new Param(this, "family", "The name of family which is a description of the label distribution to be used in the " + s"model. Supported options: ${supportedFamilyNames.mkString(", ")}.", ParamValidators.inArray[String](supportedFamilyNames)) /** @group getParam */ - @Since("2.0.0") + @Since("2.1.0") def getFamily: String = $(family) /** @@ -252,7 +253,7 @@ class LogisticRegression @Since("1.2.0") ( * * @group setParam */ - @Since("2.0.0") + @Since("2.1.0") def setFamily(value: String): this.type = set(family, value) setDefault(family -> "auto") @@ -271,7 +272,6 @@ class LogisticRegression @Since("1.2.0") ( setDefault(standardization -> true) @Since("1.5.0") - // TODO: Check this behavior override def setThreshold(value: Double): this.type = super.setThreshold(value) @Since("1.5.0") @@ -354,18 +354,18 @@ class LogisticRegression @Since("1.2.0") ( val numClasses = MetadataUtils.getNumClasses(dataset.schema($(labelCol))) match { case Some(n: Int) => require(n >= histogram.length, s"Specified number of classes $n was " + - s"less than the number of unique labels ${histogram.length}") + s"less than the number of unique labels ${histogram.length}.") n case None => histogram.length } val isBinaryClassification = numClasses == 1 || numClasses == 2 - val isMultinomial = ($(family) == LogisticRegression.auto && !isBinaryClassification) || - ($(family) == LogisticRegression.multinomial) + val isMultinomial = ($(family) == LogisticRegression.Auto && !isBinaryClassification) || + ($(family) == LogisticRegression.Multinomial) val numCoefficientSets = if (isMultinomial) numClasses else 1 if (!isMultinomial) { require(isBinaryClassification, s"Binomial family only supports 1 or 2 " + - s"outcome classes but found $numClasses") + s"outcome classes but found $numClasses.") } if (isDefined(thresholds)) { @@ -646,11 +646,11 @@ object LogisticRegression extends DefaultParamsReadable[LogisticRegression] { @Since("1.6.0") override def load(path: String): LogisticRegression = super.load(path) - private val multinomial = "multinomial" - private val binomial = "binomial" - private val auto = "auto" + private val Multinomial = "multinomial" + private val Binomial = "binomial" + private val Auto = "auto" - private[classification] lazy val supportedFamilyNames = Array(auto, binomial, multinomial) + private[classification] val supportedFamilyNames = Array(Auto, Binomial, Multinomial) } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala index 989bd19528a97..1a07aab663030 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala @@ -219,12 +219,6 @@ abstract class ProbabilisticClassificationModel[ i += 1 } argMax -// val thresholds: Array[Double] = getThresholds -// val scaledProbability: Array[Double] = -// probability.toArray.zip(thresholds).map { case (p, t) => -// if (t == 0.0) Double.PositiveInfinity else p / t -// } -// Vectors.dense(scaledProbability).argmax } } } diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index c3770dd0a12df..d851b983349c9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -432,8 +432,8 @@ class LogisticRegressionWithLBFGS if (userSuppliedWeights) { val uid = Identifiable.randomUID("logreg-static") lr.setInitialModel(new org.apache.spark.ml.classification.LogisticRegressionModel(uid, - new DenseMatrix(1, initialWeights.size, initialWeights.toArray, isTransposed = true), - Vectors.dense(0.0).asML, 2, false)) + new DenseMatrix(1, initialWeights.size, initialWeights.toArray), + Vectors.dense(1.0).asML, 2, false)) } lr.setFitIntercept(addIntercept) lr.setMaxIter(optimizer.getNumIterations()) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 47c1a7218fcbd..31f991b3fd5e4 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -17,22 +17,21 @@ package org.apache.spark.ml.classification -import org.apache.spark.ml.attribute.NominalAttribute - import scala.collection.JavaConverters._ import scala.language.existentials import scala.util.Random import scala.util.control.Breaks._ import org.apache.spark.{SparkException, SparkFunSuite} +import org.apache.spark.ml.attribute.NominalAttribute import org.apache.spark.ml.classification.LogisticRegressionSuite._ -import org.apache.spark.ml.feature.{Instance, LabeledPoint} -import org.apache.spark.ml.linalg.{Matrices, DenseMatrix, Vector, Vectors} +import org.apache.spark.ml.feature.LabeledPoint +import org.apache.spark.ml.linalg.{DenseMatrix, Matrices, Vector, Vectors} import org.apache.spark.ml.param.ParamsSuite import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils} import org.apache.spark.ml.util.TestingUtils._ import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.{DataFrame, Dataset, Row} +import org.apache.spark.sql.{Dataset, Row} import org.apache.spark.sql.functions.lit class LogisticRegressionSuite @@ -99,7 +98,7 @@ class LogisticRegressionSuite } /** - * Enable the ignored test to export the smallBinaryDataset into CSV format, + * Enable the ignored test to export the dataset into CSV format, * so we can validate the training accuracy compared with R's glmnet package. 
*/ ignore("export test data into CSV format") { @@ -114,7 +113,7 @@ class LogisticRegressionSuite test("params") { ParamsSuite.checkParams(new LogisticRegression) val model = new LogisticRegressionModel("logReg", - new DenseMatrix(1, 1, Array(0.0), isTransposed = false), Vectors.dense(0.0), 2, false) + new DenseMatrix(1, 1, Array(0.0)), Vectors.dense(0.0), 2, isMultinomial = false) ParamsSuite.checkParams(model) } @@ -1839,6 +1838,7 @@ class LogisticRegressionSuite predictions1.zip(predictions2).foreach { case (Row(p1: Double), Row(p2: Double)) => assert(p1 === p2) } + assert(model2.summary.totalIterations === 1) val lr3 = new LogisticRegression() val model3 = lr3.fit(smallMultinomialDataset) @@ -1849,6 +1849,7 @@ class LogisticRegressionSuite predictions3.zip(predictions4).foreach { case (Row(p1: Double), Row(p2: Double)) => assert(p1 === p2) } + // TODO: check that it converges in a single iteration when initial model is available } test("logistic regression with all labels the same") { diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala index 09e38786aa002..3ae47029c8dd8 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.ml.attribute.NominalAttribute import org.apache.spark.ml.classification.LogisticRegressionSuite._ import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.feature.StringIndexer -import org.apache.spark.ml.linalg.{DenseMatrix, Vectors} +import org.apache.spark.ml.linalg.{Matrices, Vectors} import org.apache.spark.ml.param.{ParamMap, ParamsSuite} import org.apache.spark.ml.util.{DefaultReadWriteTest, MetadataUtils, MLTestingUtils} import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS @@ -61,7 +61,7 @@ class OneVsRestSuite extends SparkFunSuite with MLlibTestSparkContext with Defau test("params") { ParamsSuite.checkParams(new OneVsRest) val lrModel = new LogisticRegressionModel("logReg", - new DenseMatrix(1, 1, Array(0.0), isTransposed = false), Vectors.dense(0.0), 2, false) + Matrices.dense(1, 1, Array(0.0)), Vectors.dense(0.0), 2, false) val model = new OneVsRestModel("ovr", Metadata.empty, Array(lrModel)) ParamsSuite.checkParams(model) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala index 0fb26f26e7792..87c7c82e4c3b2 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressio import org.apache.spark.ml.classification.LogisticRegressionSuite.generateLogisticInput import org.apache.spark.ml.evaluation.{BinaryClassificationEvaluator, Evaluator, RegressionEvaluator} import org.apache.spark.ml.feature.HashingTF -import org.apache.spark.ml.linalg.{DenseMatrix, Vectors} +import org.apache.spark.ml.linalg.{Matrices, Vectors} import org.apache.spark.ml.param.{ParamMap, ParamPair} import org.apache.spark.ml.param.shared.HasInputCol import org.apache.spark.ml.regression.LinearRegression @@ -245,7 +245,7 @@ class CrossValidatorSuite val lr = new LogisticRegression() .setThreshold(0.6) val lrModel = new LogisticRegressionModel(lr.uid, - new 
DenseMatrix(1, 1, Array(0.0), isTransposed = false), Vectors.dense(0.0), 2, false) + Matrices.dense(1, 1, Array(0.0)), Vectors.dense(0.0), 2, false) .setThreshold(0.6) val evaluator = new BinaryClassificationEvaluator() .setMetricName("areaUnderPR") // not default metric diff --git a/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala index a05a1d641f1bb..6c58bed9812c1 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel} import org.apache.spark.ml.classification.LogisticRegressionSuite.generateLogisticInput import org.apache.spark.ml.evaluation.{BinaryClassificationEvaluator, Evaluator, RegressionEvaluator} -import org.apache.spark.ml.linalg.{DenseMatrix, Vectors} +import org.apache.spark.ml.linalg.{Matrices, Vectors} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.param.shared.HasInputCol import org.apache.spark.ml.regression.LinearRegression @@ -134,7 +134,7 @@ class TrainValidationSplitSuite val lr = new LogisticRegression() .setThreshold(0.6) val lrModel = new LogisticRegressionModel(lr.uid, - new DenseMatrix(1, 1, Array(0.0), isTransposed = false), Vectors.dense(0.0), 2, false) + Matrices.dense(1, 1, Array(0.0)), Vectors.dense(0.0), 2, false) .setThreshold(0.6) val evaluator = new BinaryClassificationEvaluator() val paramMaps = new ParamGridBuilder() From b64ffad60d8f344a576227bf5f150eea5679aaa9 Mon Sep 17 00:00:00 2001 From: sethah Date: Fri, 26 Aug 2016 10:52:29 -0700 Subject: [PATCH 11/24] use _coefficients --- .../org/apache/spark/ml/classification/LogisticRegression.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 1a7d6a2aa68a5..314fde435eb5a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -705,7 +705,7 @@ class LogisticRegressionModel private[spark] ( /** Margin (rawPrediction) for class label 1. For binary classification only. */ private val margin: Vector => Double = (features) => { - BLAS.dot(features, coefficients) + _intercept + BLAS.dot(features, _coefficients) + _intercept } /** Margin (rawPrediction) for each class label. 
*/ From 7895c8176df4fccc144ddb29079af8dc8a9a1942 Mon Sep 17 00:00:00 2001 From: sethah Date: Fri, 26 Aug 2016 11:15:58 -0700 Subject: [PATCH 12/24] use strings in supported families --- .../spark/ml/classification/LogisticRegression.scala | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 314fde435eb5a..75fab6fc81094 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -359,8 +359,8 @@ class LogisticRegression @Since("1.2.0") ( case None => histogram.length } val isBinaryClassification = numClasses == 1 || numClasses == 2 - val isMultinomial = ($(family) == LogisticRegression.Auto && !isBinaryClassification) || - ($(family) == LogisticRegression.Multinomial) + val isMultinomial = ($(family) == "auto" && !isBinaryClassification) || + ($(family) == "multinomial") val numCoefficientSets = if (isMultinomial) numClasses else 1 if (!isMultinomial) { @@ -646,11 +646,8 @@ object LogisticRegression extends DefaultParamsReadable[LogisticRegression] { @Since("1.6.0") override def load(path: String): LogisticRegression = super.load(path) - private val Multinomial = "multinomial" - private val Binomial = "binomial" - private val Auto = "auto" - - private[classification] val supportedFamilyNames = Array(Auto, Binomial, Multinomial) + private[classification] val supportedFamilyNames = + Array("auto", "binomial", "multinomial").map(_.toLowerCase) } /** From c9b6d970a625fff921d0c512bb7a1dd4f7a10bf1 Mon Sep 17 00:00:00 2001 From: sethah Date: Thu, 1 Sep 2016 21:43:32 -0700 Subject: [PATCH 13/24] mima exclusion for lr model constructor --- project/MimaExcludes.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 1bdcf9a623dc9..d4cbf510b9a5c 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -812,6 +812,9 @@ object MimaExcludes { ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ui.exec.ExecutorsListener.executorToTotalCores"), ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ui.exec.ExecutorsListener.executorToTasksMax"), ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ui.exec.ExecutorsListener.executorToJvmGCTime") + ) ++ Seq( + // [SPARK-17163] Unify logistic regression interface. Private constructor has new signature. 
+ ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionModel.this") ) } From b532692f4f63c414dbd4bec38c0adecb5d83d853 Mon Sep 17 00:00:00 2001 From: sethah Date: Fri, 9 Sep 2016 13:24:33 -0700 Subject: [PATCH 14/24] address initial review --- .../classification/LogisticRegression.scala | 56 +++++++++++-------- .../LogisticRegressionSuite.scala | 24 +++++++- 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 75fab6fc81094..b5ef73cfa8356 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -50,7 +50,7 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas with HasRegParam with HasElasticNetParam with HasMaxIter with HasFitIntercept with HasTol with HasStandardization with HasWeightCol with HasThreshold with HasAggregationDepth { - import LogisticRegression._ + import org.apache.spark.ml.classification.LogisticRegression.supportedFamilyNames /** * Set threshold in binary classification, in range [0, 1]. @@ -377,7 +377,7 @@ class LogisticRegression @Since("1.2.0") ( instr.logNumClasses(numClasses) instr.logNumFeatures(numFeatures) - val (coefficients, intercept, objectiveHistory) = { + val (coefficientMatrix, interceptVector, objectiveHistory) = { if (numInvalid != 0) { val msg = s"Classification labels should be in [0 to ${numClasses - 1}]. " + s"Found $numInvalid invalid labels." @@ -385,20 +385,25 @@ class LogisticRegression @Since("1.2.0") ( throw new SparkException(msg) } - val isConstantLabel = histogram.count(_ != 0) == 1 + val isConstantLabel = histogram.count(_ != 0.0) == 1 if ($(fitIntercept) && isConstantLabel) { logWarning(s"All labels are the same value and fitIntercept=true, so the coefficients " + s"will be zeros. Training is not needed.") val constantLabelIndex = Vectors.dense(histogram).argmax - val coefficientMatrix = Matrices.sparse(numCoefficientSets, numFeatures, - Array.fill(numFeatures + 1)(0), Array.empty[Int], Array.empty[Double]) - val interceptVector = if (isMultinomial) { + val coefMatrix = if (numFeatures < numClasses) { + new SparseMatrix(numCoefficientSets, numFeatures, + Array.fill(numFeatures + 1)(0), Array.empty[Int], Array.empty[Double]) + } else { + new SparseMatrix(numCoefficientSets, numFeatures, Array.fill(numClasses + 1)(0), + Array.empty[Int], Array.empty[Double], isTransposed = true) + } + val interceptVec = if (isMultinomial) { Vectors.sparse(numClasses, Seq((constantLabelIndex, Double.PositiveInfinity))) } else { Vectors.dense(if (numClasses == 2) Double.PositiveInfinity else Double.NegativeInfinity) } - (coefficientMatrix, interceptVector, Array.empty[Double]) + (coefMatrix, interceptVec, Array.empty[Double]) } else { if (!$(fitIntercept) && isConstantLabel) { logWarning(s"All labels belong to a single class and fitIntercept=false. 
It's a " + @@ -460,31 +465,34 @@ class LogisticRegression @Since("1.2.0") ( val initialCoefficientsWithIntercept = Vectors.zeros(numCoefficientSets * numFeaturesPlusIntercept) - val initialModelIsValid = optInitialModel.exists { model => - val providedCoefs = model.coefficientMatrix - val modelValid = (providedCoefs.numRows == numCoefficientSets) && - (providedCoefs.numCols == numFeatures) && - (model.interceptVector.size == numCoefficientSets) - if (!modelValid) { - logWarning(s"Initial coefficients will be ignored! Its dimensions " + - s"(${providedCoefs.numRows}, ${providedCoefs.numCols}) did not match the expected " + - s"size ($numCoefficientSets, $numFeatures)") - } - modelValid + val initialModelIsValid = optInitialModel match { + case Some(_initialModel) => + val providedCoefs = _initialModel.coefficientMatrix + val modelIsValid = (providedCoefs.numRows == numCoefficientSets) && + (providedCoefs.numCols == numFeatures) && + (_initialModel.interceptVector.size == numCoefficientSets) && + (_initialModel.getFitIntercept == $(fitIntercept)) + if (!modelIsValid) { + logWarning(s"Initial coefficients will be ignored! Its dimensions " + + s"(${providedCoefs.numRows}, ${providedCoefs.numCols}) did not match the " + + s"expected size ($numCoefficientSets, $numFeatures)") + } + modelIsValid + case None => false } if (initialModelIsValid) { - val initialCoefArray = initialCoefficientsWithIntercept.toArray + val initialCoefWithInterceptArray = initialCoefficientsWithIntercept.toArray val providedCoef = optInitialModel.get.coefficientMatrix providedCoef.foreachActive { (row, col, value) => val flatIndex = row * numFeaturesPlusIntercept + col // We need to scale the coefficients since they will be trained in the scaled space - initialCoefArray(flatIndex) = value * featuresStd(col) + initialCoefWithInterceptArray(flatIndex) = value * featuresStd(col) } if ($(fitIntercept)) { optInitialModel.get.interceptVector.foreachActive { (index, value) => val coefIndex = (index + 1) * numFeaturesPlusIntercept - 1 - initialCoefArray(coefIndex) = value + initialCoefWithInterceptArray(coefIndex) = value } } } else if ($(fitIntercept) && isMultinomial) { @@ -549,13 +557,13 @@ class LogisticRegression @Since("1.2.0") ( state = states.next() arrayBuilder += state.adjustedValue } + bcFeaturesStd.destroy(blocking = false) if (state == null) { val msg = s"${optimizer.getClass.getName} failed." 
logError(msg) throw new SparkException(msg) } - bcFeaturesStd.destroy(blocking = false) /* The coefficients are trained in the scaled space; we're converting them back to @@ -617,8 +625,8 @@ class LogisticRegression @Since("1.2.0") ( if (handlePersistence) instances.unpersist() - val model = copyValues(new LogisticRegressionModel(uid, coefficients, intercept, numClasses, - isMultinomial)) + val model = copyValues(new LogisticRegressionModel(uid, coefficientMatrix, interceptVector, + numClasses, isMultinomial)) // TODO: implement summary model for multinomial case val m = if (!isMultinomial) { val (summaryModel, probabilityColName) = model.findSummaryModelAndProbabilityCol() diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 31f991b3fd5e4..5af825ca0c0c3 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.ml.attribute.NominalAttribute import org.apache.spark.ml.classification.LogisticRegressionSuite._ import org.apache.spark.ml.feature.LabeledPoint -import org.apache.spark.ml.linalg.{DenseMatrix, Matrices, Vector, Vectors} +import org.apache.spark.ml.linalg._ import org.apache.spark.ml.param.ParamsSuite import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils} import org.apache.spark.ml.util.TestingUtils._ @@ -1849,7 +1849,7 @@ class LogisticRegressionSuite predictions3.zip(predictions4).foreach { case (Row(p1: Double), Row(p2: Double)) => assert(p1 === p2) } - // TODO: check that it converges in a single iteration when initial model is available + // TODO: check that it converges in a single iteration when model summary is available } test("logistic regression with all labels the same") { @@ -1894,6 +1894,26 @@ class LogisticRegressionSuite assert(allOneNoInterceptModel.summary.totalIterations > 0) } + test("compressed storage") { + val moreClassesThanFeatures = spark.createDataFrame(Seq( + LabeledPoint(4.0, Vectors.dense(0.0, 0.0, 0.0)), + LabeledPoint(4.0, Vectors.dense(1.0, 1.0, 1.0)), + LabeledPoint(4.0, Vectors.dense(2.0, 2.0, 2.0))) + ) + val mlr = new LogisticRegression().setFamily("multinomial") + val model = mlr.fit(moreClassesThanFeatures) + assert(model.coefficientMatrix.isInstanceOf[SparseMatrix]) + assert(model.coefficientMatrix.asInstanceOf[SparseMatrix].colPtrs.length === 4) + val moreFeaturesThanClasses = spark.createDataFrame(Seq( + LabeledPoint(1.0, Vectors.dense(0.0, 0.0, 0.0)), + LabeledPoint(1.0, Vectors.dense(1.0, 1.0, 1.0)), + LabeledPoint(1.0, Vectors.dense(2.0, 2.0, 2.0))) + ) + val model2 = mlr.fit(moreFeaturesThanClasses) + assert(model2.coefficientMatrix.isInstanceOf[SparseMatrix]) + assert(model2.coefficientMatrix.asInstanceOf[SparseMatrix].colPtrs.length === 3) + } + test("multiclass logistic regression with all labels the same") { val constantData = spark.createDataFrame(Seq( LabeledPoint(4.0, Vectors.dense(0.0)), From af8fb453e86b08956d06ee1f37ef3eb393287b74 Mon Sep 17 00:00:00 2001 From: sethah Date: Fri, 9 Sep 2016 13:49:49 -0700 Subject: [PATCH 15/24] rewriting family detection logic --- .../ml/classification/LogisticRegression.scala | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index b5ef73cfa8356..c4f283be0b4f4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -359,14 +359,16 @@ class LogisticRegression @Since("1.2.0") ( case None => histogram.length } val isBinaryClassification = numClasses == 1 || numClasses == 2 - val isMultinomial = ($(family) == "auto" && !isBinaryClassification) || - ($(family) == "multinomial") - val numCoefficientSets = if (isMultinomial) numClasses else 1 - - if (!isMultinomial) { - require(isBinaryClassification, s"Binomial family only supports 1 or 2 " + + val isMultinomial = $(family) match { + case "binomial" => + require(isBinaryClassification, s"Binomial family only supports 1 or 2 " + s"outcome classes but found $numClasses.") + false + case "multinomial" => true + case "auto" => !isBinaryClassification + case other => throw new IllegalArgumentException(s"Unsupported family: $other") } + val numCoefficientSets = if (isMultinomial) numClasses else 1 if (isDefined(thresholds)) { require($(thresholds).length == numClasses, this.getClass.getSimpleName + From b27cb2c190f4cdc7bd8540c8cd83e55814e52378 Mon Sep 17 00:00:00 2001 From: sethah Date: Fri, 9 Sep 2016 14:32:22 -0700 Subject: [PATCH 16/24] set family explicitly in tests --- .../LogisticRegressionSuite.scala | 78 ++++++++++--------- 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 5af825ca0c0c3..2060d7d113376 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -153,7 +153,7 @@ class LogisticRegressionSuite } test("setThreshold, getThreshold") { - val lr = new LogisticRegression + val lr = new LogisticRegression().setFamily("binomial") // default assert(lr.getThreshold === 0.5, "LogisticRegression.threshold should default to 0.5") withClue("LogisticRegression should not have thresholds set by default.") { @@ -170,7 +170,7 @@ class LogisticRegressionSuite lr.setThreshold(0.5) assert(lr.getThresholds === Array(0.5, 0.5)) // Set via thresholds - val lr2 = new LogisticRegression + val lr2 = new LogisticRegression().setFamily("binomial") lr2.setThresholds(Array(0.3, 0.7)) val expectedThreshold = 1.0 / (1.0 + 0.3 / 0.7) assert(lr2.getThreshold ~== expectedThreshold relTol 1E-7) @@ -234,11 +234,16 @@ class LogisticRegressionSuite } test("logistic regression doesn't fit intercept when fitIntercept is off") { - val lr = new LogisticRegression + val lr = new LogisticRegression().setFamily("binomial") lr.setFitIntercept(false) val model = lr.fit(smallBinaryDataset) assert(model.intercept === 0.0) + val mlr = new LogisticRegression().setFamily("multinomial") + mlr.setFitIntercept(false) + val mlrModel = mlr.fit(smallMultinomialDataset) + assert(mlrModel.interceptVector === Vectors.sparse(3, Seq())) + // copied model must have the same parent. 
MLTestingUtils.checkCopy(model) } @@ -288,7 +293,7 @@ class LogisticRegressionSuite } test("multinomial logistic regression: Predictor, Classifier methods") { - val mlr = new LogisticRegression + val mlr = new LogisticRegression().setFamily("multinomial") val model = mlr.fit(smallMultinomialDataset) assert(model.numClasses === 3) @@ -335,7 +340,7 @@ class LogisticRegressionSuite } test("binary logistic regression: Predictor, Classifier methods") { - val lr = new LogisticRegression + val lr = new LogisticRegression().setFamily("binomial") val model = lr.fit(smallBinaryDataset) assert(model.numClasses === 2) @@ -364,7 +369,7 @@ class LogisticRegressionSuite } test("coefficients and intercept methods") { - val mlr = new LogisticRegression().setMaxIter(1) + val mlr = new LogisticRegression().setMaxIter(1).setFamily("multinomial") val mlrModel = mlr.fit(smallMultinomialDataset) val thrownCoef = intercept[SparkException] { mlrModel.coefficients @@ -375,7 +380,7 @@ class LogisticRegressionSuite assert(thrownCoef.getMessage().contains("use coefficientMatrix instead")) assert(thrownIntercept.getMessage().contains("use interceptVector instead")) - val blr = new LogisticRegression().setMaxIter(1) + val blr = new LogisticRegression().setMaxIter(1).setFamily("binomial") val blrModel = blr.fit(smallBinaryDataset) assert(blrModel.coefficients.size === 1) assert(blrModel.intercept !== 0.0) @@ -1751,7 +1756,7 @@ class LogisticRegressionSuite val testData = spark.createDataFrame(Array.tabulate[LabeledPoint](numClasses) { i => LabeledPoint(i.toDouble, Vectors.dense(i.toDouble)) }) - val lr = new LogisticRegression().setWeightCol("weight") + val lr = new LogisticRegression().setFamily("binomial").setWeightCol("weight") val model = lr.fit(outlierData) val results = model.transform(testData).select("label", "prediction").collect() @@ -1775,7 +1780,7 @@ class LogisticRegressionSuite val testData = spark.createDataFrame(Array.tabulate[LabeledPoint](numClasses) { i => LabeledPoint(i.toDouble, Vectors.dense(i.toDouble)) }) - val mlr = new LogisticRegression().setWeightCol("weight") + val mlr = new LogisticRegression().setFamily("multinomial").setWeightCol("weight") val model = mlr.fit(outlierData) val results = model.transform(testData).select("label", "prediction").collect() @@ -1829,9 +1834,9 @@ class LogisticRegressionSuite } test("set initial model") { - val lr = new LogisticRegression() + val lr = new LogisticRegression().setFamily("binomial") val model1 = lr.fit(smallBinaryDataset) - val lr2 = new LogisticRegression().setInitialModel(model1).setMaxIter(5) + val lr2 = new LogisticRegression().setInitialModel(model1).setMaxIter(5).setFamily("binomial") val model2 = lr2.fit(smallBinaryDataset) val predictions1 = model1.transform(smallBinaryDataset).select("prediction").collect() val predictions2 = model2.transform(smallBinaryDataset).select("prediction").collect() @@ -1840,9 +1845,10 @@ class LogisticRegressionSuite } assert(model2.summary.totalIterations === 1) - val lr3 = new LogisticRegression() + val lr3 = new LogisticRegression().setFamily("multinomial") val model3 = lr3.fit(smallMultinomialDataset) - val lr4 = new LogisticRegression().setInitialModel(model3).setMaxIter(5) + val lr4 = new LogisticRegression() + .setInitialModel(model3).setMaxIter(5).setFamily("multinomial") val model4 = lr4.fit(smallMultinomialDataset) val predictions3 = model3.transform(smallMultinomialDataset).select("prediction").collect() val predictions4 = model4.transform(smallMultinomialDataset).select("prediction").collect() @@ 
-1852,7 +1858,7 @@ class LogisticRegressionSuite // TODO: check that it converges in a single iteration when model summary is available } - test("logistic regression with all labels the same") { + test("binary logistic regression with all labels the same") { val sameLabels = smallBinaryDataset .withColumn("zeroLabel", lit(0.0)) .withColumn("oneLabel", lit(1.0)) @@ -1861,6 +1867,7 @@ class LogisticRegressionSuite val lrIntercept = new LogisticRegression() .setFitIntercept(true) .setMaxIter(3) + .setFamily("binomial") val allZeroInterceptModel = lrIntercept .setLabelCol("zeroLabel") @@ -1880,6 +1887,7 @@ class LogisticRegressionSuite val lrNoIntercept = new LogisticRegression() .setFitIntercept(false) .setMaxIter(3) + .setFamily("binomial") val allZeroNoInterceptModel = lrNoIntercept .setLabelCol("zeroLabel") @@ -1894,26 +1902,6 @@ class LogisticRegressionSuite assert(allOneNoInterceptModel.summary.totalIterations > 0) } - test("compressed storage") { - val moreClassesThanFeatures = spark.createDataFrame(Seq( - LabeledPoint(4.0, Vectors.dense(0.0, 0.0, 0.0)), - LabeledPoint(4.0, Vectors.dense(1.0, 1.0, 1.0)), - LabeledPoint(4.0, Vectors.dense(2.0, 2.0, 2.0))) - ) - val mlr = new LogisticRegression().setFamily("multinomial") - val model = mlr.fit(moreClassesThanFeatures) - assert(model.coefficientMatrix.isInstanceOf[SparseMatrix]) - assert(model.coefficientMatrix.asInstanceOf[SparseMatrix].colPtrs.length === 4) - val moreFeaturesThanClasses = spark.createDataFrame(Seq( - LabeledPoint(1.0, Vectors.dense(0.0, 0.0, 0.0)), - LabeledPoint(1.0, Vectors.dense(1.0, 1.0, 1.0)), - LabeledPoint(1.0, Vectors.dense(2.0, 2.0, 2.0))) - ) - val model2 = mlr.fit(moreFeaturesThanClasses) - assert(model2.coefficientMatrix.isInstanceOf[SparseMatrix]) - assert(model2.coefficientMatrix.asInstanceOf[SparseMatrix].colPtrs.length === 3) - } - test("multiclass logistic regression with all labels the same") { val constantData = spark.createDataFrame(Seq( LabeledPoint(4.0, Vectors.dense(0.0)), @@ -1959,8 +1947,28 @@ class LogisticRegressionSuite // TODO: check num iters is zero when it become available in the model } + test("compressed storage") { + val moreClassesThanFeatures = spark.createDataFrame(Seq( + LabeledPoint(4.0, Vectors.dense(0.0, 0.0, 0.0)), + LabeledPoint(4.0, Vectors.dense(1.0, 1.0, 1.0)), + LabeledPoint(4.0, Vectors.dense(2.0, 2.0, 2.0))) + ) + val mlr = new LogisticRegression().setFamily("multinomial") + val model = mlr.fit(moreClassesThanFeatures) + assert(model.coefficientMatrix.isInstanceOf[SparseMatrix]) + assert(model.coefficientMatrix.asInstanceOf[SparseMatrix].colPtrs.length === 4) + val moreFeaturesThanClasses = spark.createDataFrame(Seq( + LabeledPoint(1.0, Vectors.dense(0.0, 0.0, 0.0)), + LabeledPoint(1.0, Vectors.dense(1.0, 1.0, 1.0)), + LabeledPoint(1.0, Vectors.dense(2.0, 2.0, 2.0))) + ) + val model2 = mlr.fit(moreFeaturesThanClasses) + assert(model2.coefficientMatrix.isInstanceOf[SparseMatrix]) + assert(model2.coefficientMatrix.asInstanceOf[SparseMatrix].colPtrs.length === 3) + } + test("numClasses specified in metadata/inferred") { - val lr = new LogisticRegression().setMaxIter(1) + val lr = new LogisticRegression().setMaxIter(1).setFamily("multinomial") // specify more classes than unique label values val labelMeta = NominalAttribute.defaultAttr.withName("label").withNumValues(4).toMetadata() From be030b5269518fd5c018e9e172cea7685addcb03 Mon Sep 17 00:00:00 2001 From: sethah Date: Fri, 9 Sep 2016 16:01:32 -0700 Subject: [PATCH 17/24] fix compression bug --- 
.../apache/spark/ml/classification/LogisticRegression.scala | 5 +++-- .../spark/ml/classification/LogisticRegressionSuite.scala | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index c4f283be0b4f4..fb491dd9d60be 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -358,6 +358,7 @@ class LogisticRegression @Since("1.2.0") ( n case None => histogram.length } + val isBinaryClassification = numClasses == 1 || numClasses == 2 val isMultinomial = $(family) match { case "binomial" => @@ -393,11 +394,11 @@ class LogisticRegression @Since("1.2.0") ( logWarning(s"All labels are the same value and fitIntercept=true, so the coefficients " + s"will be zeros. Training is not needed.") val constantLabelIndex = Vectors.dense(histogram).argmax - val coefMatrix = if (numFeatures < numClasses) { + val coefMatrix = if (numFeatures < numCoefficientSets) { new SparseMatrix(numCoefficientSets, numFeatures, Array.fill(numFeatures + 1)(0), Array.empty[Int], Array.empty[Double]) } else { - new SparseMatrix(numCoefficientSets, numFeatures, Array.fill(numClasses + 1)(0), + new SparseMatrix(numCoefficientSets, numFeatures, Array.fill(numCoefficientSets + 1)(0), Array.empty[Int], Array.empty[Double], isTransposed = true) } val interceptVec = if (isMultinomial) { diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 2060d7d113376..e3e3000018a12 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -1965,6 +1965,11 @@ class LogisticRegressionSuite val model2 = mlr.fit(moreFeaturesThanClasses) assert(model2.coefficientMatrix.isInstanceOf[SparseMatrix]) assert(model2.coefficientMatrix.asInstanceOf[SparseMatrix].colPtrs.length === 3) + + val blr = new LogisticRegression().setFamily("binomial") + val blrModel = blr.fit(moreFeaturesThanClasses) + assert(blrModel.coefficientMatrix.isInstanceOf[SparseMatrix]) + assert(blrModel.coefficientMatrix.asInstanceOf[SparseMatrix].colPtrs.length === 2) } test("numClasses specified in metadata/inferred") { From 73158e5b24e5e58de8284aef84297bdefa75e8ca Mon Sep 17 00:00:00 2001 From: sethah Date: Fri, 9 Sep 2016 19:18:56 -0700 Subject: [PATCH 18/24] use regex util --- .../apache/spark/ml/classification/LogisticRegression.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index fb491dd9d60be..4eb9dfd9cc128 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -42,6 +42,7 @@ import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.sql.functions.{col, lit} import org.apache.spark.sql.types.DoubleType import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.VersionUtils /** * Params for logistic regression. 
@@ -298,6 +299,7 @@ class LogisticRegression @Since("1.2.0") ( * If the dimensions of features or the number of partitions are large, * this param could be adjusted to a larger size. * Default is 2. + * * @group expertSetParam */ @Since("2.1.0") @@ -966,8 +968,7 @@ object LogisticRegressionModel extends MLReadable[LogisticRegressionModel] { override def load(path: String): LogisticRegressionModel = { val metadata = DefaultParamsReader.loadMetadata(path, sc, className) - val versionRegex = "([0-9]+)\\.([0-9]+)\\.(.+)".r - val versionRegex(major, minor, _) = metadata.sparkVersion + val (major, minor) = VersionUtils.majorMinorVersion(metadata.sparkVersion) val dataPath = new Path(path, "data").toString val data = sparkSession.read.format("parquet").load(dataPath) @@ -1386,6 +1387,7 @@ class BinaryLogisticRegressionSummary private[classification] ( * $$ *

* + * * @param bcCoefficients The broadcast coefficients corresponding to the features. * @param bcFeaturesStd The broadcast standard deviation values of the features. * @param numClasses the number of possible outcomes for k classes classification problem in From f538e1e36c6be6201b4408afbc89f2a9daf6cea7 Mon Sep 17 00:00:00 2001 From: sethah Date: Mon, 12 Sep 2016 15:09:18 -0700 Subject: [PATCH 19/24] sparse storage for binary lor --- .../classification/LogisticRegression.scala | 49 ++++++++++++------- .../LogisticRegressionSuite.scala | 4 +- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 4eb9dfd9cc128..ed730230c6d29 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -299,7 +299,7 @@ class LogisticRegression @Since("1.2.0") ( * If the dimensions of features or the number of partitions are large, * this param could be adjusted to a larger size. * Default is 2. - * + * * @group expertSetParam */ @Since("2.1.0") @@ -361,14 +361,13 @@ class LogisticRegression @Since("1.2.0") ( case None => histogram.length } - val isBinaryClassification = numClasses == 1 || numClasses == 2 val isMultinomial = $(family) match { case "binomial" => - require(isBinaryClassification, s"Binomial family only supports 1 or 2 " + + require(numClasses == 1 || numClasses == 2, s"Binomial family only supports 1 or 2 " + s"outcome classes but found $numClasses.") false case "multinomial" => true - case "auto" => !isBinaryClassification + case "auto" => numClasses > 2 case other => throw new IllegalArgumentException(s"Unsupported family: $other") } val numCoefficientSets = if (isMultinomial) numClasses else 1 @@ -396,6 +395,7 @@ class LogisticRegression @Since("1.2.0") ( logWarning(s"All labels are the same value and fitIntercept=true, so the coefficients " + s"will be zeros. Training is not needed.") val constantLabelIndex = Vectors.dense(histogram).argmax + // TODO: use `compressed` after SPARK-17471 val coefMatrix = if (numFeatures < numCoefficientSets) { new SparseMatrix(numCoefficientSets, numFeatures, Array.fill(numFeatures + 1)(0), Array.empty[Int], Array.empty[Double]) @@ -587,21 +587,34 @@ class LogisticRegression @Since("1.2.0") ( 0.0 } } - val coefficientMatrix = - new DenseMatrix(numCoefficientSets, numFeatures, coefficientArray, isTransposed = true) if ($(regParam) == 0.0 && isMultinomial) { /* - When no regularization is applied, the coefficients lack identifiability because - we do not use a pivot class. We can add any constant value to the coefficients and - get the same likelihood. So here, we choose the mean centered coefficients for + When no regularization is applied, the multinomial coefficients lack identifiability + because we do not use a pivot class. We can add any constant value to the coefficients + and get the same likelihood. So here, we choose the mean centered coefficients for reproducibility. This method follows the approach in glmnet, described here: Friedman, et al. 
"Regularization Paths for Generalized Linear Models via Coordinate Descent," https://core.ac.uk/download/files/153/6287975.pdf */ - val coefficientMean = coefficientMatrix.values.sum / coefficientMatrix.values.length - coefficientMatrix.update(_ - coefficientMean) + val coefficientMean = coefficientArray.sum / coefficientArray.length + coefficientArray.indices.foreach { i => coefficientArray(i) -= coefficientMean} + } + + val denseCoefficientMatrix = + new DenseMatrix(numCoefficientSets, numFeatures, coefficientArray, isTransposed = true) + // TODO: use `denseCoefficientMatrix.compressed` after SPARK-17471 + val compressedCoefficientMatrix = if (isMultinomial) { + denseCoefficientMatrix + } else { + val compressedVector = Vectors.dense(coefficientArray).compressed + compressedVector match { + case dv: DenseVector => denseCoefficientMatrix + case sv: SparseVector => + new SparseMatrix(1, numFeatures, Array(0, sv.indices.length), sv.indices, sv.values, + isTransposed = true) + } } val interceptsArray: Array[Double] = if ($(fitIntercept)) { @@ -612,10 +625,8 @@ class LogisticRegression @Since("1.2.0") ( } else { Array[Double]() } - /* - The intercepts are never regularized, so we always center the mean. - */ val interceptVector = if (interceptsArray.nonEmpty && isMultinomial) { + // The intercepts are never regularized, so we always center the mean. val interceptMean = interceptsArray.sum / numClasses interceptsArray.indices.foreach { i => interceptsArray(i) -= interceptMean } Vectors.dense(interceptsArray) @@ -624,7 +635,7 @@ class LogisticRegression @Since("1.2.0") ( } else { Vectors.sparse(numCoefficientSets, Seq()) } - (coefficientMatrix, interceptVector, arrayBuilder.result()) + (compressedCoefficientMatrix, interceptVector.compressed, arrayBuilder.result()) } } @@ -687,8 +698,12 @@ class LogisticRegressionModel private[spark] ( // convert to appropriate vector representation without replicating data private lazy val _coefficients: Vector = coefficientMatrix match { case dm: DenseMatrix => Vectors.dense(dm.values) - // TODO: better way to flatten sparse matrix? 
- case sm: SparseMatrix => Vectors.fromBreeze(sm.asBreeze.flatten(View.Require)) + case sm: SparseMatrix => + if (coefficientMatrix.isTransposed) { + Vectors.sparse(coefficientMatrix.numCols, sm.rowIndices, sm.values) + } else { + throw new IllegalStateException("LogisticRegressionModel coefficients should be row major.") + } } @Since("1.3.0") diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index e3e3000018a12..e7304401f324b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -604,7 +604,9 @@ class LogisticRegressionSuite val coefficientsR2 = Vectors.dense(0.0, 0.0, -0.1665453, 0.0) assert(model2.intercept ~== interceptR2 relTol 1E-2) - assert(model2.coefficients ~= coefficientsR2 absTol 1E-3) + assert(model2.coefficients ~== coefficientsR2 absTol 1E-3) + // TODO: move this to a standalone test of compression after SPARK-17471 + assert(model2.coefficients.isInstanceOf[SparseVector]) } test("binary logistic regression without intercept with L1 regularization") { From a3a7d20414ff9a2c9df1fdf60417a4c307dd472f Mon Sep 17 00:00:00 2001 From: sethah Date: Tue, 13 Sep 2016 22:41:45 -0700 Subject: [PATCH 20/24] remove scores and address some review --- .../classification/LogisticRegression.scala | 46 ++++--------------- .../ProbabilisticClassifier.scala | 3 +- 2 files changed, 10 insertions(+), 39 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index ed730230c6d29..151002b37df65 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.classification import scala.collection.mutable -import breeze.linalg.{DenseVector => BDV, View} +import breeze.linalg.{DenseVector => BDV} import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN} import org.apache.hadoop.fs.Path @@ -696,14 +696,13 @@ class LogisticRegressionModel private[spark] ( } // convert to appropriate vector representation without replicating data - private lazy val _coefficients: Vector = coefficientMatrix match { - case dm: DenseMatrix => Vectors.dense(dm.values) - case sm: SparseMatrix => - if (coefficientMatrix.isTransposed) { - Vectors.sparse(coefficientMatrix.numCols, sm.rowIndices, sm.values) - } else { - throw new IllegalStateException("LogisticRegressionModel coefficients should be row major.") - } + private lazy val _coefficients: Vector = { + require(coefficientMatrix.isTransposed, + "LogisticRegressionModel coefficients should be row major.") + coefficientMatrix match { + case dm: DenseMatrix => Vectors.dense(dm.values) + case sm: SparseMatrix => Vectors.sparse(coefficientMatrix.numCols, sm.rowIndices, sm.values) + } } @Since("1.3.0") @@ -746,35 +745,6 @@ class LogisticRegressionModel private[spark] ( 1.0 / (1.0 + math.exp(-m)) } - /** Score (probability) for each class label. */ - // TODO: do we need this anymore? 
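An aside on why the transposed branch added above can flatten so cheaply: for a CSR matrix (isTransposed = true) the rowIndices field actually stores column positions, so a 1 x numFeatures coefficient matrix is already a sparse vector in disguise and no Breeze round-trip is needed. A small sketch of that layout (the values are arbitrary):

    import org.apache.spark.ml.linalg.{SparseMatrix, Vectors}

    // One row, four columns, non-zeros at columns 1 and 3, stored row-major.
    val sm = new SparseMatrix(1, 4, Array(0, 2), Array(1, 3), Array(0.5, -0.25),
      isTransposed = true)
    // In CSR layout "rowIndices" holds column indices, so the single row
    // flattens without copying any structure:
    val flat = Vectors.sparse(sm.numCols, sm.rowIndices, sm.values)
    // flat == Vectors.sparse(4, Array(1, 3), Array(0.5, -0.25))

The non-transposed branch throws instead, because a column-major 1 x n matrix would need its indices re-derived from colPtrs; since the model always builds its matrix row-major, that branch should be unreachable, which the rewrite above makes explicit by replacing the IllegalStateException with an up-front require.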
-  private val scores: Vector => Vector = (features) => {
-    val m = margins(features)
-    val maxMarginIndex = m.argmax
-    val marginArray = m.toArray
-    val maxMargin = marginArray(maxMarginIndex)
-
-    // adjust margins for overflow
-    val sum = {
-      var temp = 0.0
-      var k = 0
-      while (k < numClasses) {
-        marginArray(k) = if (maxMargin > 0) {
-          math.exp(marginArray(k) - maxMargin)
-        } else {
-          math.exp(marginArray(k))
-        }
-        temp += marginArray(k)
-        k += 1
-      }
-      temp
-    }
-
-    val scores = Vectors.dense(marginArray)
-    BLAS.scal(1 / sum, scores)
-    scores
-  }
-
   @Since("1.6.0")
   override val numFeatures: Int = coefficientMatrix.numCols

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala
index 1a07aab663030..1b6e77542cc80 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala
@@ -205,7 +205,8 @@ abstract class ProbabilisticClassificationModel[
     var argMax = 0
     var max = Double.NegativeInfinity
     var i = 0
-    while (i < probability.size) {
+    val probabilitySize = probability.size
+    while (i < probabilitySize) {
       if (thresholds(i) == 0.0) {
         max = Double.PositiveInfinity
         argMax = i

From cb1666e43ba604de780f937cc5d3eaac28e7a0f8 Mon Sep 17 00:00:00 2001
From: sethah
Date: Wed, 14 Sep 2016 07:42:16 -0700
Subject: [PATCH 21/24] transposed error in test suites

---
 .../org/apache/spark/ml/classification/OneVsRestSuite.scala | 4 ++--
 .../org/apache/spark/ml/tuning/CrossValidatorSuite.scala    | 4 ++--
 .../apache/spark/ml/tuning/TrainValidationSplitSuite.scala  | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
index 3ae47029c8dd8..01a043195ad3f 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
@@ -22,7 +22,7 @@ import org.apache.spark.ml.attribute.NominalAttribute
 import org.apache.spark.ml.classification.LogisticRegressionSuite._
 import org.apache.spark.ml.feature.LabeledPoint
 import org.apache.spark.ml.feature.StringIndexer
-import org.apache.spark.ml.linalg.{Matrices, Vectors}
+import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}
 import org.apache.spark.ml.param.{ParamMap, ParamsSuite}
 import org.apache.spark.ml.util.{DefaultReadWriteTest, MetadataUtils, MLTestingUtils}
 import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
@@ -61,7 +61,7 @@ class OneVsRestSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
   test("params") {
     ParamsSuite.checkParams(new OneVsRest)
     val lrModel = new LogisticRegressionModel("logReg",
-      Matrices.dense(1, 1, Array(0.0)), Vectors.dense(0.0), 2, false)
+      new DenseMatrix(1, 1, Array(0.0), isTransposed = true), Vectors.dense(0.0), 2, false)
     val model = new OneVsRestModel("ovr", Metadata.empty, Array(lrModel))
     ParamsSuite.checkParams(model)
   }
diff --git a/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala
index 87c7c82e4c3b2..a0a2e87b10edf 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala
@@ -23,7 +23,7 @@ import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressio
 import org.apache.spark.ml.classification.LogisticRegressionSuite.generateLogisticInput
 import org.apache.spark.ml.evaluation.{BinaryClassificationEvaluator, Evaluator, RegressionEvaluator}
 import org.apache.spark.ml.feature.HashingTF
-import org.apache.spark.ml.linalg.{Matrices, Vectors}
+import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}
 import org.apache.spark.ml.param.{ParamMap, ParamPair}
 import org.apache.spark.ml.param.shared.HasInputCol
 import org.apache.spark.ml.regression.LinearRegression
@@ -245,7 +245,7 @@ class CrossValidatorSuite
     val lr = new LogisticRegression()
       .setThreshold(0.6)
     val lrModel = new LogisticRegressionModel(lr.uid,
-      Matrices.dense(1, 1, Array(0.0)), Vectors.dense(0.0), 2, false)
+      new DenseMatrix(1, 1, Array(0.0), isTransposed = true), Vectors.dense(0.0), 2, false)
       .setThreshold(0.6)
     val evaluator = new BinaryClassificationEvaluator()
       .setMetricName("areaUnderPR") // not default metric
diff --git a/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala
index 6c58bed9812c1..39e23e6c45dbb 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala
@@ -22,7 +22,7 @@ import org.apache.spark.ml.{Estimator, Model}
 import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel}
 import org.apache.spark.ml.classification.LogisticRegressionSuite.generateLogisticInput
 import org.apache.spark.ml.evaluation.{BinaryClassificationEvaluator, Evaluator, RegressionEvaluator}
-import org.apache.spark.ml.linalg.{Matrices, Vectors}
+import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}
 import org.apache.spark.ml.param.ParamMap
 import org.apache.spark.ml.param.shared.HasInputCol
 import org.apache.spark.ml.regression.LinearRegression
@@ -134,7 +134,7 @@ class TrainValidationSplitSuite
     val lr = new LogisticRegression()
       .setThreshold(0.6)
     val lrModel = new LogisticRegressionModel(lr.uid,
-      Matrices.dense(1, 1, Array(0.0)), Vectors.dense(0.0), 2, false)
+      new DenseMatrix(1, 1, Array(0.0), isTransposed = true), Vectors.dense(0.0), 2, false)
       .setThreshold(0.6)
     val evaluator = new BinaryClassificationEvaluator()
     val paramMaps = new ParamGridBuilder()

From bd7fca10e2081372574a6c9dd59da4aca9aaf13e Mon Sep 17 00:00:00 2001
From: sethah
Date: Wed, 14 Sep 2016 14:02:26 -0700
Subject: [PATCH 22/24] update scaladoc and correct predict method

---
 .../classification/LogisticRegression.scala | 44 ++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 151002b37df65..de5e23780c86a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -687,6 +687,11 @@ class LogisticRegressionModel private[spark] (
   extends ProbabilisticClassificationModel[Vector, LogisticRegressionModel]
   with LogisticRegressionParams with MLWritable {

+  /**
+   * A vector of model coefficients for "binomial" logistic regression. If this model was trained
+   * using the "multinomial" family then an exception is thrown.
+ * @return Vector + */ @Since("2.0.0") def coefficients: Vector = if (isMultinomial) { throw new SparkException("Multinomial models contain a matrix of coefficients, use " + @@ -705,6 +710,11 @@ class LogisticRegressionModel private[spark] ( } } + /** + * The model intercept for "binomial" logistic regression. If this model was fit with the + * "multinomial" family then an exception is thrown. + * @return Double + */ @Since("1.3.0") def intercept: Double = if (isMultinomial) { throw new SparkException("Multinomial models contain a vector of intercepts, use " + @@ -745,6 +755,34 @@ class LogisticRegressionModel private[spark] ( 1.0 / (1.0 + math.exp(-m)) } + /** Score (probability) for each class label. */ + private val scores: Vector => Vector = (features) => { + val m = margins(features) + val maxMarginIndex = m.argmax + val marginArray = m.toArray + val maxMargin = marginArray(maxMarginIndex) + + // adjust margins for overflow + val sum = { + var temp = 0.0 + var k = 0 + while (k < numClasses) { + marginArray(k) = if (maxMargin > 0) { + math.exp(marginArray(k) - maxMargin) + } else { + math.exp(marginArray(k)) + } + temp += marginArray(k) + k += 1 + } + temp + } + + val scores = Vectors.dense(marginArray) + BLAS.scal(1 / sum, scores) + scores + } + @Since("1.6.0") override val numFeatures: Int = coefficientMatrix.numCols @@ -802,7 +840,11 @@ class LogisticRegressionModel private[spark] ( * The behavior of this can be adjusted using [[thresholds]]. */ override protected def predict(features: Vector): Double = if (isMultinomial) { - super.predict(features) + if (isDefined(thresholds)) { + probability2prediction(scores(features)) + } else { + super.predict(features) + } } else { // Note: We should use getThreshold instead of $(threshold) since getThreshold is overridden. if (score(features) > getThreshold) 1 else 0 From 38fad988956458aac59109613c7d468855a0faf8 Mon Sep 17 00:00:00 2001 From: sethah Date: Wed, 14 Sep 2016 17:05:19 -0700 Subject: [PATCH 23/24] revert predict changes and add tests --- .../classification/LogisticRegression.scala | 34 +-------------- .../LogisticRegressionSuite.scala | 43 ++++++++++++++++++- 2 files changed, 43 insertions(+), 34 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index de5e23780c86a..2229009571a0d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -755,34 +755,6 @@ class LogisticRegressionModel private[spark] ( 1.0 / (1.0 + math.exp(-m)) } - /** Score (probability) for each class label. */ - private val scores: Vector => Vector = (features) => { - val m = margins(features) - val maxMarginIndex = m.argmax - val marginArray = m.toArray - val maxMargin = marginArray(maxMarginIndex) - - // adjust margins for overflow - val sum = { - var temp = 0.0 - var k = 0 - while (k < numClasses) { - marginArray(k) = if (maxMargin > 0) { - math.exp(marginArray(k) - maxMargin) - } else { - math.exp(marginArray(k)) - } - temp += marginArray(k) - k += 1 - } - temp - } - - val scores = Vectors.dense(marginArray) - BLAS.scal(1 / sum, scores) - scores - } - @Since("1.6.0") override val numFeatures: Int = coefficientMatrix.numCols @@ -840,11 +812,7 @@ class LogisticRegressionModel private[spark] ( * The behavior of this can be adjusted using [[thresholds]]. 
   */
  override protected def predict(features: Vector): Double = if (isMultinomial) {
-    if (isDefined(thresholds)) {
-      probability2prediction(scores(features))
-    } else {
-      super.predict(features)
-    }
+    super.predict(features)
  } else {
    // Note: We should use getThreshold instead of $(threshold) since getThreshold is overridden.
    if (score(features) > getThreshold) 1 else 0

diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index e7304401f324b..e6d520f69bd7c 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -231,6 +231,12 @@ class LogisticRegressionSuite
     assert(scaledPredictions.zip(basePredictions).forall { case (scaled, base) =>
       scaled.getDouble(0) === base.getDouble(0)
     })
+
+    // force it to use the predict method
+    model.setRawPredictionCol("").setProbabilityCol("").setThresholds(Array(0, 1, 1))
+    val predictionsWithPredict =
+      model.transform(smallMultinomialDataset).select("prediction").collect()
+    assert(predictionsWithPredict.forall(_.getDouble(0) === 0.0))
   }

   test("logistic regression doesn't fit intercept when fitIntercept is off") {
@@ -293,6 +299,8 @@ class LogisticRegressionSuite
   }

   test("multinomial logistic regression: Predictor, Classifier methods") {
+    val sqlContext = smallMultinomialDataset.sqlContext
+    import sqlContext.implicits._
     val mlr = new LogisticRegression().setFamily("multinomial")

     val model = mlr.fit(smallMultinomialDataset)
@@ -337,9 +345,27 @@ class LogisticRegressionSuite
       val predFromProb = prob.toArray.zipWithIndex.maxBy(_._1)._2
       assert(pred == predFromProb)
     }
+
+    // force it to use probability2prediction
+    model.setProbabilityCol("")
+    val resultsUsingProb2Predict =
+      model.transform(smallMultinomialDataset).select("prediction").as[Double].collect()
+    resultsUsingProb2Predict.zip(results.select("prediction").as[Double].collect()).foreach {
+      case (pred1, pred2) => assert(pred1 === pred2)
+    }
+
+    // force it to use predict
+    model.setRawPredictionCol("").setProbabilityCol("")
+    val resultsUsingPredict =
+      model.transform(smallMultinomialDataset).select("prediction").as[Double].collect()
+    resultsUsingPredict.zip(results.select("prediction").as[Double].collect()).foreach {
+      case (pred1, pred2) => assert(pred1 === pred2)
+    }
   }

   test("binary logistic regression: Predictor, Classifier methods") {
+    val sqlContext = smallBinaryDataset.sqlContext
+    import sqlContext.implicits._
     val lr = new LogisticRegression().setFamily("binomial")

     val model = lr.fit(smallBinaryDataset)
@@ -347,7 +373,6 @@ class LogisticRegressionSuite

     val numFeatures = smallBinaryDataset.select("features").first().getAs[Vector](0).size
     assert(model.numFeatures === numFeatures)

-    val threshold = model.getThreshold
     val results = model.transform(smallBinaryDataset)

     // Compare rawPrediction with probability
@@ -366,6 +391,22 @@ class LogisticRegressionSuite
       val predFromProb = prob.toArray.zipWithIndex.maxBy(_._1)._2
       assert(pred == predFromProb)
     }
+
+    // force it to use probability2prediction
+    model.setProbabilityCol("")
+    val resultsUsingProb2Predict =
+      model.transform(smallBinaryDataset).select("prediction").as[Double].collect()
+    resultsUsingProb2Predict.zip(results.select("prediction").as[Double].collect()).foreach {
+      case (pred1, pred2) => assert(pred1 === pred2)
+    }
+
+    // force it to use predict
+    model.setRawPredictionCol("").setProbabilityCol("")
+    val resultsUsingPredict =
+      model.transform(smallBinaryDataset).select("prediction").as[Double].collect()
+    resultsUsingPredict.zip(results.select("prediction").as[Double].collect()).foreach {
+      case (pred1, pred2) => assert(pred1 === pred2)
+    }
   }

   test("coefficients and intercept methods") {

From 4dae59569732ace5cb2cf583d6db315fb3eda596 Mon Sep 17 00:00:00 2001
From: sethah
Date: Mon, 19 Sep 2016 17:47:56 -0700
Subject: [PATCH 24/24] code review, add secondary constructor

---
 .../spark/ml/classification/LogisticRegression.scala | 10 ++++++++--
 .../ml/classification/LogisticRegressionSuite.scala  |  5 ++---
 .../spark/ml/classification/OneVsRestSuite.scala     |  3 +--
 .../apache/spark/ml/tuning/CrossValidatorSuite.scala |  3 +--
 .../spark/ml/tuning/TrainValidationSplitSuite.scala  |  3 +--
 5 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 2229009571a0d..343d50c790e85 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -75,7 +75,6 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas
     set(threshold, value)
   }

-
   /**
    * Param for the name of family which is a description of the label distribution
    * to be used in the model.
@@ -687,6 +686,14 @@ class LogisticRegressionModel private[spark] (
   extends ProbabilisticClassificationModel[Vector, LogisticRegressionModel]
   with LogisticRegressionParams with MLWritable {

+  require(coefficientMatrix.numRows == interceptVector.size, s"Dimension mismatch! Expected " +
+    s"coefficientMatrix.numRows == interceptVector.size, but ${coefficientMatrix.numRows} != " +
+    s"${interceptVector.size}")
+
+  private[spark] def this(uid: String, coefficients: Vector, intercept: Double) =
+    this(uid, new DenseMatrix(1, coefficients.size, coefficients.toArray, isTransposed = true),
+      Vectors.dense(intercept), 2, isMultinomial = false)
+
   /**
    * A vector of model coefficients for "binomial" logistic regression. If this model was trained
    * using the "multinomial" family then an exception is thrown.
@@ -1382,7 +1389,6 @@ class BinaryLogisticRegressionSummary private[classification] (
 * $$
 *

 *
- *
 * @param bcCoefficients The broadcast coefficients corresponding to the features.
 * @param bcFeaturesStd The broadcast standard deviation values of the features.
 * @param numClasses the number of possible outcomes for k classes classification problem in
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index e6d520f69bd7c..2623759f24d91 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -26,7 +26,7 @@ import org.apache.spark.{SparkException, SparkFunSuite}
 import org.apache.spark.ml.attribute.NominalAttribute
 import org.apache.spark.ml.classification.LogisticRegressionSuite._
 import org.apache.spark.ml.feature.LabeledPoint
-import org.apache.spark.ml.linalg._
+import org.apache.spark.ml.linalg.{DenseMatrix, Matrices, SparseMatrix, SparseVector, Vector, Vectors}
 import org.apache.spark.ml.param.ParamsSuite
 import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils}
 import org.apache.spark.ml.util.TestingUtils._
@@ -112,8 +112,7 @@ class LogisticRegressionSuite

   test("params") {
     ParamsSuite.checkParams(new LogisticRegression)
-    val model = new LogisticRegressionModel("logReg",
-      new DenseMatrix(1, 1, Array(0.0)), Vectors.dense(0.0), 2, isMultinomial = false)
+    val model = new LogisticRegressionModel("logReg", Vectors.dense(0.0), 0.0)
     ParamsSuite.checkParams(model)
   }

diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
index 01a043195ad3f..99dd5854ff649 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
@@ -60,8 +60,7 @@ class OneVsRestSuite extends SparkFunSuite with MLlibTestSparkContext with Defau

   test("params") {
     ParamsSuite.checkParams(new OneVsRest)
-    val lrModel = new LogisticRegressionModel("logReg",
-      new DenseMatrix(1, 1, Array(0.0), isTransposed = true), Vectors.dense(0.0), 2, false)
+    val lrModel = new LogisticRegressionModel("lr", Vectors.dense(0.0), 0.0)
     val model = new OneVsRestModel("ovr", Metadata.empty, Array(lrModel))
     ParamsSuite.checkParams(model)
   }
diff --git a/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala
index a0a2e87b10edf..750dc5bf01e6a 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala
@@ -244,8 +244,7 @@ class CrossValidatorSuite
   test("read/write: CrossValidatorModel") {
     val lr = new LogisticRegression()
       .setThreshold(0.6)
-    val lrModel = new LogisticRegressionModel(lr.uid,
-      new DenseMatrix(1, 1, Array(0.0), isTransposed = true), Vectors.dense(0.0), 2, false)
+    val lrModel = new LogisticRegressionModel(lr.uid, Vectors.dense(1.0, 2.0), 1.2)
       .setThreshold(0.6)
     val evaluator = new BinaryClassificationEvaluator()
       .setMetricName("areaUnderPR") // not default metric
diff --git a/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala
index 39e23e6c45dbb..9971371e47288 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala
@@ -133,8 +133,7 @@ class TrainValidationSplitSuite
   test("read/write: TrainValidationSplitModel") {
     val lr = new LogisticRegression()
       .setThreshold(0.6)
-    val lrModel = new LogisticRegressionModel(lr.uid,
-      new DenseMatrix(1, 1, Array(0.0), isTransposed = true), Vectors.dense(0.0), 2, false)
+    val lrModel = new LogisticRegressionModel(lr.uid, Vectors.dense(1.0, 2.0), 1.2)
       .setThreshold(0.6)
     val evaluator = new BinaryClassificationEvaluator()
     val paramMaps = new ParamGridBuilder()
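The simplified model constructions in the last three hunks rely on the secondary constructor added at the top of this patch. A sketch of the equivalence (the uid and values are arbitrary; the constructor is private[spark], so this only compiles inside Spark's own packages):

    import org.apache.spark.ml.classification.LogisticRegressionModel
    import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}

    // new LogisticRegressionModel("lr", Vectors.dense(1.0, 2.0), 1.2)
    // expands to the primary constructor as:
    val model = new LogisticRegressionModel("lr",
      new DenseMatrix(1, 2, Array(1.0, 2.0), isTransposed = true), // 1 x numFeatures, row-major
      Vectors.dense(1.2), // a single intercept
      2,                  // numClasses
      isMultinomial = false)

This shape satisfies the new require by construction: coefficientMatrix.numRows == 1 == interceptVector.size, one row of coefficients paired with exactly one intercept.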