From 18047484cf869ae5c6fce32c6b64b9069d709eae Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Tue, 1 Mar 2016 17:33:20 -0800 Subject: [PATCH 01/17] [SPARK-13430] Added summary classes for logistic and linear regression --- python/pyspark/ml/classification.py | 209 +++++++++++++++++++++++++++- python/pyspark/ml/regression.py | 207 ++++++++++++++++++++++++++- python/pyspark/mllib/common.py | 33 +++-- 3 files changed, 436 insertions(+), 13 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 3179fb30ab4d7..51dabff089eb3 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -23,10 +23,11 @@ from pyspark.ml.param.shared import * from pyspark.ml.regression import ( RandomForestParams, TreeEnsembleParams, DecisionTreeModel, TreeEnsembleModels) -from pyspark.mllib.common import inherit_doc +from pyspark.mllib.common import inherit_doc, JavaCallable -__all__ = ['LogisticRegression', 'LogisticRegressionModel', 'DecisionTreeClassifier', +__all__ = ['LogisticRegression', 'LogisticRegressionModel', 'BinaryLogisticRegressionSummary', + 'BinaryLogisticRegressionTrainingSummary', 'DecisionTreeClassifier', 'DecisionTreeClassificationModel', 'GBTClassifier', 'GBTClassificationModel', 'RandomForestClassifier', 'RandomForestClassificationModel', 'NaiveBayes', 'NaiveBayesModel', 'MultilayerPerceptronClassifier', @@ -218,6 +219,210 @@ def intercept(self): """ return self._call_java("intercept") + @property + @since("2.0.0") + def summary(self): + """ + Gets summary (e.g. residuals, mse, r-squared ) of model on + training set. An exception is thrown if + `trainingSummary == None`. + """ + java_blrt_summary = self._call_java("summary") + return BinaryLogisticRegressionTrainingSummary._fromActiveSparkContext(java_blrt_summary) + + @property + @since("2.0.0") + def hasSummary(self): + """ + Indicates whether a training summary exists for this model + instance. + """ + return self._call_java("hasSummary") + + """ + TODO: enable once Scala API is made public + def evaluate(self, df): + "" + Evaluates the model on a testset. + @param dataset Test dataset to evaluate model on. + "" + java_blr_summary = self._call_java("evaluate", df) + return BinaryLogisticRegressionSummary._fromActiveSparkContext(java_blr_summary) + """ + + +class LogisticRegressionSummary(JavaCallable): + """ + Abstraction for Logistic Regression Results for a given model. + + .. versionadded:: 2.0.0 + """ + + @property + @since("2.0.0") + def predictions(self): + """ + Dataframe outputted by the model's `transform` method. + """ + return self._call("predictions") + + @property + @since("2.0.0") + def probabilityCol(self): + """ + Field in "predictions" which gives the calibrated probability + of each instance as a vector. + """ + return self._call("probabilityCol") + + @property + @since("2.0.0") + def labelCol(self): + """ + Field in "predictions" which gives the true label of each + instance. + """ + return self._call("labelCol") + + @property + @since("2.0.0") + def featuresCol(self): + """ + Field in "predictions" which gives the features of each instance + as a vector. + """ + return self._call("featuresCol") + + +class LogisticRegressionTrainingSummary(LogisticRegressionSummary): + """ + Abstraction for multinomial Logistic Regression Training results. + Currently, the training summary ignores the training weights except + for the objective trace. + + .. 
versionadded:: 2.0.0
+    """
+
+    @property
+    @since("2.0.0")
+    def objectiveHistory(self):
+        """
+        Objective function (scaled loss + regularization) at each iteration.
+        """
+        return self._call("objectiveHistory")
+
+    @property
+    @since("2.0.0")
+    def totalIterations(self):
+        """
+        Number of training iterations until termination.
+        """
+        return self._call("totalIterations")
+
+
+class BinaryLogisticRegressionSummary(LogisticRegressionSummary):
+    """
+    .. note:: Experimental
+
+    Binary Logistic regression results for a given model.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @property
+    @since("2.0.0")
+    def roc(self):
+        """
+        Returns the receiver operating characteristic (ROC) curve,
+        which is a DataFrame having two fields (FPR, TPR) with
+        (0.0, 0.0) prepended and (1.0, 1.0) appended to it.
+        Reference: http://en.wikipedia.org/wiki/Receiver_operating_characteristic
+
+        Note: This ignores instance weights (setting all to 1.0) from
+        `LogisticRegression.weightCol`. This will change in later Spark
+        versions.
+        """
+        return self._call("roc")
+
+    @property
+    @since("2.0.0")
+    def areaUnderROC(self):
+        """
+        Computes the area under the receiver operating characteristic
+        (ROC) curve.
+
+        Note: This ignores instance weights (setting all to 1.0) from
+        `LogisticRegression.weightCol`. This will change in later Spark
+        versions.
+        """
+        return self._call("areaUnderROC")
+
+    @property
+    @since("2.0.0")
+    def pr(self):
+        """
+        Returns the precision-recall curve, which is an Dataframe
+        containing two fields recall, precision with (0.0, 1.0) prepended to it.
+
+        Note: This ignores instance weights (setting all to 1.0) from
+        `LogisticRegression.weightCol`. This will change in later Spark
+        versions.
+        """
+        return self._call("pr")
+
+    @property
+    @since("2.0.0")
+    def fMeasureByThreshold(self):
+        """
+        Returns a DataFrame with two fields (threshold, F-Measure),
+        the F-Measure curve computed with beta = 1.0.
+
+        Note: This ignores instance weights (setting all to 1.0) from
+        `LogisticRegression.weightCol`. This will change in later Spark
+        versions.
+        """
+        return self._call("fMeasureByThreshold")
+
+    @property
+    @since("2.0.0")
+    def precisionByThreshold(self):
+        """
+        Returns a DataFrame with two fields (threshold, precision).
+        Every possible probability obtained in transforming the dataset
+        is used as a threshold when calculating the precision.
+
+        Note: This ignores instance weights (setting all to 1.0) from
+        `LogisticRegression.weightCol`. This will change in later Spark
+        versions.
+        """
+        return self._call("precisionByThreshold")
+
+    @property
+    @since("2.0.0")
+    def recallByThreshold(self):
+        """
+        Returns a DataFrame with two fields (threshold, recall).
+        Every possible probability obtained in transforming the dataset
+        is used as a threshold when calculating the recall.
+
+        Note: This ignores instance weights (setting all to 1.0) from
+        `LogisticRegression.weightCol`. This will change in later Spark
+        versions.
+        """
+        return self._call("recallByThreshold")
+
+
+class BinaryLogisticRegressionTrainingSummary(BinaryLogisticRegressionSummary,
+                                              LogisticRegressionTrainingSummary):
+    """
+    .. note:: Experimental
+
+    Binary Logistic regression training results for a given model.
+
+    .. 
versionadded:: 2.0.0 + """ + pass + class TreeClassifierParams(object): """ diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 6b994fe9f93b4..7dd6eb2a9bc8c 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -21,7 +21,7 @@ from pyspark.ml.param.shared import * from pyspark.ml.util import * from pyspark.ml.wrapper import JavaEstimator, JavaModel -from pyspark.mllib.common import inherit_doc +from pyspark.mllib.common import inherit_doc, JavaCallable __all__ = ['AFTSurvivalRegression', 'AFTSurvivalRegressionModel', @@ -29,6 +29,7 @@ 'GBTRegressor', 'GBTRegressionModel', 'IsotonicRegression', 'IsotonicRegressionModel', 'LinearRegression', 'LinearRegressionModel', + 'LinearRegressionSummary', 'LinearRegressionTrainingSummary', 'RandomForestRegressor', 'RandomForestRegressionModel'] @@ -131,7 +132,6 @@ def weights(self): """ Model weights. """ - warnings.warn("weights is deprecated. Use coefficients instead.") return self._call_java("weights") @@ -151,6 +151,209 @@ def intercept(self): """ return self._call_java("intercept") + @property + @since("2.0.0") + def summary(self): + """ + Gets summary (e.g. residuals, mse, r-squared ) of model on + training set. An exception is thrown if + `trainingSummary == None`. + """ + java_lrt_summary = self._call_java("summary") + return LinearRegressionTrainingSummary._fromActiveSparkContext(java_lrt_summary) + + @property + @since("2.0.0") + def hasSummary(self): + """ + Indicates whether a training summary exists for this model + instance. + """ + return self._call_java("hasSummary") + + """ + TODO: enable once Scala API is made public + def evaluate(self, df): + "" + Evaluates the model on a testset. + @param dataset Test dataset to evaluate model on. + "" + java_lr_summary = self._call_java("evaluate", df) + return LinearRegressionSummary._fromActiveSparkContext(java_lr_summary) + """ + + +class LinearRegressionSummary(JavaCallable): + """ + .. note:: Experimental + + Linear regression results evaluated on a dataset. + + .. versionadded:: 2.0.0 + """ + + @property + @since("2.0.0") + def predictions(self): + """ + Dataframe outputted by the model's `transform` method. + """ + return self._call("predictions") + + @property + @since("2.0.0") + def predictionCol(self): + """ + Field in "predictions" which gives the predicted value of + the label at each instance. + """ + return self._call("predictionCol") + + @property + @since("2.0.0") + def labelCol(self): + """ + Field in "predictions" which gives the true label of each + instance. + """ + return self._call("labelCol") + + @property + @since("2.0.0") + def explainedVariance(self): + """ + Returns the explained variance regression score. + explainedVariance = 1 - variance(y - \hat{y}) / variance(y) + Reference: http://en.wikipedia.org/wiki/Explained_variation + + Note: This ignores instance weights (setting all to 1.0) from + `LinearRegression.weightCol`. This will change in later Spark + versions. + """ + return self._call("explainedVariance") + + @property + @since("2.0.0") + def meanAbsoluteError(self): + """ + Returns the mean absolute error, which is a risk function + corresponding to the expected value of the absolute error + loss or l1-norm loss. + + Note: This ignores instance weights (setting all to 1.0) from + `LinearRegression.weightCol`. This will change in later Spark + versions. 
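+
+        In the notation used for explainedVariance above, this is
+        mean(abs(y - \hat{y})).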
+ """ + return self._call("meanAbsoluteError") + + @property + @since("2.0.0") + def meanSquaredError(self): + """ + Returns the mean squared error, which is a risk function + corresponding to the expected value of the squared error + loss or quadratic loss. + + Note: This ignores instance weights (setting all to 1.0) from + `LinearRegression.weightCol`. This will change in later Spark + versions. + """ + return self._call("meanSquaredError") + + @property + @since("2.0.0") + def rootMeanSquaredError(self): + """ + Returns the root mean squared error, which is defined as the + square root of the mean squared error. + + Note: This ignores instance weights (setting all to 1.0) from + `LinearRegression.weightCol`. This will change in later Spark + versions. + """ + return self._call("rootMeanSquaredError") + + @property + @since("2.0.0") + def r2(self): + """ + Returns R^2^, the coefficient of determination. + Reference: http://en.wikipedia.org/wiki/Coefficient_of_determination + + Note: This ignores instance weights (setting all to 1.0) from + `LinearRegression.weightCol`. This will change in later Spark + versions. + """ + return self._call("r2") + + @property + @since("2.0.0") + def residuals(self): + """ + Residuals (label - predicted value) + """ + return self._call("residuals") + + @property + @since("2.0.0") + def numInstances(self): + """ + Number of instances in DataFrame predictions + """ + return self._call("numInstances") + + @property + @since("2.0.0") + def devianceResiduals(self): + """ + The weighted residuals, the usual residuals rescaled by the + square root of the instance weights. + """ + return self._call("devianceResiduals") + + @property + @since("2.0.0") + def coefficientStandardErrors(self): + """ + Standard error of estimated coefficients and intercept. + """ + return self._call("coefficientStandardErrors") + + @property + @since("2.0.0") + def tValues(self): + """ + T-statistic of estimated coefficients and intercept. + """ + return self._call("tValues") + + @property + @since("2.0.0") + def pValues(self): + """ + Two-sided p-value of estimated coefficients and intercept. + """ + return self._call("pValues") + + +class LinearRegressionTrainingSummary(LinearRegressionSummary): + """ + .. note:: Experimental + + Linear regression training results. Currently, the training summary ignores the + training coefficients except for the objective trace. + + .. versionadded:: 2.0.0 + """ + + @property + @since("2.0.0") + def totalIterations(self): + """ + Number of training iterations until termination. 
+ """ + return self._call("totalIterations") + @inherit_doc class IsotonicRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py index 9fda1b1682f57..888d475c3e421 100644 --- a/python/pyspark/mllib/common.py +++ b/python/pyspark/mllib/common.py @@ -130,20 +130,35 @@ def callMLlibFunc(name, *args): return callJavaFunc(sc, api, *args) -class JavaModelWrapper(object): +class JavaCallable(object): """ - Wrapper for the model in JVM + Wrapper for an object in JVM to make Java calls """ - def __init__(self, java_model): - self._sc = SparkContext.getOrCreate() - self._java_model = java_model + def __init__(self, sc, java_obj): + self._sc = sc + self._java_obj = java_obj def __del__(self): - self._sc._gateway.detach(self._java_model) + self._sc._gateway.detach(self._java_obj) + + @classmethod + def _fromActiveSparkContext(cls, java_obj): + """Create from a currently active context""" + sc = SparkContext._active_spark_context + return cls(sc, java_obj) + + def _call(self, name, *a): + """Call method of java_obj""" + return callJavaFunc(self._sc, getattr(self._java_obj, name), *a) + - def call(self, name, *a): - """Call method of java_model""" - return callJavaFunc(self._sc, getattr(self._java_model, name), *a) +class JavaModelWrapper(JavaCallable): + """ + Wrapper for the model in JVM + """ + def __init__(self, java_model): + sc = SparkContext.getOrCreate() + super(JavaModelWrapper, self).__init__(sc, java_model) def inherit_doc(cls): From 57f15cd675cd50a82ef479286d1c027b0c7f700b Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Wed, 2 Mar 2016 14:23:54 -0800 Subject: [PATCH 02/17] adding test for ml linear regression training summary --- python/pyspark/ml/tests.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 5fcfa9e61f6da..aa3d725d2cee3 100644 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -443,6 +443,33 @@ def test_linear_regression(self): pass +class TrainingSummaryTest(PySparkTestCase): + + def test_linear_regression_summary(self): + from pyspark.mllib.linalg import Vectors + df = self.sc.parallelize([ + Row(label=1.0, weight=2.0, features=Vectors.dense(1.0)), + Row(label=0.0, weight=2.0, features=Vectors.dense(0.0))]).toDF() + lr = LinearRegression(maxIter=5, regParam=0.0, solver="normal", weightCol="weight") + model = lr.fit(df) + self.assertTrue(model.hasSummary) + s = model.summary + # test that api is callable and returns expected types + self.assertGreater(s.totalIterations, 0) + self.assertTrue(isinstance(s.predictions, DataFrame)) + self.assertEqual(s.predictionCol, "prediction") + self.assertEqual(s.labelCol, "label") + self.assertAlmostEqual(s.explainedVariance, 0.25, 2) + self.assertAlmostEqual(s.meanAbsoluteError, 0.0) + self.assertAlmostEqual(s.meanSquaredError, 0.0) + self.assertAlmostEqual(s.rootMeanSquaredError, 0.0) + self.assertAlmostEqual(s.r2, 1.0, 2) + #residuals = s.residuals.rdd.map(lambda r: r.residuals).collect() + #self.assertTrue(isinstance(residuals, list) and isinstance(residuals[0], float)) + + #self.assertTrue(False) + + if __name__ == "__main__": from pyspark.ml.tests import * if xmlrunner: From 4d4bf1a8766834bb49b7014057bac5c0a7f8a03a Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Wed, 2 Mar 2016 17:32:09 -0800 Subject: [PATCH 03/17] completed test for ml linear regression training summary --- python/pyspark/ml/tests.py | 25 +++++++++++++++++-------- 
1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index aa3d725d2cee3..64585277b09d4 100644 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -447,10 +447,12 @@ class TrainingSummaryTest(PySparkTestCase): def test_linear_regression_summary(self): from pyspark.mllib.linalg import Vectors - df = self.sc.parallelize([ - Row(label=1.0, weight=2.0, features=Vectors.dense(1.0)), - Row(label=0.0, weight=2.0, features=Vectors.dense(0.0))]).toDF() - lr = LinearRegression(maxIter=5, regParam=0.0, solver="normal", weightCol="weight") + sqlContext = SQLContext(self.sc) + df = sqlContext.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)), + (0.0, 2.0, Vectors.sparse(1, [], []))], + ["label", "weight", "features"]) + lr = LinearRegression(maxIter=5, regParam=0.0, solver="normal", weightCol="weight", + fitIntercept=False) model = lr.fit(df) self.assertTrue(model.hasSummary) s = model.summary @@ -464,10 +466,17 @@ def test_linear_regression_summary(self): self.assertAlmostEqual(s.meanSquaredError, 0.0) self.assertAlmostEqual(s.rootMeanSquaredError, 0.0) self.assertAlmostEqual(s.r2, 1.0, 2) - #residuals = s.residuals.rdd.map(lambda r: r.residuals).collect() - #self.assertTrue(isinstance(residuals, list) and isinstance(residuals[0], float)) - - #self.assertTrue(False) + residuals = s.residuals.rdd.map(lambda r: r.residuals).collect() + self.assertTrue(isinstance(residuals, list) and isinstance(residuals[0], float)) + self.assertEqual(s.numInstances, 2) + devResiduals = s.devianceResiduals + self.assertTrue(isinstance(devResiduals, list) and isinstance(devResiduals[0], float)) + coefStdErr = s.coefficientStandardErrors + self.assertTrue(isinstance(coefStdErr, list) and isinstance(coefStdErr[0], float)) + tValues = s.tValues + self.assertTrue(isinstance(tValues, list) and isinstance(tValues[0], float)) + pValues = s.pValues + self.assertTrue(isinstance(pValues, list) and isinstance(pValues[0], float)) if __name__ == "__main__": From f9da8e6df323f5c6447d6f9cae771b910023b3ef Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Thu, 3 Mar 2016 13:53:49 -0800 Subject: [PATCH 04/17] adding test for ml logistic regression training summary --- python/pyspark/ml/tests.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 64585277b09d4..6eb2a8b962ec6 100644 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -478,6 +478,31 @@ def test_linear_regression_summary(self): pValues = s.pValues self.assertTrue(isinstance(pValues, list) and isinstance(pValues[0], float)) + def test_logistic_regression_summary(self): + from pyspark.mllib.linalg import Vectors + sqlContext = SQLContext(self.sc) + df = sqlContext.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)), + (0.0, 2.0, Vectors.sparse(1, [], []))], + ["label", "weight", "features"]) + lr = LogisticRegression(maxIter=5, regParam=0.01, weightCol="weight", fitIntercept=False) + model = lr.fit(df) + self.assertTrue(model.hasSummary) + s = model.summary + # test that api is callable and returns expected types + self.assertTrue(isinstance(s.predictions, DataFrame)) + self.assertEqual(s.probabilityCol, "probability") + self.assertEqual(s.labelCol, "label") + self.assertEqual(s.featuresCol, "features") + objHist = s.objectiveHistory + self.assertTrue(isinstance(objHist, list) and isinstance(objHist[0], float)) + self.assertGreater(s.totalIterations, 0) + self.assertTrue(isinstance(s.roc, DataFrame)) + 
self.assertAlmostEqual(s.areaUnderROC, 1.0, 2) + self.assertTrue(isinstance(s.pr, DataFrame)) + self.assertTrue(isinstance(s.fMeasureByThreshold, DataFrame)) + self.assertTrue(isinstance(s.precisionByThreshold, DataFrame)) + self.assertTrue(isinstance(s.recallByThreshold, DataFrame)) + if __name__ == "__main__": from pyspark.ml.tests import * From ce69f9d5d5748f95c63883c5920e59bbae4e3b79 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Thu, 3 Mar 2016 14:53:02 -0800 Subject: [PATCH 05/17] changed residual to only check that DataFrame is returned --- python/pyspark/ml/tests.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 6eb2a8b962ec6..1765f8ef66c46 100644 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -466,8 +466,7 @@ def test_linear_regression_summary(self): self.assertAlmostEqual(s.meanSquaredError, 0.0) self.assertAlmostEqual(s.rootMeanSquaredError, 0.0) self.assertAlmostEqual(s.r2, 1.0, 2) - residuals = s.residuals.rdd.map(lambda r: r.residuals).collect() - self.assertTrue(isinstance(residuals, list) and isinstance(residuals[0], float)) + self.assertTrue(isinstance(s.residuals, DataFrame)) self.assertEqual(s.numInstances, 2) devResiduals = s.devianceResiduals self.assertTrue(isinstance(devResiduals, list) and isinstance(devResiduals[0], float)) From e3ac04cfcc9e90a649bd7e46346cee110562b2f7 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Wed, 9 Mar 2016 17:19:04 -0800 Subject: [PATCH 06/17] Could not make JavaModel.call private because used in mllib, added _java_model property to fix mllib errors --- python/pyspark/ml/classification.py | 26 +++++++++++------------ python/pyspark/ml/regression.py | 32 ++++++++++++++--------------- python/pyspark/mllib/common.py | 8 ++++++-- 3 files changed, 35 insertions(+), 31 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 142a5fcaf6616..f407313d87323 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -229,7 +229,7 @@ def summary(self): `trainingSummary == None`. """ java_blrt_summary = self._call_java("summary") - return BinaryLogisticRegressionTrainingSummary._fromActiveSparkContext(java_blrt_summary) + return BinaryLogisticRegressionTrainingSummary.fromActiveSparkContext(java_blrt_summary) @property @since("2.0.0") @@ -265,7 +265,7 @@ def predictions(self): """ Dataframe outputted by the model's `transform` method. """ - return self._call("predictions") + return self.call("predictions") @property @since("2.0.0") @@ -274,7 +274,7 @@ def probabilityCol(self): Field in "predictions" which gives the calibrated probability of each instance as a vector. """ - return self._call("probabilityCol") + return self.call("probabilityCol") @property @since("2.0.0") @@ -283,7 +283,7 @@ def labelCol(self): Field in "predictions" which gives the true label of each instance. """ - return self._call("labelCol") + return self.call("labelCol") @property @since("2.0.0") @@ -292,7 +292,7 @@ def featuresCol(self): Field in "predictions" which gives the features of each instance as a vector. """ - return self._call("featuresCol") + return self.call("featuresCol") class LogisticRegressionTrainingSummary(LogisticRegressionSummary): @@ -310,7 +310,7 @@ def objectiveHistory(self): """ Objective function (scaled loss + regularization) at each iteration. 
""" - return self._call("objectiveHistory") + return self.call("objectiveHistory") @property @since("2.0.0") @@ -318,7 +318,7 @@ def totalIterations(self): """ Number of training iterations until termination. """ - return self._call("totalIterations") + return self.call("totalIterations") class BinaryLogisticRegressionSummary(LogisticRegressionSummary): @@ -343,7 +343,7 @@ def roc(self): `LogisticRegression.weightCol`. This will change in later Spark versions. """ - return self._call("roc") + return self.call("roc") @property @since("2.0.0") @@ -356,7 +356,7 @@ def areaUnderROC(self): `LogisticRegression.weightCol`. This will change in later Spark versions. """ - return self._call("areaUnderROC") + return self.call("areaUnderROC") @property @since("2.0.0") @@ -369,7 +369,7 @@ def pr(self): `LogisticRegression.weightCol`. This will change in later Spark versions. """ - return self._call("pr") + return self.call("pr") @property @since("2.0.0") @@ -382,7 +382,7 @@ def fMeasureByThreshold(self): `LogisticRegression.weightCol`. This will change in later Spark versions. """ - return self._call("fMeasureByThreshold") + return self.call("fMeasureByThreshold") @property @since("2.0.0") @@ -396,7 +396,7 @@ def precisionByThreshold(self): `LogisticRegression.weightCol`. This will change in later Spark versions. """ - return self._call("precisionByThreshold") + return self.call("precisionByThreshold") @property @since("2.0.0") @@ -410,7 +410,7 @@ def recallByThreshold(self): `LogisticRegression.weightCol`. This will change in later Spark versions. """ - return self._call("recallByThreshold") + return self.call("recallByThreshold") class BinaryLogisticRegressionTrainingSummary(BinaryLogisticRegressionSummary, diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 7dd6eb2a9bc8c..13b0fef5d23b0 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -160,7 +160,7 @@ def summary(self): `trainingSummary == None`. """ java_lrt_summary = self._call_java("summary") - return LinearRegressionTrainingSummary._fromActiveSparkContext(java_lrt_summary) + return LinearRegressionTrainingSummary.fromActiveSparkContext(java_lrt_summary) @property @since("2.0.0") @@ -198,7 +198,7 @@ def predictions(self): """ Dataframe outputted by the model's `transform` method. """ - return self._call("predictions") + return self.call("predictions") @property @since("2.0.0") @@ -207,7 +207,7 @@ def predictionCol(self): Field in "predictions" which gives the predicted value of the label at each instance. """ - return self._call("predictionCol") + return self.call("predictionCol") @property @since("2.0.0") @@ -216,7 +216,7 @@ def labelCol(self): Field in "predictions" which gives the true label of each instance. """ - return self._call("labelCol") + return self.call("labelCol") @property @since("2.0.0") @@ -230,7 +230,7 @@ def explainedVariance(self): `LinearRegression.weightCol`. This will change in later Spark versions. """ - return self._call("explainedVariance") + return self.call("explainedVariance") @property @since("2.0.0") @@ -244,7 +244,7 @@ def meanAbsoluteError(self): `LinearRegression.weightCol`. This will change in later Spark versions. """ - return self._call("meanAbsoluteError") + return self.call("meanAbsoluteError") @property @since("2.0.0") @@ -258,7 +258,7 @@ def meanSquaredError(self): `LinearRegression.weightCol`. This will change in later Spark versions. 
""" - return self._call("meanSquaredError") + return self.call("meanSquaredError") @property @since("2.0.0") @@ -271,7 +271,7 @@ def rootMeanSquaredError(self): `LinearRegression.weightCol`. This will change in later Spark versions. """ - return self._call("rootMeanSquaredError") + return self.call("rootMeanSquaredError") @property @since("2.0.0") @@ -284,7 +284,7 @@ def r2(self): `LinearRegression.weightCol`. This will change in later Spark versions. """ - return self._call("r2") + return self.call("r2") @property @since("2.0.0") @@ -292,7 +292,7 @@ def residuals(self): """ Residuals (label - predicted value) """ - return self._call("residuals") + return self.call("residuals") @property @since("2.0.0") @@ -300,7 +300,7 @@ def numInstances(self): """ Number of instances in DataFrame predictions """ - return self._call("numInstances") + return self.call("numInstances") @property @since("2.0.0") @@ -309,7 +309,7 @@ def devianceResiduals(self): The weighted residuals, the usual residuals rescaled by the square root of the instance weights. """ - return self._call("devianceResiduals") + return self.call("devianceResiduals") @property @since("2.0.0") @@ -317,7 +317,7 @@ def coefficientStandardErrors(self): """ Standard error of estimated coefficients and intercept. """ - return self._call("coefficientStandardErrors") + return self.call("coefficientStandardErrors") @property @since("2.0.0") @@ -325,7 +325,7 @@ def tValues(self): """ T-statistic of estimated coefficients and intercept. """ - return self._call("tValues") + return self.call("tValues") @property @since("2.0.0") @@ -333,7 +333,7 @@ def pValues(self): """ Two-sided p-value of estimated coefficients and intercept. """ - return self._call("pValues") + return self.call("pValues") class LinearRegressionTrainingSummary(LinearRegressionSummary): @@ -352,7 +352,7 @@ def totalIterations(self): """ Number of training iterations until termination. 
""" - return self._call("totalIterations") + return self.call("totalIterations") @inherit_doc diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py index 888d475c3e421..86997b2888b91 100644 --- a/python/pyspark/mllib/common.py +++ b/python/pyspark/mllib/common.py @@ -142,12 +142,12 @@ def __del__(self): self._sc._gateway.detach(self._java_obj) @classmethod - def _fromActiveSparkContext(cls, java_obj): + def fromActiveSparkContext(cls, java_obj): """Create from a currently active context""" sc = SparkContext._active_spark_context return cls(sc, java_obj) - def _call(self, name, *a): + def call(self, name, *a): """Call method of java_obj""" return callJavaFunc(self._sc, getattr(self._java_obj, name), *a) @@ -160,6 +160,10 @@ def __init__(self, java_model): sc = SparkContext.getOrCreate() super(JavaModelWrapper, self).__init__(sc, java_model) + @property + def _java_model(self): + return self._java_obj + def inherit_doc(cls): """ From 8d0f01a269adc8dca3383ecb9e4bf4f780806984 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Thu, 10 Mar 2016 16:42:00 -0800 Subject: [PATCH 07/17] moved JavaCallable to ML and changed to _call_java, it duplicates some code, but is cleaner and more consistent --- python/pyspark/ml/classification.py | 32 ++++++++++++------------ python/pyspark/ml/regression.py | 38 ++++++++++++++--------------- python/pyspark/ml/wrapper.py | 27 +++++++++++++++++--- python/pyspark/mllib/common.py | 35 ++++++-------------------- 4 files changed, 66 insertions(+), 66 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index f407313d87323..479c41e93e2ee 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -19,11 +19,11 @@ from pyspark import since from pyspark.ml.util import keyword_only -from pyspark.ml.wrapper import JavaEstimator, JavaModel +from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaCallable from pyspark.ml.param.shared import * from pyspark.ml.regression import ( RandomForestParams, TreeEnsembleParams, DecisionTreeModel, TreeEnsembleModels) -from pyspark.mllib.common import inherit_doc, JavaCallable +from pyspark.mllib.common import inherit_doc __all__ = ['LogisticRegression', 'LogisticRegressionModel', @@ -229,7 +229,7 @@ def summary(self): `trainingSummary == None`. """ java_blrt_summary = self._call_java("summary") - return BinaryLogisticRegressionTrainingSummary.fromActiveSparkContext(java_blrt_summary) + return BinaryLogisticRegressionTrainingSummary(java_blrt_summary) @property @since("2.0.0") @@ -248,7 +248,7 @@ def evaluate(self, df): @param dataset Test dataset to evaluate model on. "" java_blr_summary = self._call_java("evaluate", df) - return BinaryLogisticRegressionSummary._fromActiveSparkContext(java_blr_summary) + return BinaryLogisticRegressionSummary(java_blr_summary) """ @@ -265,7 +265,7 @@ def predictions(self): """ Dataframe outputted by the model's `transform` method. """ - return self.call("predictions") + return self._call_java("predictions") @property @since("2.0.0") @@ -274,7 +274,7 @@ def probabilityCol(self): Field in "predictions" which gives the calibrated probability of each instance as a vector. """ - return self.call("probabilityCol") + return self._call_java("probabilityCol") @property @since("2.0.0") @@ -283,7 +283,7 @@ def labelCol(self): Field in "predictions" which gives the true label of each instance. 
""" - return self.call("labelCol") + return self._call_java("labelCol") @property @since("2.0.0") @@ -292,7 +292,7 @@ def featuresCol(self): Field in "predictions" which gives the features of each instance as a vector. """ - return self.call("featuresCol") + return self._call_java("featuresCol") class LogisticRegressionTrainingSummary(LogisticRegressionSummary): @@ -310,7 +310,7 @@ def objectiveHistory(self): """ Objective function (scaled loss + regularization) at each iteration. """ - return self.call("objectiveHistory") + return self._call_java("objectiveHistory") @property @since("2.0.0") @@ -318,7 +318,7 @@ def totalIterations(self): """ Number of training iterations until termination. """ - return self.call("totalIterations") + return self._call_java("totalIterations") class BinaryLogisticRegressionSummary(LogisticRegressionSummary): @@ -343,7 +343,7 @@ def roc(self): `LogisticRegression.weightCol`. This will change in later Spark versions. """ - return self.call("roc") + return self._call_java("roc") @property @since("2.0.0") @@ -356,7 +356,7 @@ def areaUnderROC(self): `LogisticRegression.weightCol`. This will change in later Spark versions. """ - return self.call("areaUnderROC") + return self._call_java("areaUnderROC") @property @since("2.0.0") @@ -369,7 +369,7 @@ def pr(self): `LogisticRegression.weightCol`. This will change in later Spark versions. """ - return self.call("pr") + return self._call_java("pr") @property @since("2.0.0") @@ -382,7 +382,7 @@ def fMeasureByThreshold(self): `LogisticRegression.weightCol`. This will change in later Spark versions. """ - return self.call("fMeasureByThreshold") + return self._call_java("fMeasureByThreshold") @property @since("2.0.0") @@ -396,7 +396,7 @@ def precisionByThreshold(self): `LogisticRegression.weightCol`. This will change in later Spark versions. """ - return self.call("precisionByThreshold") + return self._call_java("precisionByThreshold") @property @since("2.0.0") @@ -410,7 +410,7 @@ def recallByThreshold(self): `LogisticRegression.weightCol`. This will change in later Spark versions. """ - return self.call("recallByThreshold") + return self._call_java("recallByThreshold") class BinaryLogisticRegressionTrainingSummary(BinaryLogisticRegressionSummary, diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 13b0fef5d23b0..8d064d8fec17c 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -20,8 +20,8 @@ from pyspark import since from pyspark.ml.param.shared import * from pyspark.ml.util import * -from pyspark.ml.wrapper import JavaEstimator, JavaModel -from pyspark.mllib.common import inherit_doc, JavaCallable +from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaCallable +from pyspark.mllib.common import inherit_doc __all__ = ['AFTSurvivalRegression', 'AFTSurvivalRegressionModel', @@ -160,7 +160,7 @@ def summary(self): `trainingSummary == None`. """ java_lrt_summary = self._call_java("summary") - return LinearRegressionTrainingSummary.fromActiveSparkContext(java_lrt_summary) + return LinearRegressionTrainingSummary(java_lrt_summary) @property @since("2.0.0") @@ -179,7 +179,7 @@ def evaluate(self, df): @param dataset Test dataset to evaluate model on. "" java_lr_summary = self._call_java("evaluate", df) - return LinearRegressionSummary._fromActiveSparkContext(java_lr_summary) + return LinearRegressionSummary(java_lr_summary) """ @@ -198,7 +198,7 @@ def predictions(self): """ Dataframe outputted by the model's `transform` method. 
""" - return self.call("predictions") + return self._call_java("predictions") @property @since("2.0.0") @@ -207,7 +207,7 @@ def predictionCol(self): Field in "predictions" which gives the predicted value of the label at each instance. """ - return self.call("predictionCol") + return self._call_java("predictionCol") @property @since("2.0.0") @@ -216,7 +216,7 @@ def labelCol(self): Field in "predictions" which gives the true label of each instance. """ - return self.call("labelCol") + return self._call_java("labelCol") @property @since("2.0.0") @@ -230,7 +230,7 @@ def explainedVariance(self): `LinearRegression.weightCol`. This will change in later Spark versions. """ - return self.call("explainedVariance") + return self._call_java("explainedVariance") @property @since("2.0.0") @@ -244,7 +244,7 @@ def meanAbsoluteError(self): `LinearRegression.weightCol`. This will change in later Spark versions. """ - return self.call("meanAbsoluteError") + return self._call_java("meanAbsoluteError") @property @since("2.0.0") @@ -258,7 +258,7 @@ def meanSquaredError(self): `LinearRegression.weightCol`. This will change in later Spark versions. """ - return self.call("meanSquaredError") + return self._call_java("meanSquaredError") @property @since("2.0.0") @@ -271,7 +271,7 @@ def rootMeanSquaredError(self): `LinearRegression.weightCol`. This will change in later Spark versions. """ - return self.call("rootMeanSquaredError") + return self._call_java("rootMeanSquaredError") @property @since("2.0.0") @@ -284,7 +284,7 @@ def r2(self): `LinearRegression.weightCol`. This will change in later Spark versions. """ - return self.call("r2") + return self._call_java("r2") @property @since("2.0.0") @@ -292,7 +292,7 @@ def residuals(self): """ Residuals (label - predicted value) """ - return self.call("residuals") + return self._call_java("residuals") @property @since("2.0.0") @@ -300,7 +300,7 @@ def numInstances(self): """ Number of instances in DataFrame predictions """ - return self.call("numInstances") + return self._call_java("numInstances") @property @since("2.0.0") @@ -309,7 +309,7 @@ def devianceResiduals(self): The weighted residuals, the usual residuals rescaled by the square root of the instance weights. """ - return self.call("devianceResiduals") + return self._call_java("devianceResiduals") @property @since("2.0.0") @@ -317,7 +317,7 @@ def coefficientStandardErrors(self): """ Standard error of estimated coefficients and intercept. """ - return self.call("coefficientStandardErrors") + return self._call_java("coefficientStandardErrors") @property @since("2.0.0") @@ -325,7 +325,7 @@ def tValues(self): """ T-statistic of estimated coefficients and intercept. """ - return self.call("tValues") + return self._call_java("tValues") @property @since("2.0.0") @@ -333,7 +333,7 @@ def pValues(self): """ Two-sided p-value of estimated coefficients and intercept. """ - return self.call("pValues") + return self._call_java("pValues") class LinearRegressionTrainingSummary(LinearRegressionSummary): @@ -352,7 +352,7 @@ def totalIterations(self): """ Number of training iterations until termination. """ - return self.call("totalIterations") + return self._call_java("totalIterations") @inherit_doc diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index f8feaa1dfa2be..cd1d064009a87 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -34,10 +34,12 @@ class JavaWrapper(Params): __metaclass__ = ABCMeta - #: The wrapped Java companion object. 
Subclasses should initialize - #: it properly. The param values in the Java object should be - #: synced with the Python wrapper in fit/transform/evaluate/copy. - _java_obj = None + def __init__(self): + super(JavaWrapper, self).__init__() + #: The wrapped Java companion object. Subclasses should initialize + #: it properly. The param values in the Java object should be + #: synced with the Python wrapper in fit/transform/evaluate/copy. + self._java_obj = None @staticmethod def _new_java_obj(java_class, *args): @@ -191,3 +193,20 @@ def _call_java(self, name, *args): sc = SparkContext._active_spark_context java_args = [_py2java(sc, arg) for arg in args] return _java2py(sc, m(*java_args)) + + +class JavaCallable(object): + """ + Wrapper for a plain object in JVM to make Java calls + """ + def __init__(self, java_obj, sc=None): + self._sc = sc if sc is not None else SparkContext._active_spark_context + self._java_obj = java_obj + + def __del__(self): + self._sc._gateway.detach(self._java_obj) + + def _call_java(self, name, *args): + m = getattr(self._java_obj, name) + java_args = [_py2java(self._sc, arg) for arg in args] + return _java2py(self._sc, m(*java_args)) diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py index 86997b2888b91..9fda1b1682f57 100644 --- a/python/pyspark/mllib/common.py +++ b/python/pyspark/mllib/common.py @@ -130,39 +130,20 @@ def callMLlibFunc(name, *args): return callJavaFunc(sc, api, *args) -class JavaCallable(object): +class JavaModelWrapper(object): """ - Wrapper for an object in JVM to make Java calls + Wrapper for the model in JVM """ - def __init__(self, sc, java_obj): - self._sc = sc - self._java_obj = java_obj + def __init__(self, java_model): + self._sc = SparkContext.getOrCreate() + self._java_model = java_model def __del__(self): - self._sc._gateway.detach(self._java_obj) - - @classmethod - def fromActiveSparkContext(cls, java_obj): - """Create from a currently active context""" - sc = SparkContext._active_spark_context - return cls(sc, java_obj) + self._sc._gateway.detach(self._java_model) def call(self, name, *a): - """Call method of java_obj""" - return callJavaFunc(self._sc, getattr(self._java_obj, name), *a) - - -class JavaModelWrapper(JavaCallable): - """ - Wrapper for the model in JVM - """ - def __init__(self, java_model): - sc = SparkContext.getOrCreate() - super(JavaModelWrapper, self).__init__(sc, java_model) - - @property - def _java_model(self): - return self._java_obj + """Call method of java_model""" + return callJavaFunc(self._sc, getattr(self._java_model, name), *a) def inherit_doc(cls): From 460881cffcb9b6bce35b822e4a9999325352074d Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Wed, 16 Mar 2016 11:45:16 -0700 Subject: [PATCH 08/17] reverted change to JavaWrapper static _java_obj, to be done in another PR --- python/pyspark/ml/wrapper.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index cd1d064009a87..78225c043e9ff 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -34,12 +34,10 @@ class JavaWrapper(Params): __metaclass__ = ABCMeta - def __init__(self): - super(JavaWrapper, self).__init__() - #: The wrapped Java companion object. Subclasses should initialize - #: it properly. The param values in the Java object should be - #: synced with the Python wrapper in fit/transform/evaluate/copy. - self._java_obj = None + #: The wrapped Java companion object. 
Subclasses should initialize + #: it properly. The param values in the Java object should be + #: synced with the Python wrapper in fit/transform/evaluate/copy. + _java_obj = None @staticmethod def _new_java_obj(java_class, *args): From 49a1f79f6a0756ad135e7c3a83cfc87d592869ed Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Thu, 24 Mar 2016 11:25:06 -0700 Subject: [PATCH 09/17] Made JavaCallable class able to be a mixin for JavaModel to reuse _call_java --- python/pyspark/ml/wrapper.py | 47 ++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index da487b6a438c6..2410d56c795b8 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -171,8 +171,30 @@ def _transform(self, dataset): return DataFrame(self._java_obj.transform(dataset._jdf), dataset.sql_ctx) +class JavaCallable(object): + """ + Wrapper for a plain object in JVM to make Java calls, can be used + as a mixin to another class that defines a _java_obj wrapper + """ + def __init__(self, java_obj=None, sc=None): + super(JavaCallable, self).__init__() + self._sc = sc if sc is not None else SparkContext._active_spark_context + # if this class is a mixin and _java_obj is already defined then don't initialize + if java_obj is not None or not hasattr(self, "_java_obj"): + self._java_obj = java_obj + + def __del__(self): + if self._java_obj is not None: + self._sc._gateway.detach(self._java_obj) + + def _call_java(self, name, *args): + m = getattr(self._java_obj, name) + java_args = [_py2java(self._sc, arg) for arg in args] + return _java2py(self._sc, m(*java_args)) + + @inherit_doc -class JavaModel(Model, JavaTransformer): +class JavaModel(Model, JavaCallable, JavaTransformer): """ Base class for :py:class:`Model`s that wrap Java/Scala implementations. 
Subclasses should inherit this class before @@ -217,26 +239,3 @@ def copy(self, extra=None): that._java_obj = self._java_obj.copy(self._empty_java_param_map()) that._transfer_params_to_java() return that - - def _call_java(self, name, *args): - m = getattr(self._java_obj, name) - sc = SparkContext._active_spark_context - java_args = [_py2java(sc, arg) for arg in args] - return _java2py(sc, m(*java_args)) - - -class JavaCallable(object): - """ - Wrapper for a plain object in JVM to make Java calls - """ - def __init__(self, java_obj, sc=None): - self._sc = sc if sc is not None else SparkContext._active_spark_context - self._java_obj = java_obj - - def __del__(self): - self._sc._gateway.detach(self._java_obj) - - def _call_java(self, name, *args): - m = getattr(self._java_obj, name) - java_args = [_py2java(self._sc, arg) for arg in args] - return _java2py(self._sc, m(*java_args)) From 3571838b60fb1f0cf869e89e127b6ad7b95bd3b3 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Thu, 24 Mar 2016 15:38:00 -0700 Subject: [PATCH 10/17] Enabled evaluate() for Linear and Logistic regression, now that it will be public --- python/pyspark/ml/classification.py | 18 +++++++++--------- python/pyspark/ml/regression.py | 18 +++++++++--------- python/pyspark/ml/tests.py | 8 ++++++++ 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index bf01dee9e618b..063622d0269c9 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -254,16 +254,16 @@ def hasSummary(self): """ return self._call_java("hasSummary") - """ - TODO: enable once Scala API is made public - def evaluate(self, df): - "" - Evaluates the model on a testset. - @param dataset Test dataset to evaluate model on. - "" - java_blr_summary = self._call_java("evaluate", df) + @since("2.0.0") + def evaluate(self, dataset): + """ + Evaluates the model on a test dataset. + + :param dataset: + Test dataset to evaluate model on. + """ + java_blr_summary = self._call_java("evaluate", dataset) return BinaryLogisticRegressionSummary(java_blr_summary) - """ class LogisticRegressionSummary(JavaCallable): diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 74b7b287b4e8f..e45bffc5c09d1 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -171,16 +171,16 @@ def hasSummary(self): """ return self._call_java("hasSummary") - """ - TODO: enable once Scala API is made public - def evaluate(self, df): - "" - Evaluates the model on a testset. - @param dataset Test dataset to evaluate model on. - "" - java_lr_summary = self._call_java("evaluate", df) + @since("2.0.0") + def evaluate(self, dataset): + """ + Evaluates the model on a test dataset. + + :param dataset: + Test dataset to evaluate model on. 
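+        :return:
+            A :py:class:`LinearRegressionSummary` summarizing the model
+            over the given dataset.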
+ """ + java_lr_summary = self._call_java("evaluate", dataset) return LinearRegressionSummary(java_lr_summary) - """ class LinearRegressionSummary(JavaCallable): diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 350ea26a595ee..21bcd10482add 100644 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -700,6 +700,10 @@ def test_linear_regression_summary(self): self.assertTrue(isinstance(tValues, list) and isinstance(tValues[0], float)) pValues = s.pValues self.assertTrue(isinstance(pValues, list) and isinstance(pValues[0], float)) + # test evaluation (with training dataset) produces a summary with same values + # one check is enough to verify a summary is returned, Scala version runs full test + sameSummary = model.evaluate(df) + self.assertAlmostEqual(sameSummary.explainedVariance, s.explainedVariance) def test_logistic_regression_summary(self): from pyspark.mllib.linalg import Vectors @@ -725,6 +729,10 @@ def test_logistic_regression_summary(self): self.assertTrue(isinstance(s.fMeasureByThreshold, DataFrame)) self.assertTrue(isinstance(s.precisionByThreshold, DataFrame)) self.assertTrue(isinstance(s.recallByThreshold, DataFrame)) + # test evaluation (with training dataset) produces a summary with same values + # one check is enough to verify a summary is returned, Scala version runs full test + sameSummary = model.evaluate(df) + self.assertAlmostEqual(sameSummary.areaUnderROC, s.areaUnderROC) if __name__ == "__main__": From 4ba3f731c58918c5e2eac13338dc834e411b933f Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Mon, 4 Apr 2016 15:18:41 -0700 Subject: [PATCH 11/17] added featuresCol and objectiveHistory to LinearRegressionTrainingSummary, added check in evaluation(dataset) to make sure input is a DataFrame, fixed issues in docstrings --- .../classification/LogisticRegression.scala | 8 +++--- .../ml/regression/LinearRegression.scala | 6 ++++- python/pyspark/ml/classification.py | 23 ++++++++++++----- python/pyspark/ml/regression.py | 25 ++++++++++++++++--- python/pyspark/ml/tests.py | 3 +++ 5 files changed, 51 insertions(+), 14 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index aeb94a6600e51..ee836a6bb30af 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -777,10 +777,10 @@ sealed trait LogisticRegressionSummary extends Serializable { /** Dataframe outputted by the model's `transform` method. */ def predictions: DataFrame - /** Field in "predictions" which gives the calibrated probability of each instance as a vector. */ + /** Field in "predictions" which gives the calibrated probability of each class as a vector. */ def probabilityCol: String - /** Field in "predictions" which gives the true label of each instance. */ + /** Field in "predictions" which gives the true label of each instance (if available). */ def labelCol: String /** Field in "predictions" which gives the features of each instance as a vector. */ @@ -794,7 +794,7 @@ sealed trait LogisticRegressionSummary extends Serializable { * * @param predictions dataframe outputted by the model's `transform` method. * @param probabilityCol field in "predictions" which gives the calibrated probability of - * each instance as a vector. + * each class as a vector. 
* @param labelCol field in "predictions" which gives the true label of each instance. * @param featuresCol field in "predictions" which gives the features of each instance as a vector. * @param objectiveHistory objective function (scaled loss + regularization) at each iteration. @@ -818,7 +818,7 @@ class BinaryLogisticRegressionTrainingSummary private[classification] ( * * @param predictions dataframe outputted by the model's `transform` method. * @param probabilityCol field in "predictions" which gives the calibrated probability of - * each instance. + * each class. * @param labelCol field in "predictions" which gives the true label of each instance. * @param featuresCol field in "predictions" which gives the features of each instance as a vector. */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 2633c06f40561..952f20bc62bac 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -511,7 +511,7 @@ object LinearRegressionModel extends MLReadable[LinearRegressionModel] { /** * :: Experimental :: * Linear regression training results. Currently, the training summary ignores the - * training coefficients except for the objective trace. + * training weights except for the objective trace. * * @param predictions predictions outputted by the model's `transform` method. * @param objectiveHistory objective function (scaled loss + regularization) at each iteration. @@ -539,6 +539,10 @@ class LinearRegressionTrainingSummary private[regression] ( * Linear regression results evaluated on a dataset. * * @param predictions predictions outputted by the model's `transform` method. + * @param predictionCol Field in "predictions" which gives the predicted value of the label at + * each instance. + * @param labelCol Field in "predictions" which gives the true label of each instance + * (if available). */ @Since("1.5.0") @Experimental diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index cc718a98a3226..314865c36d339 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -25,9 +25,11 @@ from pyspark.ml.regression import ( RandomForestParams, TreeEnsembleParams, DecisionTreeModel, TreeEnsembleModels) from pyspark.mllib.common import inherit_doc +from pyspark.sql import DataFrame __all__ = ['LogisticRegression', 'LogisticRegressionModel', + 'LogisticRegressionSummary', 'LogisticRegressionTrainingSummary' 'BinaryLogisticRegressionSummary', 'BinaryLogisticRegressionTrainingSummary', 'DecisionTreeClassifier', 'DecisionTreeClassificationModel', 'GBTClassifier', 'GBTClassificationModel', @@ -240,9 +242,10 @@ def summary(self): """ Gets summary (e.g. residuals, mse, r-squared ) of model on training set. An exception is thrown if - `trainingSummary == None`. + `trainingSummary is None`. """ java_blrt_summary = self._call_java("summary") + # Note: Once multiclass is added, update this to return correct summary return BinaryLogisticRegressionTrainingSummary(java_blrt_summary) @property @@ -260,8 +263,11 @@ def evaluate(self, dataset): Evaluates the model on a test dataset. :param dataset: - Test dataset to evaluate model on. 
+ Test dataset to evaluate model on, where dataset is an + instance of :py:class:`pyspark.sql.DataFrame` """ + if not isinstance(dataset, DataFrame): + raise ValueError("dataset must be a DataFrame but got %s." % type(dataset)) java_blr_summary = self._call_java("evaluate", dataset) return BinaryLogisticRegressionSummary(java_blr_summary) @@ -286,7 +292,7 @@ def predictions(self): def probabilityCol(self): """ Field in "predictions" which gives the calibrated probability - of each instance as a vector. + of each class as a vector. """ return self._call_java("probabilityCol") @@ -295,7 +301,7 @@ def probabilityCol(self): def labelCol(self): """ Field in "predictions" which gives the true label of each - instance. + instance (if available). """ return self._call_java("labelCol") @@ -309,6 +315,7 @@ def featuresCol(self): return self._call_java("featuresCol") +@inherit_doc class LogisticRegressionTrainingSummary(LogisticRegressionSummary): """ Abstraction for multinomial Logistic Regression Training results. @@ -322,7 +329,8 @@ class LogisticRegressionTrainingSummary(LogisticRegressionSummary): @since("2.0.0") def objectiveHistory(self): """ - Objective function (scaled loss + regularization) at each iteration. + Objective function (scaled loss + regularization) at each + iteration. """ return self._call_java("objectiveHistory") @@ -335,6 +343,7 @@ def totalIterations(self): return self._call_java("totalIterations") +@inherit_doc class BinaryLogisticRegressionSummary(LogisticRegressionSummary): """ .. note:: Experimental @@ -377,7 +386,8 @@ def areaUnderROC(self): def pr(self): """ Returns the precision-recall curve, which is an Dataframe - containing two fields recall, precision with (0.0, 1.0) prepended to it. + containing two fields recall, precision with (0.0, 1.0) prepended + to it. Note: This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`. This will change in later Spark @@ -427,6 +437,7 @@ def recallByThreshold(self): return self._call_java("recallByThreshold") +@inherit_doc class BinaryLogisticRegressionTrainingSummary(BinaryLogisticRegressionSummary, LogisticRegressionTrainingSummary): """ diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 378865f3d94b2..9951a1c7c7eef 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -157,7 +157,7 @@ def summary(self): """ Gets summary (e.g. residuals, mse, r-squared ) of model on training set. An exception is thrown if - `trainingSummary == None`. + `trainingSummary is None`. """ java_lrt_summary = self._call_java("summary") return LinearRegressionTrainingSummary(java_lrt_summary) @@ -214,7 +214,7 @@ def predictionCol(self): def labelCol(self): """ Field in "predictions" which gives the true label of each - instance. + instance (if available). """ return self._call_java("labelCol") @@ -336,16 +336,35 @@ def pValues(self): return self._call_java("pValues") +@inherit_doc class LinearRegressionTrainingSummary(LinearRegressionSummary): """ .. note:: Experimental Linear regression training results. Currently, the training summary ignores the - training coefficients except for the objective trace. + training weights except for the objective trace. .. versionadded:: 2.0.0 """ + @property + @since("2.0.0") + def featuresCol(self): + """ + Field in "predictions" which gives the features of each instance + as a vector. 
+ """ + return self._call_java("featuresCol") + + @property + @since("2.0.0") + def objectiveHistory(self): + """ + Objective function (scaled loss + regularization) at each + iteration. + """ + return self._call_java("objectiveHistory") + @property @since("2.0.0") def totalIterations(self): diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 5216f2009ae5f..a2c4cd8b6cc3d 100644 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -726,6 +726,9 @@ def test_linear_regression_summary(self): self.assertTrue(isinstance(s.predictions, DataFrame)) self.assertEqual(s.predictionCol, "prediction") self.assertEqual(s.labelCol, "label") + self.assertEqual(s.featuresCol, "features") + objHist = s.objectiveHistory + self.assertTrue(isinstance(objHist, list) and isinstance(objHist[0], float)) self.assertAlmostEqual(s.explainedVariance, 0.25, 2) self.assertAlmostEqual(s.meanAbsoluteError, 0.0) self.assertAlmostEqual(s.meanSquaredError, 0.0) From d23f546d5c0742c79840f2bbbd80eb22adcf32c0 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Tue, 5 Apr 2016 10:07:49 -0700 Subject: [PATCH 12/17] moved featuresCol from LinearRegressionTrainingSummary to LinearRegressionSummary --- .../ml/regression/LinearRegression.scala | 19 +++++++++++++------ python/pyspark/ml/regression.py | 18 +++++++++--------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 952f20bc62bac..2fc05b480465c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -190,9 +190,9 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String summaryModel.transform(dataset), predictionColName, $(labelCol), + $(featuresCol), summaryModel, model.diagInvAtWA.toArray, - $(featuresCol), Array(0D)) return lrModel.setSummary(trainingSummary) @@ -249,9 +249,9 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String summaryModel.transform(dataset), predictionColName, $(labelCol), + $(featuresCol), model, Array(0D), - $(featuresCol), Array(0D)) return copyValues(model.setSummary(trainingSummary)) } else { @@ -356,9 +356,9 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String summaryModel.transform(dataset), predictionColName, $(labelCol), + $(featuresCol), model, Array(0D), - $(featuresCol), objectiveHistory) model.setSummary(trainingSummary) } @@ -421,7 +421,7 @@ class LinearRegressionModel private[ml] ( // Handle possible missing or invalid prediction columns val (summaryModel, predictionColName) = findSummaryModelAndPredictionCol() new LinearRegressionSummary(summaryModel.transform(dataset), predictionColName, - $(labelCol), summaryModel, Array(0D)) + $(labelCol), $(featuresCol), summaryModel, Array(0D)) } /** @@ -522,11 +522,17 @@ class LinearRegressionTrainingSummary private[regression] ( predictions: DataFrame, predictionCol: String, labelCol: String, + featuresCol: String, model: LinearRegressionModel, diagInvAtWA: Array[Double], - val featuresCol: String, val objectiveHistory: Array[Double]) - extends LinearRegressionSummary(predictions, predictionCol, labelCol, model, diagInvAtWA) { + extends LinearRegressionSummary( + predictions, + predictionCol, + labelCol, + featuresCol, + model, + diagInvAtWA) { /** Number of training iterations until termination 
*/ @Since("1.5.0") @@ -550,6 +556,7 @@ class LinearRegressionSummary private[regression] ( @transient val predictions: DataFrame, val predictionCol: String, val labelCol: String, + val featuresCol: String, val model: LinearRegressionModel, private val diagInvAtWA: Array[Double]) extends Serializable { diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 9951a1c7c7eef..459944d0505d6 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -218,6 +218,15 @@ def labelCol(self): """ return self._call_java("labelCol") + @property + @since("2.0.0") + def featuresCol(self): + """ + Field in "predictions" which gives the features of each instance + as a vector. + """ + return self._call_java("featuresCol") + @property @since("2.0.0") def explainedVariance(self): @@ -347,15 +356,6 @@ class LinearRegressionTrainingSummary(LinearRegressionSummary): .. versionadded:: 2.0.0 """ - @property - @since("2.0.0") - def featuresCol(self): - """ - Field in "predictions" which gives the features of each instance - as a vector. - """ - return self._call_java("featuresCol") - @property @since("2.0.0") def objectiveHistory(self): From b44c2338ec30028c4c701628527d5ee5e5abacaa Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Tue, 5 Apr 2016 13:33:17 -0700 Subject: [PATCH 13/17] was missing comma in classification __all_ list --- python/pyspark/ml/classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 314865c36d339..4d3a10ffba3db 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -29,7 +29,7 @@ __all__ = ['LogisticRegression', 'LogisticRegressionModel', - 'LogisticRegressionSummary', 'LogisticRegressionTrainingSummary' + 'LogisticRegressionSummary', 'LogisticRegressionTrainingSummary', 'BinaryLogisticRegressionSummary', 'BinaryLogisticRegressionTrainingSummary', 'DecisionTreeClassifier', 'DecisionTreeClassificationModel', 'GBTClassifier', 'GBTClassificationModel', From e0ea89ed194298241156fc7edeec56a60bda3a61 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Tue, 5 Apr 2016 13:53:16 -0700 Subject: [PATCH 14/17] added seealso for solver dependent summary metrics --- python/pyspark/ml/regression.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 459944d0505d6..a2099b41ba29f 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -325,6 +325,9 @@ def devianceResiduals(self): def coefficientStandardErrors(self): """ Standard error of estimated coefficients and intercept. + This value is only available when using the "normal" solver. + + .. seealso:: :py:attr:`LinearRegression.solver` """ return self._call_java("coefficientStandardErrors") @@ -333,6 +336,9 @@ def coefficientStandardErrors(self): def tValues(self): """ T-statistic of estimated coefficients and intercept. + This value is only available when using the "normal" solver. + + .. seealso:: :py:attr:`LinearRegression.solver` """ return self._call_java("tValues") @@ -341,6 +347,9 @@ def tValues(self): def pValues(self): """ Two-sided p-value of estimated coefficients and intercept. + This value is only available when using the "normal" solver. + + .. 
@@ -341,6 +347,9 @@ def pValues(self):
         """
         Two-sided p-value of estimated coefficients and intercept.
+        This value is only available when using the "normal" solver.
+
+        .. seealso:: :py:attr:`LinearRegression.solver`
         """
         return self._call_java("pValues")
 
@@ -362,6 +371,9 @@ def objectiveHistory(self):
         """
         Objective function (scaled loss + regularization) at each
         iteration.
+        This value is only available when using the "l-bfgs" solver.
+
+        .. seealso:: :py:attr:`LinearRegression.solver`
         """
         return self._call_java("objectiveHistory")
 
@@ -370,6 +382,9 @@ def totalIterations(self):
         """
         Number of training iterations until termination.
+        This value is only available when using the "l-bfgs" solver.
+
+        .. seealso:: :py:attr:`LinearRegression.solver`
         """
         return self._call_java("totalIterations")

From 7f2bed6aea450fae050e2c72967ec43d63f40ca0 Mon Sep 17 00:00:00 2001
From: Bryan Cutler
Date: Tue, 5 Apr 2016 14:26:02 -0700
Subject: [PATCH 15/17] cleaned up docs

---
 .../classification/LogisticRegression.scala |  2 +-
 .../ml/regression/LinearRegression.scala    | 20 ++++++++++++++++---
 python/pyspark/ml/classification.py         |  2 +-
 python/pyspark/ml/regression.py             |  2 +-
 4 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index ee836a6bb30af..37182928cccc8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -818,7 +818,7 @@ class BinaryLogisticRegressionTrainingSummary private[classification] (
 *
 * @param predictions dataframe outputted by the model's `transform` method.
 * @param probabilityCol field in "predictions" which gives the calibrated probability of
- *                       each class.
+ *                       each class as a vector.
 * @param labelCol field in "predictions" which gives the true label of each instance.
 * @param featuresCol field in "predictions" which gives the features of each instance as a vector.
 */
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 2fc05b480465c..9619e72a4594a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -534,7 +534,12 @@ class LinearRegressionTrainingSummary private[regression] (
     model,
     diagInvAtWA) {
 
-  /** Number of training iterations until termination */
+  /**
+   * Number of training iterations until termination
+   *
+   * This value is only available when using the "l-bfgs" solver.
+   * @see [[LinearRegression.solver]]
+   */
   @Since("1.5.0")
   val totalIterations = objectiveHistory.length
 
@@ -547,8 +552,8 @@ class LinearRegressionTrainingSummary private[regression] (
 * @param predictions predictions outputted by the model's `transform` method.
 * @param predictionCol Field in "predictions" which gives the predicted value of the label at
 *                      each instance.
- * @param labelCol Field in "predictions" which gives the true label of each instance
- *                 (if available).
+ * @param labelCol Field in "predictions" which gives the true label of each instance.
+ * @param featuresCol Field in "predictions" which gives the features of each instance as a vector.
 */
 @Since("1.5.0")
 @Experimental
@@ -650,6 +655,9 @@ class LinearRegressionSummary private[regression] (
 
   /**
    * Standard error of estimated coefficients and intercept.
+   *
+   * This value is only available when using the "normal" solver.
+   * @see [[LinearRegression.solver]]
    */
   lazy val coefficientStandardErrors: Array[Double] = {
     if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) {
@@ -671,6 +679,9 @@ class LinearRegressionSummary private[regression] (
 
   /**
    * T-statistic of estimated coefficients and intercept.
+   *
+   * This value is only available when using the "normal" solver.
+   * @see [[LinearRegression.solver]]
   */
   lazy val tValues: Array[Double] = {
     if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) {
@@ -688,6 +699,9 @@ class LinearRegressionSummary private[regression] (
 
   /**
    * Two-sided p-value of estimated coefficients and intercept.
+   *
+   * This value is only available when using the "normal" solver.
+   * @see [[LinearRegression.solver]]
   */
   lazy val pValues: Array[Double] = {
     if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) {
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 4d3a10ffba3db..be7f9ea9efc11 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -301,7 +301,7 @@ def probabilityCol(self):
     def labelCol(self):
         """
         Field in "predictions" which gives the true label of each
-        instance (if available).
+        instance.
         """
         return self._call_java("labelCol")
 
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index a2099b41ba29f..071ba9b7644ca 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -214,7 +214,7 @@ def predictionCol(self):
     def labelCol(self):
         """
         Field in "predictions" which gives the true label of each
-        instance (if available).
+        instance.
         """
         return self._call_java("labelCol")
 

From 13a10ecfb24bed6a7708fa1a683855b1416accdd Mon Sep 17 00:00:00 2001
From: Bryan Cutler
Date: Tue, 5 Apr 2016 14:32:29 -0700
Subject: [PATCH 16/17] needed to check that evaluate() input for
 LinearRegressionModel is a DataFrame

---
 python/pyspark/ml/regression.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 071ba9b7644ca..6cd1b4bf3a149 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -22,6 +22,7 @@
 from pyspark.ml.util import *
 from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaCallable
 from pyspark.mllib.common import inherit_doc
+from pyspark.sql import DataFrame
 
 __all__ = ['AFTSurvivalRegression', 'AFTSurvivalRegressionModel',
@@ -177,8 +178,11 @@ def evaluate(self, dataset):
         Evaluates the model on a test dataset.
 
         :param dataset:
-          Test dataset to evaluate model on.
+          Test dataset to evaluate model on, where dataset is an
+          instance of :py:class:`pyspark.sql.DataFrame`
         """
+        if not isinstance(dataset, DataFrame):
+            raise ValueError("dataset must be a DataFrame but got %s." % type(dataset))
         java_lr_summary = self._call_java("evaluate", dataset)
         return LinearRegressionSummary(java_lr_summary)
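The solver notes above split the summary metrics into two groups; a sketch of both paths (not from the patch; assumes a hypothetical training DataFrame `df` with "label" and "features" columns):

    # "normal" solver: closed-form statistics are defined
    m1 = LinearRegression(solver="normal").fit(df)
    print(m1.summary.coefficientStandardErrors)
    print(m1.summary.tValues, m1.summary.pValues)

    # "l-bfgs" solver: the iterative trace is defined instead
    m2 = LinearRegression(solver="l-bfgs").fit(df)
    print(m2.summary.objectiveHistory, m2.summary.totalIterations)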
From 1f030e91369404535d107a58cfc786f7c9299ab9 Mon Sep 17 00:00:00 2001
From: Bryan Cutler
Date: Tue, 5 Apr 2016 16:50:09 -0700
Subject: [PATCH 17/17] added MiMa exclude for change in LinearRegressionSummary constructor

---
 project/MimaExcludes.scala | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index 2be490b94264a..9762ef96f9498 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -601,6 +601,9 @@
       // [SPARK-13674][SQL] Add wholestage codegen support to Sample
       ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.util.random.PoissonSampler.this"),
       ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.util.random.PoissonSampler.this")
+    ) ++ Seq(
+      // [SPARK-13430][ML] moved featuresCol from LinearRegressionTrainingSummary to LinearRegressionSummary
+      ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.ml.regression.LinearRegressionSummary.this")
     )
   case v if v.startsWith("1.6") =>
     Seq(