From 21d4fe3de08b0612f84750b4c34064340ac640b2 Mon Sep 17 00:00:00 2001 From: lewuathe Date: Fri, 20 Mar 2015 15:27:40 +0900 Subject: [PATCH 1/2] Fix init logic of weights --- python/pyspark/mllib/regression.py | 3 ++- python/pyspark/mllib/tests.py | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 0c21ad578793f..c9e534728a347 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -135,7 +135,8 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights): first = data.first() if not isinstance(first, LabeledPoint): raise ValueError("data should be an RDD of LabeledPoint, but got %s" % first) - initial_weights = initial_weights or [0.0] * len(data.first().features) + if initial_weights == None: + initial_weights = [0.0] * len(data.first().features) weights, intercept = train_func(data, _convert_to_vector(initial_weights)) return modelClass(weights, intercept) diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py index 5328d99b69684..0fd1e7fc846d9 100644 --- a/python/pyspark/mllib/tests.py +++ b/python/pyspark/mllib/tests.py @@ -323,6 +323,12 @@ def test_regression(self): self.assertTrue(gbt_model.predict(features[2]) <= 0) self.assertTrue(gbt_model.predict(features[3]) > 0) + try: + LinearRegressionWithSGD.train(rdd, initialWeights=array([1.0, 1.0])) + LassoWithSGD.train(rdd, initialWeights=array([1.0, 1.0])) + RidgeRegressionWithSGD.train(rdd, initialWeights=array([1.0, 1.0])) + except ValueError: + self.fail() class StatTests(PySparkTestCase): # SPARK-4023 From 779520190dd2a106a2ffdc5dc1d9382de66fd064 Mon Sep 17 00:00:00 2001 From: lewuathe Date: Fri, 20 Mar 2015 15:44:16 +0900 Subject: [PATCH 2/2] Fix lint-python errors --- python/pyspark/mllib/regression.py | 2 +- python/pyspark/mllib/tests.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index c9e534728a347..ad2b0505e765b 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -135,7 +135,7 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights): first = data.first() if not isinstance(first, LabeledPoint): raise ValueError("data should be an RDD of LabeledPoint, but got %s" % first) - if initial_weights == None: + if initial_weights is None: initial_weights = [0.0] * len(data.first().features) weights, intercept = train_func(data, _convert_to_vector(initial_weights)) return modelClass(weights, intercept) diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py index 0fd1e7fc846d9..155019638f806 100644 --- a/python/pyspark/mllib/tests.py +++ b/python/pyspark/mllib/tests.py @@ -330,6 +330,7 @@ def test_regression(self): except ValueError: self.fail() + class StatTests(PySparkTestCase): # SPARK-4023 def test_col_with_different_rdds(self):