From 21d4fe3de08b0612f84750b4c34064340ac640b2 Mon Sep 17 00:00:00 2001
From: lewuathe <lewuathe@me.com>
Date: Fri, 20 Mar 2015 15:27:40 +0900
Subject: [PATCH 1/2] Default initial_weights only when it is None

---
 python/pyspark/mllib/regression.py | 3 ++-
 python/pyspark/mllib/tests.py      | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 0c21ad578793f..c9e534728a347 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -135,7 +135,8 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
     first = data.first()
     if not isinstance(first, LabeledPoint):
         raise ValueError("data should be an RDD of LabeledPoint, but got %s" % first)
-    initial_weights = initial_weights or [0.0] * len(data.first().features)
+    if initial_weights == None:
+        initial_weights = [0.0] * len(data.first().features)
     weights, intercept = train_func(data, _convert_to_vector(initial_weights))
     return modelClass(weights, intercept)
 
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index 5328d99b69684..0fd1e7fc846d9 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -323,6 +323,12 @@ def test_regression(self):
         self.assertTrue(gbt_model.predict(features[2]) <= 0)
         self.assertTrue(gbt_model.predict(features[3]) > 0)
 
+        try:
+            LinearRegressionWithSGD.train(rdd, initialWeights=array([1.0, 1.0]))
+            LassoWithSGD.train(rdd, initialWeights=array([1.0, 1.0]))
+            RidgeRegressionWithSGD.train(rdd, initialWeights=array([1.0, 1.0]))
+        except ValueError:
+            self.fail()
 
 class StatTests(PySparkTestCase):
     # SPARK-4023

From 779520190dd2a106a2ffdc5dc1d9382de66fd064 Mon Sep 17 00:00:00 2001
From: lewuathe <lewuathe@me.com>
Date: Fri, 20 Mar 2015 15:44:16 +0900
Subject: [PATCH 2/2] Fix lint-python errors

---
 python/pyspark/mllib/regression.py | 2 +-
 python/pyspark/mllib/tests.py      | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index c9e534728a347..ad2b0505e765b 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -135,7 +135,7 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
     first = data.first()
     if not isinstance(first, LabeledPoint):
         raise ValueError("data should be an RDD of LabeledPoint, but got %s" % first)
-    if initial_weights == None:
+    if initial_weights is None:
         initial_weights = [0.0] * len(data.first().features)
     weights, intercept = train_func(data, _convert_to_vector(initial_weights))
     return modelClass(weights, intercept)
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index 0fd1e7fc846d9..155019638f806 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -330,6 +330,7 @@ def test_regression(self):
         except ValueError:
             self.fail()
 
+
 class StatTests(PySparkTestCase):
     # SPARK-4023
     def test_col_with_different_rdds(self):