From 0a04a797034e6208456569b88b8f9a31a16aae91 Mon Sep 17 00:00:00 2001
From: Bryan Cutler
Date: Thu, 2 Jun 2016 17:20:10 -0700
Subject: [PATCH 1/2] removed cases where setDefault seed=None

---
 python/pyspark/ml/classification.py | 4 ++--
 python/pyspark/ml/feature.py        | 2 +-
 python/pyspark/ml/recommendation.py | 2 +-
 python/pyspark/ml/regression.py     | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index ea660d7808fa4..99899aef6cbbe 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -681,7 +681,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
         self._java_obj = self._new_java_obj(
             "org.apache.spark.ml.classification.RandomForestClassifier", self.uid)
         self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
-                         maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None,
+                         maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
                          impurity="gini", numTrees=20, featureSubsetStrategy="auto")
         kwargs = self.__init__._input_kwargs
         self.setParams(**kwargs)
@@ -809,7 +809,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
             "org.apache.spark.ml.classification.GBTClassifier", self.uid)
         self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
                          maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
-                         lossType="logistic", maxIter=20, stepSize=0.1, seed=None)
+                         lossType="logistic", maxIter=20, stepSize=0.1)
         kwargs = self.__init__._input_kwargs
         self.setParams(**kwargs)

diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 1aff2e550f38c..e4105aacfbaf3 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -2245,7 +2245,7 @@ def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025,
         super(Word2Vec, self).__init__()
         self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Word2Vec", self.uid)
         self._setDefault(vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
-                         seed=None, windowSize=5)
+                         windowSize=5)
         kwargs = self.__init__._input_kwargs
         self.setParams(**kwargs)

diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py
index 1778bfe938b1e..3e80c6ad99e71 100644
--- a/python/pyspark/ml/recommendation.py
+++ b/python/pyspark/ml/recommendation.py
@@ -142,7 +142,7 @@ def __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemB
         super(ALS, self).__init__()
         self._java_obj = self._new_java_obj("org.apache.spark.ml.recommendation.ALS", self.uid)
         self._setDefault(rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10,
-                         implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=None,
+                         implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item",
                          ratingCol="rating", nonnegative=False, checkpointInterval=10,
                          intermediateStorageLevel="MEMORY_AND_DISK",
                          finalStorageLevel="MEMORY_AND_DISK")

diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 1b7af7ef597c2..b194e39136f08 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -860,7 +860,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
             "org.apache.spark.ml.regression.RandomForestRegressor", self.uid)
         self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
                          maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
-                         impurity="variance", subsamplingRate=1.0, seed=None, numTrees=20,
+                         impurity="variance", subsamplingRate=1.0, numTrees=20,
                          featureSubsetStrategy="auto")
         kwargs = self.__init__._input_kwargs
         self.setParams(**kwargs)
@@ -981,7 +981,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
         self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
                          maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0,
                          checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1,
-                         seed=None, impurity="variance")
+                         impurity="variance")
         kwargs = self.__init__._input_kwargs
         self.setParams(**kwargs)

From 8fa36ba59879e4b43dcbbe5672b3cb5d5775e503 Mon Sep 17 00:00:00 2001
From: Bryan Cutler
Date: Tue, 14 Jun 2016 08:37:01 -0700
Subject: [PATCH 2/2] explicitly set seed in ALS doc tests to match previous outputs

---
 python/pyspark/ml/recommendation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py
index 3e80c6ad99e71..7a916b9ccf171 100644
--- a/python/pyspark/ml/recommendation.py
+++ b/python/pyspark/ml/recommendation.py
@@ -68,7 +68,7 @@ class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, HasPredictionCol, Ha
     >>> df = spark.createDataFrame(
     ...     [(0, 0, 4.0), (0, 1, 2.0), (1, 1, 3.0), (1, 2, 4.0), (2, 1, 1.0), (2, 2, 5.0)],
     ...     ["user", "item", "rating"])
->>> als = ALS(rank=10, maxIter=5)
+>>> als = ALS(rank=10, maxIter=5, seed=0)
     >>> model = als.fit(df)
     >>> model.rank
     10
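
A minimal usage sketch of the behavior these patches rely on: once `seed=None` is no longer forced in `_setDefault`, callers who need run-to-run reproducibility pass the seed explicitly, which is exactly what the updated ALS doctest does. This assumes an active SparkSession bound to `spark` and the toy ratings DataFrame `df` built in that doctest; the expected `model.rank` output is taken from the doctest itself.

    >>> from pyspark.ml.recommendation import ALS
    >>> als = ALS(rank=10, maxIter=5, seed=0)  # explicit seed keeps results stable across runs
    >>> model = als.fit(df)
    >>> model.rank
    10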