From 183df3bfbb56921aa85095e62deefe09a37435b0 Mon Sep 17 00:00:00 2001
From: somideshmukh <somilde@us.ibm.com>
Date: Mon, 25 Jan 2016 17:15:32 +0530
Subject: [PATCH] [SPARK-12632][Python] Make Parameter Descriptions Consistent
 for PySpark MLlib FPM and Recommendation

---
 python/pyspark/mllib/fpm.py            | 44 ++++++++++++--------
 python/pyspark/mllib/recommendation.py | 56 ++++++++++++++++++--------
 2 files changed, 67 insertions(+), 33 deletions(-)

diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py
index 2039decc0cb3c..5637a63b4ee0c 100644
--- a/python/pyspark/mllib/fpm.py
+++ b/python/pyspark/mllib/fpm.py
@@ -67,12 +67,14 @@ class FPGrowth(object):
     def train(cls, data, minSupport=0.3, numPartitions=-1):
         """
         Computes an FP-Growth model that contains frequent itemsets.
-
-        :param data: The input data set, each element contains a
-            transaction.
-        :param minSupport: The minimal support level (default: `0.3`).
-        :param numPartitions: The number of partitions used by
-            parallel FP-growth (default: same as input data).
+        :param data:
+          The input data set, each element contains a transaction.
+        :param minSupport:
+          The minimal support level.
+          (default: 0.3)
+        :param numPartitions:
+          The number of partitions used by parallel FP-growth.
+          (default: same as input data)
         """
         model = callMLlibFunc("trainFPGrowthModel", data, float(minSupport), int(numPartitions))
         return FPGrowthModel(model)
@@ -128,17 +130,25 @@ class PrefixSpan(object):
     @since("1.6.0")
     def train(cls, data, minSupport=0.1, maxPatternLength=10, maxLocalProjDBSize=32000000):
         """
-        Finds the complete set of frequent sequential patterns in the input sequences of itemsets.
-
-        :param data: The input data set, each element contains a sequnce of itemsets.
-        :param minSupport: the minimal support level of the sequential pattern, any pattern appears
-            more than  (minSupport * size-of-the-dataset) times will be output (default: `0.1`)
-        :param maxPatternLength: the maximal length of the sequential pattern, any pattern appears
-            less than maxPatternLength will be output. (default: `10`)
-        :param maxLocalProjDBSize: The maximum number of items (including delimiters used in
-            the internal storage format) allowed in a projected database before local
-            processing. If a projected database exceeds this size, another
-            iteration of distributed prefix growth is run. (default: `32000000`)
+        Finds the complete set of frequent sequential patterns in the input sequences of itemsets.
+
+        :param data:
+          The input data set, each element contains a sequence of itemsets.
+        :param minSupport:
+          The minimal support level of the sequential pattern. Any pattern that
+          appears more than (minSupport * size-of-the-dataset) times will be
+          output.
+          (default: 0.1)
+        :param maxPatternLength:
+          The maximal length of the sequential pattern. Any pattern that
+          appears less than maxPatternLength will be output.
+          (default: 10)
+        :param maxLocalProjDBSize:
+          The maximum number of items (including delimiters used in the internal
+          storage format) allowed in a projected database before local
+          processing. If a projected database exceeds this size, another
+          iteration of distributed prefix growth is run.
+          (default: 32000000)
         """
         model = callMLlibFunc("trainPrefixSpanModel",
                               data, minSupport, maxPatternLength, maxLocalProjDBSize)
diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py
index 93e47a797f490..680fa01931522 100644
--- a/python/pyspark/mllib/recommendation.py
+++ b/python/pyspark/mllib/recommendation.py
@@ -165,28 +165,30 @@ def productFeatures(self):
     @since("1.4.0")
     def recommendUsers(self, product, num):
         """
-        Recommends the top "num" number of users for a given product and returns a list
-        of Rating objects sorted by the predicted rating in descending order.
+        Recommends the top "num" number of users for a given product and returns a
+        list of Rating objects sorted by the predicted rating in descending order.
         """
         return list(self.call("recommendUsers", product, num))
 
     @since("1.4.0")
     def recommendProducts(self, user, num):
         """
-        Recommends the top "num" number of products for a given user and returns a list
-        of Rating objects sorted by the predicted rating in descending order.
+        Recommends the top "num" number of products for a given user and returns a
+        list of Rating objects sorted by the predicted rating in descending order.
         """
         return list(self.call("recommendProducts", user, num))
 
     def recommendProductsForUsers(self, num):
         """
-        Recommends top "num" products for all users. The number returned may be less than this.
+        Recommends top "num" products for all users. The number returned may be
+        less than this.
         """
         return self.call("wrappedRecommendProductsForUsers", num)
 
     def recommendUsersForProducts(self, num):
         """
-        Recommends top "num" users for all products. The number returned may be less than this.
+        Recommends top "num" users for all products. The number returned may be
+        less than this.
         """
         return self.call("wrappedRecommendUsersForProducts", num)
 
@@ -234,11 +236,22 @@ def _prepare(cls, ratings):
     def train(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, nonnegative=False,
               seed=None):
         """
-        Train a matrix factorization model given an RDD of ratings given by users to some products,
-        in the form of (userID, productID, rating) pairs. We approximate the ratings matrix as the
-        product of two lower-rank matrices of a given rank (number of features). To solve for these
-        features, we run a given number of iterations of ALS. This is done using a level of
-        parallelism given by `blocks`.
+        Train a matrix factorization model given an RDD of ratings given by users
+        to some products, in the form of (userID, productID, rating) pairs. We
+        approximate the ratings matrix as the product of two lower-rank matrices
+        of a given rank (number of features). To solve for these features, we run
+        a given number of iterations of ALS. This is done using a level of
+        parallelism given by `blocks`.
+
+        :param iterations:
+          Number of iterations of ALS.
+          (default: 5)
+        :param lambda_:
+          Regularization parameter.
+          (default: 0.01)
+        :param seed:
+          Random seed for initial matrix factorization model.
+          (default: None)
         """
         model = callMLlibFunc("trainALSModel", cls._prepare(ratings), rank, iterations,
                               lambda_, blocks, nonnegative, seed)
@@ -249,11 +262,22 @@ def train(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, nonnegative
     def trainImplicit(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, alpha=0.01,
                       nonnegative=False, seed=None):
         """
-        Train a matrix factorization model given an RDD of 'implicit preferences' given by users
-        to some products, in the form of (userID, productID, preference) pairs. We approximate the
-        ratings matrix as the product of two lower-rank matrices of a given rank (number of
-        features).  To solve for these features, we run a given number of iterations of ALS.
-        This is done using a level of parallelism given by `blocks`.
+        Train a matrix factorization model given an RDD of 'implicit preferences'
+        given by users to some products, in the form of (userID, productID,
+        preference) pairs. We approximate the ratings matrix as the product of
+        two lower-rank matrices of a given rank (number of features). To solve
+        for these features, we run a given number of iterations of ALS. This is
+        done using a level of parallelism given by `blocks`.
+
+        :param iterations:
+          Number of iterations of ALS.
+          (default: 5)
+        :param lambda_:
+          Regularization parameter.
+          (default: 0.01)
+        :param seed:
+          Random seed for initial matrix factorization model.
+          (default: None)
         """
         model = callMLlibFunc("trainImplicitALSModel", cls._prepare(ratings), rank,
                               iterations, lambda_, blocks, alpha, nonnegative, seed)