From 68f2a09a87495fe2d860fa9164a9a590bdf522d8 Mon Sep 17 00:00:00 2001 From: Vladimir Vladimirov Date: Thu, 10 Sep 2015 09:11:47 -0400 Subject: [PATCH 1/6] [SPARK-10535] Support for recommendUsersForProducts and recommendProductsForUsers in matrix factorization model for PySpark --- python/pyspark/mllib/recommendation.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py index 506ca2151cce..da5b109594ca 100644 --- a/python/pyspark/mllib/recommendation.py +++ b/python/pyspark/mllib/recommendation.py @@ -156,6 +156,18 @@ def recommendProducts(self, user, num): """ return list(self.call("recommendProducts", user, num)) + def recommendProductsForUsers(self, num): + """ + Recommends topK products for all users. + """ + return list(self.call("recommendProductsForUsers", num)) + + def recommendUsersForProducts(self, num): + """ + Recommends topK users for all products. + """ + return list(self.call("recommendUsersForProducts", num)) + @property def rank(self): return self.call("rank") From cb959d2caf31bf56bd00952e9fa23b5858d5fbcd Mon Sep 17 00:00:00 2001 From: Vladimir Vladimirov Date: Thu, 10 Sep 2015 18:13:44 -0400 Subject: [PATCH 2/6] documentation --- python/pyspark/mllib/recommendation.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py index da5b109594ca..b7929e7c9c26 100644 --- a/python/pyspark/mllib/recommendation.py +++ b/python/pyspark/mllib/recommendation.py @@ -85,6 +85,12 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader): >>> len(latents) == 4 True + >>> products_for_users = model.recommendProductsForUsers(1).collect() + ... + + >>> users_for_products = model.recommendUsersForProducts(1).collect() + ... + >>> model = ALS.train(ratings, 1, nonnegative=True, seed=10) >>> model.predict(2, 2) 3.8... @@ -160,13 +166,13 @@ def recommendProductsForUsers(self, num): """ Recommends topK products for all users. """ - return list(self.call("recommendProductsForUsers", num)) + return self.call("recommendProductsForUsers", num) def recommendUsersForProducts(self, num): """ Recommends topK users for all products. """ - return list(self.call("recommendUsersForProducts", num)) + return self.call("recommendUsersForProducts", num) @property def rank(self): From bc7dc4600ad7aa9ac0a7c3ce6c1ae2697dbab329 Mon Sep 17 00:00:00 2001 From: Vladimir Vladimirov Date: Fri, 11 Sep 2015 16:16:43 -0400 Subject: [PATCH 3/6] wrappers to convert Tuples to Array --- .../api/python/MatrixFactorizationModelWrapper.scala | 11 +++++++++++ python/pyspark/mllib/recommendation.py | 12 ++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala index 534edac56bc5..342352ca7f43 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala @@ -42,4 +42,15 @@ private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorization case (product, feature) => (product, Vectors.dense(feature)) }.asInstanceOf[RDD[(Any, Any)]]) } + + def wrappedRecommendProductsForUsers(num: Int): RDD[Array[Any]] = { + SerDe.fromTuple2RDD(recommendProductsForUsers(num).asInstanceOf[RDD[(Any, Any)]]) + } + + def wrappedRecommendUsersForProducts(num: Int): RDD[Array[Any]] = { + SerDe.fromTuple2RDD(recommendUsersForProducts(num).asInstanceOf[RDD[(Any, Any)]] + ) + } + + } diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py index b7929e7c9c26..0aaa1c737cad 100644 --- a/python/pyspark/mllib/recommendation.py +++ b/python/pyspark/mllib/recommendation.py @@ -86,10 +86,14 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader): True >>> products_for_users = model.recommendProductsForUsers(1).collect() - ... + >>> len(products_for_users) == 2 + >>> products_for_users[0] + (1, (Rating(user=1, product=1, rating=1.1....),)) >>> users_for_products = model.recommendUsersForProducts(1).collect() - ... + >>> len(users_for_products) == 2 + >>> users_for_products[0] + (1, (Rating(user=1, product=1, rating=1.1....),)) >>> model = ALS.train(ratings, 1, nonnegative=True, seed=10) >>> model.predict(2, 2) @@ -166,13 +170,13 @@ def recommendProductsForUsers(self, num): """ Recommends topK products for all users. """ - return self.call("recommendProductsForUsers", num) + return self.call("wrappedRecommendProductsForUsers", num) def recommendUsersForProducts(self, num): """ Recommends topK users for all products. """ - return self.call("recommendUsersForProducts", num) + return self.call("wrappedRecommendUsersForProducts", num) @property def rank(self): From 056b3214d58f5eb28d086927788782aea7ad39f8 Mon Sep 17 00:00:00 2001 From: Vladimir Vladimirov Date: Sun, 20 Sep 2015 10:45:40 -0400 Subject: [PATCH 4/6] updated style based on code review feedback --- .../MatrixFactorizationModelWrapper.scala | 5 +---- python/pyspark/mllib/recommendation.py | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala index 342352ca7f43..eeb7cba882ce 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala @@ -48,9 +48,6 @@ private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorization } def wrappedRecommendUsersForProducts(num: Int): RDD[Array[Any]] = { - SerDe.fromTuple2RDD(recommendUsersForProducts(num).asInstanceOf[RDD[(Any, Any)]] - ) + SerDe.fromTuple2RDD(recommendUsersForProducts(num).asInstanceOf[RDD[(Any, Any)]]) } - - } diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py index 0aaa1c737cad..a80e0351ac1f 100644 --- a/python/pyspark/mllib/recommendation.py +++ b/python/pyspark/mllib/recommendation.py @@ -74,26 +74,28 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader): >>> first_user = model.userFeatures().take(1)[0] >>> latents = first_user[1] - >>> len(latents) == 4 - True + >>> len(latents) + 4 >>> model.productFeatures().collect() [(1, array('d', [...])), (2, array('d', [...]))] >>> first_product = model.productFeatures().take(1)[0] >>> latents = first_product[1] - >>> len(latents) == 4 - True + >>> len(latents) + 4 >>> products_for_users = model.recommendProductsForUsers(1).collect() - >>> len(products_for_users) == 2 + >>> len(products_for_users) + 2 >>> products_for_users[0] - (1, (Rating(user=1, product=1, rating=1.1....),)) + (1, (Rating(user=., product=., rating=1.....),)) >>> users_for_products = model.recommendUsersForProducts(1).collect() - >>> len(users_for_products) == 2 + >>> len(users_for_products) + 2 >>> users_for_products[0] - (1, (Rating(user=1, product=1, rating=1.1....),)) + (1, (Rating(user=., product=., rating=1.....),)) >>> model = ALS.train(ratings, 1, nonnegative=True, seed=10) >>> model.predict(2, 2) From 969e4c343de6922dd672a8a46e82f8a338b129a7 Mon Sep 17 00:00:00 2001 From: Vladimir Vladimirov Date: Sun, 20 Sep 2015 10:51:04 -0400 Subject: [PATCH 5/6] docs for num argument --- python/pyspark/mllib/recommendation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py index a80e0351ac1f..1b869a2d3911 100644 --- a/python/pyspark/mllib/recommendation.py +++ b/python/pyspark/mllib/recommendation.py @@ -170,13 +170,13 @@ def recommendProducts(self, user, num): def recommendProductsForUsers(self, num): """ - Recommends topK products for all users. + Recommends top "num" products for all users. The number returned may be less than this. """ return self.call("wrappedRecommendProductsForUsers", num) def recommendUsersForProducts(self, num): """ - Recommends topK users for all products. + Recommends top "num" users for all products. The number returned may be less than this. """ return self.call("wrappedRecommendUsersForProducts", num) From 56d33214d324fc158b29ff3b08f7c409f19433a1 Mon Sep 17 00:00:00 2001 From: Vladimir Vladimirov Date: Tue, 22 Sep 2015 09:10:44 -0400 Subject: [PATCH 6/6] fixed ELIPSES in doctests --- python/pyspark/mllib/recommendation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py index 1b869a2d3911..cf552fc4a4d9 100644 --- a/python/pyspark/mllib/recommendation.py +++ b/python/pyspark/mllib/recommendation.py @@ -89,13 +89,13 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader): >>> len(products_for_users) 2 >>> products_for_users[0] - (1, (Rating(user=., product=., rating=1.....),)) + (1, (Rating(user=1, product=2, rating=...),)) >>> users_for_products = model.recommendUsersForProducts(1).collect() >>> len(users_for_products) 2 >>> users_for_products[0] - (1, (Rating(user=., product=., rating=1.....),)) + (1, (Rating(user=2, product=1, rating=...),)) >>> model = ALS.train(ratings, 1, nonnegative=True, seed=10) >>> model.predict(2, 2)