From c9961204260bde7d0070afeeadcf6354fb1e62fe Mon Sep 17 00:00:00 2001 From: Sandy Ryza Date: Sat, 14 Jun 2014 14:47:11 -0700 Subject: [PATCH 1/2] SPARK-2146. Fix takeOrdered doc --- .../main/scala/org/apache/spark/rdd/RDD.scala | 16 ++++++++-------- python/pyspark/rdd.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 54bdc3e7cbc7a..666dba6240bec 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1052,11 +1052,11 @@ abstract class RDD[T: ClassTag]( * Returns the top K (largest) elements from this RDD as defined by the specified * implicit Ordering[T]. This does the opposite of [[takeOrdered]]. For example: * {{{ - * sc.parallelize([10, 4, 2, 12, 3]).top(1) - * // returns [12] + * sc.parallelize(Array(10, 4, 2, 12, 3)).top(1) + * // returns Array(12) * - * sc.parallelize([2, 3, 4, 5, 6]).top(2) - * // returns [6, 5] + * sc.parallelize(Array(2, 3, 4, 5, 6)).top(2) + * // returns Array(6, 5) * }}} * * @param num the number of top elements to return @@ -1070,11 +1070,11 @@ abstract class RDD[T: ClassTag]( * implicit Ordering[T] and maintains the ordering. This does the opposite of [[top]]. * For example: * {{{ - * sc.parallelize([10, 4, 2, 12, 3]).takeOrdered(1) - * // returns [12] + * sc.parallelize(Array(10, 4, 2, 12, 3)).takeOrdered(1) + * // returns Array(2) * - * sc.parallelize([2, 3, 4, 5, 6]).takeOrdered(2) - * // returns [2, 3] + * sc.parallelize(Array(2, 3, 4, 5, 6)).takeOrdered(2) + * // returns Array(2, 3) * }}} * * @param num the number of top elements to return diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 9c69c79236edc..8ad5eb6878627 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -803,7 +803,7 @@ def top(self, num): Note: It returns the list sorted in descending order. >>> sc.parallelize([10, 4, 2, 12, 3]).top(1) [12] - >>> sc.parallelize([2, 3, 4, 5, 6], 2).cache().top(2) + >>> sc.parallelize([2, 3, 4, 5, 6], 2).top(2) [6, 5] """ def topIterator(iterator): From 185ff189d6b53f8045b02c943cd5e64b11ddec06 Mon Sep 17 00:00:00 2001 From: Sandy Ryza Date: Mon, 16 Jun 2014 00:14:50 -0700 Subject: [PATCH 2/2] Use Seq instead of Array --- core/src/main/scala/org/apache/spark/rdd/RDD.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 666dba6240bec..c55f4511b1e7c 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1052,10 +1052,10 @@ abstract class RDD[T: ClassTag]( * Returns the top K (largest) elements from this RDD as defined by the specified * implicit Ordering[T]. This does the opposite of [[takeOrdered]]. For example: * {{{ - * sc.parallelize(Array(10, 4, 2, 12, 3)).top(1) + * sc.parallelize(Seq(10, 4, 2, 12, 3)).top(1) * // returns Array(12) * - * sc.parallelize(Array(2, 3, 4, 5, 6)).top(2) + * sc.parallelize(Seq(2, 3, 4, 5, 6)).top(2) * // returns Array(6, 5) * }}} * @@ -1070,10 +1070,10 @@ abstract class RDD[T: ClassTag]( * implicit Ordering[T] and maintains the ordering. This does the opposite of [[top]]. * For example: * {{{ - * sc.parallelize(Array(10, 4, 2, 12, 3)).takeOrdered(1) + * sc.parallelize(Seq(10, 4, 2, 12, 3)).takeOrdered(1) * // returns Array(2) * - * sc.parallelize(Array(2, 3, 4, 5, 6)).takeOrdered(2) + * sc.parallelize(Seq(2, 3, 4, 5, 6)).takeOrdered(2) * // returns Array(2, 3) * }}} *