From de966fd0a4e9fe0984bba3969ac3474aa8e3d038 Mon Sep 17 00:00:00 2001 From: Pat McDonough Date: Thu, 18 Sep 2014 00:00:47 -0700 Subject: [PATCH 1/2] SPARK-3389: New public method for RDD's to have consistent way of obtaining the number of RDD partitions across languages --- .../main/scala/org/apache/spark/rdd/RDD.scala | 17 +++++++++++++++++ .../scala/org/apache/spark/rdd/RDDSuite.scala | 1 + 2 files changed, 18 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index a9b905b0d1a63..c3a4b07fc22d9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -207,6 +207,23 @@ abstract class RDD[T: ClassTag]( } } + /** + * Get the number of partitions in this RDD + * + * {{{ + * scala> val rdd = sc.parallelize(1 to 4, 2) + * rdd: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[1] at parallelize at :12 + * + * scala> rdd.getNumPartitions + * res1: Int = 2 + * }}} + * + * @return The number of partitions in this RDD + */ + def getNumPartitions: Int = { + partitions.size + } + /** * Get the preferred locations of a partition (as hostnames), taking into account whether the * RDD is checkpointed. diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index c1b501a75c8b8..dba45996be7f9 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -34,6 +34,7 @@ class RDDSuite extends FunSuite with SharedSparkContext { test("basic operations") { val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) + assert(nums.getNumPartitions === 2) assert(nums.collect().toList === List(1, 2, 3, 4)) assert(nums.toLocalIterator.toList === List(1, 2, 3, 4)) val dups = sc.makeRDD(Array(1, 1, 2, 2, 3, 3, 4, 4), 2) From afc4e097842e45f50251a9340371b5ded0a65ae0 Mon Sep 17 00:00:00 2001 From: Pat McDonough Date: Fri, 19 Sep 2014 09:26:58 -0700 Subject: [PATCH 2/2] Fixing code style per PR comments --- core/src/main/scala/org/apache/spark/rdd/RDD.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index c3a4b07fc22d9..a13f71fd4de01 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -220,9 +220,7 @@ abstract class RDD[T: ClassTag]( * * @return The number of partitions in this RDD */ - def getNumPartitions: Int = { - partitions.size - } + def getNumPartitions: Int = partitions.size /** * Get the preferred locations of a partition (as hostnames), taking into account whether the