Improve Scaladoc for DataSkewRangePartitioner

apache · Apr 10, 2019 · cd25558 · cd25558
1 parent 7f05e69
commit cd25558
Showing 1 changed file with 2 additions and 1 deletion.
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/DataSkewRangePartitioner.scala b/integration/spark-common/src/main/scala/org/apache/spark/DataSkewRangePartitioner.scala
@@ -40,7 +40,7 @@ import org.apache.spark.util.{CollectionsUtils, Utils}
  * the rangeBounds are also the distinct values, but it calculates the skew weight.
  * So some rangeBounds may have more than one partition.
  *
- * for example, split following CSV file to 5 partitions:
+ * For example, split the following CSV file into 5 partitions by col2:
  * ---------------
  * col1,col2
  * 1,
@@ -77,6 +77,7 @@ import org.apache.spark.util.{CollectionsUtils, Utils}
  * --------------------------------------------------------------
  * The skew weight of range bound "null" is 2.
  * So it will start two tasks for range bound "null" to create two partitions.
+ * For a range bound, the number of final partitions is the same as the skew weight.
  */
 class DataSkewRangePartitioner[K: Ordering : ClassTag, V](
     partitions: Int,