apache · zhengruifeng · Sep 1, 2022 · Yikun · Sep 2, 2022 · zhengruifeng
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
@@ -1039,7 +1039,8 @@ def sample(
         >>> 6 <= rdd.sample(False, 0.1, 81).count() <= 14
         True
         """
-        assert fraction >= 0.0, "Negative fraction value: %s" % fraction
+        if not fraction >= 0:
+            raise ValueError("Fraction must be nonnegative.")
         return self.mapPartitionsWithIndex(RDDSampler(withReplacement, fraction, seed).func, True)
 
     def randomSplit(
@@ -1077,7 +1078,11 @@ def randomSplit(
         >>> 250 < rdd2.count() < 350
         True
         """
+        if not all(w >= 0 for w in weights):
+            raise ValueError("Weights must be nonnegative")
         s = float(sum(weights))
+        if not s > 0:
+            raise ValueError("Sum of weights must be positive")
         cweights = [0.0]
         for w in weights:
             cweights.append(cweights[-1] + w / s)
@@ -4565,6 +4570,8 @@ def coalesce(self: "RDD[T]", numPartitions: int, shuffle: bool = False) -> "RDD[
         >>> sc.parallelize([1, 2, 3, 4, 5], 3).coalesce(1).glom().collect()
         [[1, 2, 3, 4, 5]]
         """
+        if not numPartitions > 0:
+            raise ValueError("Number of partitions must be positive.")
         if shuffle:
             # Decrease the batch size in order to distribute evenly the elements across output
             # partitions. Otherwise, repartition will possibly produce highly skewed partitions.