From e1dd3e0582c6314ec5a320b0a8e48203060d9666 Mon Sep 17 00:00:00 2001 From: pralabhkumar Date: Mon, 11 Apr 2022 19:36:25 +0530 Subject: [PATCH 1/4] Increasing code coverage for statcounter Increasing code coverage for statcounter Added comments --- python/pyspark/tests/test_statcounter.py | 25 ++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/python/pyspark/tests/test_statcounter.py b/python/pyspark/tests/test_statcounter.py index 9651871e113a8..91b3f82b6d5bc 100644 --- a/python/pyspark/tests/test_statcounter.py +++ b/python/pyspark/tests/test_statcounter.py @@ -76,6 +76,31 @@ def test_merge_stats(self): self.assertEqual(stats.sum(), 20.0) self.assertAlmostEqual(stats.variance(), 1.25) self.assertAlmostEqual(stats.sampleVariance(), 1.4285714285714286) + for idx in range(2): + stats1 = StatCounter([1.0, 2.0]) + stats2 = StatCounter(range(1, 301)) + stats = stats1.mergeStats(stats2) if idx == 1 else stats2.mergeStats(stats1) + self.assertEqual(stats.count(), 302) + self.assertEqual(stats.max(), 300.0) + self.assertEqual(stats.min(), 1.0) + self.assertAlmostEqual(stats.mean(), 149.51324503311) + self.assertAlmostEqual(stats.variance(), 7596.302804701549) + self.assertAlmostEqual(stats.sampleVariance(), 7621.539691095905) + + def test_variance_when_size_zero(self): + # SPARK-38854 : Test case to improve test coverage when + # StatCounter argument is empty list or None + arguments = [[], None] + import math + + for arg in arguments: + stats = StatCounter(arg) + self.assertTrue(math.isnan(stats.variance())) + self.assertTrue(math.isnan(stats.sampleVariance())) + self.assertEqual(stats.count(), 0) + self.assertTrue(math.isinf(stats.max())) + self.assertTrue(math.isinf(stats.min())) + self.assertEqual(stats.mean(), 0.0) def test_merge_stats_with_self(self): stats = StatCounter([1.0, 2.0, 3.0, 4.0]) From 07f8e50601896bb683e82f7a3bca8d03900d44d8 Mon Sep 17 00:00:00 2001 From: pralabhkumar Date: Mon, 11 Apr 2022 23:05:44 +0530 Subject: [PATCH 2/4] Dummy push --- python/pyspark/tests/test_statcounter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/tests/test_statcounter.py b/python/pyspark/tests/test_statcounter.py index 91b3f82b6d5bc..4e07280f12c28 100644 --- a/python/pyspark/tests/test_statcounter.py +++ b/python/pyspark/tests/test_statcounter.py @@ -88,7 +88,7 @@ def test_merge_stats(self): self.assertAlmostEqual(stats.sampleVariance(), 7621.539691095905) def test_variance_when_size_zero(self): - # SPARK-38854 : Test case to improve test coverage when + # SPARK-38854: Test case to improve test coverage when # StatCounter argument is empty list or None arguments = [[], None] import math From c76cb9b499f9404584e9d230e5428098b159c157 Mon Sep 17 00:00:00 2001 From: pralabhkumar Date: Tue, 12 Apr 2022 12:35:59 +0530 Subject: [PATCH 3/4] Done the changes as suggested --- python/pyspark/tests/test_statcounter.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/pyspark/tests/test_statcounter.py b/python/pyspark/tests/test_statcounter.py index 4e07280f12c28..02d74e7194c51 100644 --- a/python/pyspark/tests/test_statcounter.py +++ b/python/pyspark/tests/test_statcounter.py @@ -76,10 +76,11 @@ def test_merge_stats(self): self.assertEqual(stats.sum(), 20.0) self.assertAlmostEqual(stats.variance(), 1.25) self.assertAlmostEqual(stats.sampleVariance(), 1.4285714285714286) - for idx in range(2): - stats1 = StatCounter([1.0, 2.0]) - stats2 = StatCounter(range(1, 301)) - stats = stats1.mergeStats(stats2) if idx == 1 else stats2.mergeStats(stats1) + execution_statements = [ + StatCounter([1.0, 2.0]).mergeStats(StatCounter(range(1, 301))), + StatCounter(range(1, 301)).mergeStats(StatCounter([1.0, 2.0])), + ] + for stats in execution_statements: self.assertEqual(stats.count(), 302) self.assertEqual(stats.max(), 300.0) self.assertEqual(stats.min(), 1.0) From 4d9a79a90645af3175292094f0788b303cd18f57 Mon Sep 17 00:00:00 2001 From: pralabhkumar Date: Tue, 12 Apr 2022 14:32:23 +0530 Subject: [PATCH 4/4] Moved import to top of the file --- python/pyspark/tests/test_statcounter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/tests/test_statcounter.py b/python/pyspark/tests/test_statcounter.py index 02d74e7194c51..b10fe7cd911c4 100644 --- a/python/pyspark/tests/test_statcounter.py +++ b/python/pyspark/tests/test_statcounter.py @@ -16,6 +16,7 @@ # from pyspark.statcounter import StatCounter from pyspark.testing.utils import ReusedPySparkTestCase +import math class StatCounterTests(ReusedPySparkTestCase): @@ -92,7 +93,6 @@ def test_variance_when_size_zero(self): # SPARK-38854: Test case to improve test coverage when # StatCounter argument is empty list or None arguments = [[], None] - import math for arg in arguments: stats = StatCounter(arg)