From a21de669701ec0a985dcccf04e76bf942dd48ea0 Mon Sep 17 00:00:00 2001 From: xubo245 <601450868@qq.com> Date: Mon, 12 Feb 2018 11:28:41 +0800 Subject: [PATCH 1/4] [SPARK-23392][TEST] Add some test case for images feature revoke the change of path --- .../spark/ml/image/ImageSchemaSuite.scala | 62 ++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala index a8833c615865d..c184155d9d070 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala @@ -65,11 +65,71 @@ class ImageSchemaSuite extends SparkFunSuite with MLlibTestSparkContext { assert(count50 > 0 && count50 < countTotal) } + test("readImages test: recursive = false") { + val df = readImages(imagePath, null, false, 3, true, 1.0, 0) + assert(df.count() === 0) + } + + test("readImages test: read jpg image") { + val df = readImages(imagePath + "/kittens/DP153539.jpg", null, false, 3, true, 1.0, 0) + assert(df.count() === 1) + } + + test("readImages test: read png image") { + val df = readImages(imagePath + "/multi-channel/BGRA.png", null, false, 3, true, 1.0, 0) + assert(df.count() === 1) + } + + test("readImages test: read non image") { + val df = readImages(imagePath + "/kittens/not-image.txt", null, false, 3, true, 1.0, 0) + assert(df.count() === 0) + } + + test("readImages test: read non image and dropImageFailures is false") { + val df = readImages(imagePath + "/kittens/not-image.txt", null, false, 3, false, 1.0, 0) + assert(df.count() === 1) + } + + test("readImages test: sampleRatio > 1") { + val e = intercept[IllegalArgumentException] { + readImages(imagePath, null, true, 3, true, 1.1, 0) + } + assert(e.getMessage.equals("requirement failed: sampleRatio should be between 0 and 1")) + } + + test("readImages test: sampleRatio < 0") { + val e = 
intercept[IllegalArgumentException] { + readImages(imagePath, null, true, 3, true, -0.1, 0) + } + assert(e.getMessage.equals("requirement failed: sampleRatio should be between 0 and 1")) + } + + test("readImages test: sampleRatio = 0") { + val df = readImages(imagePath, null, true, 3, true, 0.0, 0) + assert(df.count() === 0) + } + + test("readImages test: with sparkSession") { + val df = readImages(imagePath, sparkSession = spark, true, 3, true, 1.0, 0) + assert(df.count() === 7) + } + test("readImages partition test") { val df = readImages(imagePath, null, true, 3, true, 1.0, 0) assert(df.rdd.getNumPartitions === 3) } + test("readImages partition test: < 0") { + val df = readImages(imagePath, null, true, -3, true, 1.0, 0) + assert(df.rdd.getNumPartitions === spark.sparkContext.defaultParallelism) + } + + test("readImages partition test: = 0") { + val df = readImages(imagePath, null, true, 0, true, 1.0, 0) + assert(df.rdd.getNumPartitions != 0) + assert(df.rdd.getNumPartitions === spark.sparkContext.defaultParallelism) + } + // Images with the different number of channels test("readImages pixel values test") { @@ -93,7 +153,7 @@ class ImageSchemaSuite extends SparkFunSuite with MLlibTestSparkContext { // - default representation for 3-channel RGB images is BGR row-wise: // (B00, G00, R00, B10, G10, R10, ...) // - default representation for 4-channel RGB images is BGRA row-wise: - // (B00, G00, R00, A00, B10, G10, R10, A00, ...) + // (B00, G00, R00, A00, B10, G10, R10, A10, ...) 
private val firstBytes20 = Map( "grayscale.jpg" -> (("CV_8UC1", Array[Byte](-2, -33, -61, -60, -59, -59, -64, -59, -66, -67, -73, -73, -62, From f51f518a0f21b1400c2fa8d3d8c62e3bafed5136 Mon Sep 17 00:00:00 2001 From: xubo245 <601450868@qq.com> Date: Mon, 12 Feb 2018 15:26:28 +0800 Subject: [PATCH 2/4] optimize code according to the review comment --- .../scala/org/apache/spark/ml/image/ImageSchemaSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala index c184155d9d070..ebbf0d41ce937 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.types._ class ImageSchemaSuite extends SparkFunSuite with MLlibTestSparkContext { // Single column of images named "image" - private lazy val imagePath = "../data/mllib/images" + private lazy val imagePath = "../../data/mllib/images" test("Smoke test: create basic ImageSchema dataframe") { val origin = "path" @@ -82,6 +82,7 @@ class ImageSchemaSuite extends SparkFunSuite with MLlibTestSparkContext { test("readImages test: read non image") { val df = readImages(imagePath + "/kittens/not-image.txt", null, false, 3, true, 1.0, 0) + assert(df.schema("image").dataType == columnSchema, "data do not fit ImageSchema") assert(df.count() === 0) } @@ -111,7 +112,7 @@ class ImageSchemaSuite extends SparkFunSuite with MLlibTestSparkContext { test("readImages test: with sparkSession") { val df = readImages(imagePath, sparkSession = spark, true, 3, true, 1.0, 0) - assert(df.count() === 7) + assert(df.count() === 8) } test("readImages partition test") { @@ -126,7 +127,6 @@ class ImageSchemaSuite extends SparkFunSuite with MLlibTestSparkContext { test("readImages partition test: = 0") { val df = readImages(imagePath, null, true, 0, 
true, 1.0, 0) - assert(df.rdd.getNumPartitions != 0) assert(df.rdd.getNumPartitions === spark.sparkContext.defaultParallelism) } From b67955abf3a73adcca496cdc1557a866ec780ba9 Mon Sep 17 00:00:00 2001 From: xubo245 <601450868@qq.com> Date: Mon, 12 Feb 2018 15:30:47 +0800 Subject: [PATCH 3/4] revoke path change --- .../test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala index ebbf0d41ce937..2c620ed44d3f3 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.types._ class ImageSchemaSuite extends SparkFunSuite with MLlibTestSparkContext { // Single column of images named "image" - private lazy val imagePath = "../../data/mllib/images" + private lazy val imagePath = "../data/mllib/images" test("Smoke test: create basic ImageSchema dataframe") { val origin = "path" From 4c18e232725f18156b56138471c52918d3fb83b3 Mon Sep 17 00:00:00 2001 From: xubo245 <601450868@qq.com> Date: Tue, 13 Feb 2018 09:36:06 +0800 Subject: [PATCH 4/4] optimize code according to review comment --- .../scala/org/apache/spark/ml/image/ImageSchemaSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala index 2c620ed44d3f3..527b3f8955968 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/image/ImageSchemaSuite.scala @@ -95,14 +95,14 @@ class ImageSchemaSuite extends SparkFunSuite with MLlibTestSparkContext { val e = intercept[IllegalArgumentException] { readImages(imagePath, null, true, 3, true, 1.1, 
0) } - assert(e.getMessage.equals("requirement failed: sampleRatio should be between 0 and 1")) + assert(e.getMessage.contains("sampleRatio")) } test("readImages test: sampleRatio < 0") { val e = intercept[IllegalArgumentException] { readImages(imagePath, null, true, 3, true, -0.1, 0) } - assert(e.getMessage.equals("requirement failed: sampleRatio should be between 0 and 1")) + assert(e.getMessage.contains("sampleRatio")) } test("readImages test: sampleRatio = 0") {