From 9209c646a5589943cf7ebe90b7a49976ca6a3490 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 6 Jul 2015 15:33:16 -0700 Subject: [PATCH 1/5] UTF8String Property Checks. --- unsafe/pom.xml | 4 ++ .../types/UTF8StringProperyChecks.scala | 66 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringProperyChecks.scala diff --git a/unsafe/pom.xml b/unsafe/pom.xml index 33782c6c66f90..0065961faf74a 100644 --- a/unsafe/pom.xml +++ b/unsafe/pom.xml @@ -70,6 +70,10 @@ mockito-core test + + org.scalacheck + scalacheck_${scala.binary.version} + target/scala-${scala.binary.version}/classes diff --git a/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringProperyChecks.scala b/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringProperyChecks.scala new file mode 100644 index 0000000000000..c5d10bf6e6dbf --- /dev/null +++ b/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringProperyChecks.scala @@ -0,0 +1,66 @@ +package org.apache.spark.unsafe.types + +import org.scalatest.prop.GeneratorDrivenPropertyChecks +import org.scalatest.{FunSuite, Matchers} +import org.apache.spark.unsafe.types.UTF8String.{fromString => toUTF8} + + +class UTF8StringSuite2 extends FunSuite with GeneratorDrivenPropertyChecks with Matchers { + + test("toString") { + forAll { (s: String) => + assert(s === toUTF8(s).toString()) + } + } + + test("numChars") { + forAll { (s: String) => + assert(toUTF8(s).numChars() === s.length) + } + } + + test("startsWith") { + forAll { (s: String) => + val utf8 = toUTF8(s) + assert(utf8.startsWith(utf8)) + for (i <- 1 to s.length) { + assert(utf8.startsWith(toUTF8(s.dropRight(i)))) + } + } + } + + test("endsWith") { + forAll { (s: String) => + val utf8 = toUTF8(s) + assert(utf8.endsWith(utf8)) + for (i <- 1 to s.length) { + assert(utf8.endsWith(toUTF8(s.drop(i)))) + } + } + } + + test("toUpperCase") { + forAll { (s: String) => + assert(s.toUpperCase === 
toUTF8(s).toUpperCase.toString) + } + } + + test("toLowerCase") { + forAll { (s: String) => + assert(s.toLowerCase === toUTF8(s).toLowerCase.toString) + } + } + + test("compare") { + forAll { (s1: String, s2: String) => + assert(Math.signum(s1.compareTo(s2)) === Math.signum(toUTF8(s1).compareTo(toUTF8(s2)))) + } + } + + test("substring") { + forAll { (s: String) => + assert(s.substring(0, 0) === toUTF8(s).substring(0, 0).toString) + assert(s.substring(0, s.length) === toUTF8(s).substring(0, s.length).toString) + } + } +} From 49ed069799c02400a0ae793a308938bebf6f95ad Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 6 Jul 2015 15:54:17 -0700 Subject: [PATCH 2/5] Rename suite --- ...ringProperyChecks.scala => UTF8StringPropertyChecks.scala} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename unsafe/src/test/scala/org/apache/spark/unsafe/types/{UTF8StringProperyChecks.scala => UTF8StringPropertyChecks.scala} (93%) diff --git a/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringProperyChecks.scala b/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala similarity index 93% rename from unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringProperyChecks.scala rename to unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala index c5d10bf6e6dbf..033dd58aa4732 100644 --- a/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringProperyChecks.scala +++ b/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala @@ -2,10 +2,10 @@ package org.apache.spark.unsafe.types import org.scalatest.prop.GeneratorDrivenPropertyChecks import org.scalatest.{FunSuite, Matchers} -import org.apache.spark.unsafe.types.UTF8String.{fromString => toUTF8} +import org.apache.spark.unsafe.types.UTF8String.{fromString => toUTF8} -class UTF8StringSuite2 extends FunSuite with GeneratorDrivenPropertyChecks with Matchers { +class UTF8StringPropertyChecks extends FunSuite with 
GeneratorDrivenPropertyChecks with Matchers { test("toString") { forAll { (s: String) => From 52f51a0bd713766699fb383cc396e027f20e0cae Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Wed, 29 Jul 2015 23:19:55 -0700 Subject: [PATCH 3/5] Add some more failing tests --- .../types/UTF8StringPropertyChecks.scala | 71 ++++++++++++++++++- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala b/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala index 033dd58aa4732..b0852bd812e6e 100644 --- a/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala +++ b/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala @@ -1,5 +1,6 @@ package org.apache.spark.unsafe.types +import org.scalacheck.{Arbitrary, Gen} import org.scalatest.prop.GeneratorDrivenPropertyChecks import org.scalatest.{FunSuite, Matchers} @@ -59,8 +60,74 @@ class UTF8StringPropertyChecks extends FunSuite with GeneratorDrivenPropertyChec test("substring") { forAll { (s: String) => - assert(s.substring(0, 0) === toUTF8(s).substring(0, 0).toString) - assert(s.substring(0, s.length) === toUTF8(s).substring(0, s.length).toString) + for (start <- 0 to s.length; end <- 0 to s.length) { + withClue(s"start=$start, end=$end") { + assert(s.substring(start, end) === toUTF8(s).substring(start, end).toString) + } + } + } + } + + // TODO: substringSQL + + test("contains") { + forAll { (s: String) => + for (start <- 0 to s.length; end <- 0 to s.length) { + val substring = s.substring(start, end) + withClue(s"substring=$substring") { + assert(s.contains(substring) === toUTF8(s).contains(toUTF8(substring))) + } + } } } + + val whitespaceChar: Gen[Char] = Gen.choose(0x00, 0x20).map(_.toChar) + val whitespaceString: Gen[String] = Gen.listOf(whitespaceChar).map(_.mkString) + val randomString: Gen[String] = Arbitrary.arbString.arbitrary + + test("trim, trimLeft, 
trimRight") { + forAll( + whitespaceString, + randomString, + whitespaceString + ) { (start: String, middle: String, end: String) => + val s = start + middle + end + assert(s.trim() === toUTF8(s).trim().toString) + assert(s.stripMargin === toUTF8(s).trimLeft().toString) + assert(s.reverse.stripMargin.reverse === toUTF8(s).trimRight().toString) + } + } + + test("reverse") { + forAll() { (s: String) => + assert(s.reverse === toUTF8(s).reverse.toString) + } + } + + // TODO: repeat + // TODO: indexOf + // TODO: lpad + // TODO: rpad + + test("concat") { + forAll() { (inputs: Seq[String]) => + // TODO: test case where at least one of the inputs is null + assert(inputs.mkString === UTF8String.concat(inputs.map(toUTF8): _*).toString) + } + } + + test("concatWs") { + forAll() { (sep: String, inputs: Seq[String]) => + // TODO: handle case where at least one of the inputs is null + assert( + inputs.mkString(sep) === UTF8String.concatWs(toUTF8(sep), inputs.map(toUTF8): _*).toString) + } + } + + // TODO: split + + // TODO: levenshteinDistance that tests against StringUtils' implementation + + // TODO: equals(), hashCode(), and compare() + } From c0800e64c9461cd527f5a2c6a2fcf643bfa3a28c Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Fri, 31 Jul 2015 22:31:28 +0800 Subject: [PATCH 4/5] Finish all todos in suite --- unsafe/pom.xml | 6 + .../apache/spark/unsafe/types/UTF8String.java | 19 +- .../spark/unsafe/types/UTF8StringSuite.java | 13 +- .../types/UTF8StringPropertyChecks.scala | 179 ++++++++++++++---- 4 files changed, 173 insertions(+), 44 deletions(-) diff --git a/unsafe/pom.xml b/unsafe/pom.xml index 0065961faf74a..89475ee3cf5a1 100644 --- a/unsafe/pom.xml +++ b/unsafe/pom.xml @@ -73,6 +73,12 @@ org.scalacheck scalacheck_${scala.binary.version} + test + + + org.apache.commons + commons-lang3 + test diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 
c38953f65d7d7..86115f1a83cbe 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -301,10 +301,9 @@ public UTF8String trim() { int s = 0; int e = this.numBytes - 1; // skip all of the space (0x20) in the left side - while (s < this.numBytes && getByte(s) == 0x20) s++; + while (s < this.numBytes && getByte(s) <= 0x20 && getByte(s) >= 0x00) s++; // skip all of the space (0x20) in the right side - while (e >= 0 && getByte(e) == 0x20) e--; - + while (e >= 0 && getByte(e) <= 0x20 && getByte(e) >= 0x00) e--; if (s > e) { // empty string return UTF8String.fromBytes(new byte[0]); @@ -316,7 +315,7 @@ public UTF8String trim() { public UTF8String trimLeft() { int s = 0; // skip all of the space (0x20) in the left side - while (s < this.numBytes && getByte(s) == 0x20) s++; + while (s < this.numBytes && getByte(s) <= 0x20 && getByte(s) >= 0x00) s++; if (s == this.numBytes) { // empty string return UTF8String.fromBytes(new byte[0]); @@ -328,7 +327,7 @@ public UTF8String trimLeft() { public UTF8String trimRight() { int e = numBytes - 1; // skip all of the space (0x20) in the right side - while (e >= 0 && getByte(e) == 0x20) e--; + while (e >= 0 && getByte(e) <= 0x20 && getByte(e) >= 0x00) e--; if (e < 0) { // empty string @@ -354,7 +353,7 @@ public UTF8String reverse() { } public UTF8String repeat(int times) { - if (times <=0) { + if (times <= 0) { return EMPTY_UTF8; } @@ -414,7 +413,7 @@ public int indexOf(UTF8String v, int start) { */ public UTF8String rpad(int len, UTF8String pad) { int spaces = len - this.numChars(); // number of char need to pad - if (spaces <= 0) { + if (spaces <= 0 || pad.numChars() == 0) { // no padding at all, return the substring of the current string return substring(0, len); } else { @@ -429,7 +428,7 @@ public UTF8String rpad(int len, UTF8String pad) { int idx = 0; while (idx < count) { copyMemory(pad.base, pad.offset, data, BYTE_ARRAY_OFFSET + 
offset, pad.numBytes); - ++idx; + ++ idx; offset += pad.numBytes; } copyMemory(remain.base, remain.offset, data, BYTE_ARRAY_OFFSET + offset, remain.numBytes); @@ -446,7 +445,7 @@ public UTF8String rpad(int len, UTF8String pad) { */ public UTF8String lpad(int len, UTF8String pad) { int spaces = len - this.numChars(); // number of char need to pad - if (spaces <= 0) { + if (spaces <= 0 || pad.numChars() == 0) { // no padding at all, return the substring of the current string return substring(0, len); } else { @@ -461,7 +460,7 @@ public UTF8String lpad(int len, UTF8String pad) { int idx = 0; while (idx < count) { copyMemory(pad.base, pad.offset, data, BYTE_ARRAY_OFFSET + offset, pad.numBytes); - ++idx; + ++ idx; offset += pad.numBytes; } copyMemory(remain.base, remain.offset, data, BYTE_ARRAY_OFFSET + offset, remain.numBytes); diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java index f2cc19ca6b172..2a82421d1354a 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java @@ -271,7 +271,6 @@ public void pad() { assertEquals(fromString("hello?????"), fromString("hello").rpad(10, fromString("?????"))); assertEquals(fromString("???????"), EMPTY_UTF8.rpad(7, fromString("?????"))); - assertEquals(fromString("数据砖"), fromString("数据砖头").lpad(3, fromString("????"))); assertEquals(fromString("?数据砖头"), fromString("数据砖头").lpad(5, fromString("????"))); assertEquals(fromString("??数据砖头"), fromString("数据砖头").lpad(6, fromString("????"))); @@ -289,6 +288,18 @@ public void pad() { assertEquals( fromString("数据砖头孙行者孙行者孙行"), fromString("数据砖头").rpad(12, fromString("孙行者"))); + + assertEquals(EMPTY_UTF8, fromString("数据砖头").lpad(-10, fromString("孙行者"))); + assertEquals(EMPTY_UTF8, fromString("数据砖头").lpad(-10, EMPTY_UTF8)); + assertEquals(fromString("数据砖头"), fromString("数据砖头").lpad(5, 
EMPTY_UTF8)); + assertEquals(fromString("数据砖"), fromString("数据砖头").lpad(3, EMPTY_UTF8)); + assertEquals(EMPTY_UTF8, EMPTY_UTF8.lpad(3, EMPTY_UTF8)); + + assertEquals(EMPTY_UTF8, fromString("数据砖头").rpad(-10, fromString("孙行者"))); + assertEquals(EMPTY_UTF8, fromString("数据砖头").rpad(-10, EMPTY_UTF8)); + assertEquals(fromString("数据砖头"), fromString("数据砖头").rpad(5, EMPTY_UTF8)); + assertEquals(fromString("数据砖"), fromString("数据砖头").rpad(3, EMPTY_UTF8)); + assertEquals(EMPTY_UTF8, EMPTY_UTF8.rpad(3, EMPTY_UTF8)); } @Test diff --git a/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala b/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala index b0852bd812e6e..6a14ce4977c1d 100644 --- a/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala +++ b/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala @@ -1,16 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.spark.unsafe.types +import org.apache.commons.lang3.StringUtils + import org.scalacheck.{Arbitrary, Gen} import org.scalatest.prop.GeneratorDrivenPropertyChecks +// scalastyle:off import org.scalatest.{FunSuite, Matchers} import org.apache.spark.unsafe.types.UTF8String.{fromString => toUTF8} class UTF8StringPropertyChecks extends FunSuite with GeneratorDrivenPropertyChecks with Matchers { +// scalastyle:on test("toString") { forAll { (s: String) => - assert(s === toUTF8(s).toString()) + assert(toUTF8(s).toString() === s) } } @@ -42,41 +63,35 @@ class UTF8StringPropertyChecks extends FunSuite with GeneratorDrivenPropertyChec test("toUpperCase") { forAll { (s: String) => - assert(s.toUpperCase === toUTF8(s).toUpperCase.toString) + assert(toUTF8(s).toUpperCase === toUTF8(s.toUpperCase)) } } test("toLowerCase") { forAll { (s: String) => - assert(s.toLowerCase === toUTF8(s).toLowerCase.toString) + assert(toUTF8(s).toLowerCase === toUTF8(s.toLowerCase)) } } test("compare") { forAll { (s1: String, s2: String) => - assert(Math.signum(s1.compareTo(s2)) === Math.signum(toUTF8(s1).compareTo(toUTF8(s2)))) + assert(Math.signum(toUTF8(s1).compareTo(toUTF8(s2))) === Math.signum(s1.compareTo(s2))) } } test("substring") { forAll { (s: String) => - for (start <- 0 to s.length; end <- 0 to s.length) { - withClue(s"start=$start, end=$end") { - assert(s.substring(start, end) === toUTF8(s).substring(start, end).toString) - } + for (start <- 0 to s.length; end <- 0 to s.length; if start <= end) { + assert(toUTF8(s).substring(start, end).toString === s.substring(start, end)) } } } - // TODO: substringSQL - test("contains") { forAll { (s: String) => - for (start <- 0 to s.length; end <- 0 to s.length) { + for (start <- 0 to s.length; end <- 0 to s.length; if start <= end) { val substring = s.substring(start, end) - withClue(s"substring=$substring") { - assert(s.contains(substring) === toUTF8(s).contains(toUTF8(substring))) - } + 
+ assert(toUTF8(s).contains(toUTF8(substring)) === s.contains(substring)) } } } @@ -86,48 +101,146 @@ class UTF8StringPropertyChecks extends FunSuite with GeneratorDrivenPropertyChec val randomString: Gen[String] = Arbitrary.arbString.arbitrary test("trim, trimLeft, trimRight") { + // lTrim and rTrim are both modified from java.lang.String.trim + def lTrim(s: String): String = { + var st = 0 + val array: Array[Char] = s.toCharArray + while ((st < s.length) && (array(st) <= ' ')) { + st += 1 + } + if (st > 0) s.substring(st, s.length) else s + } + def rTrim(s: String): String = { + var len = s.length + val array: Array[Char] = s.toCharArray + while ((len > 0) && (array(len - 1) <= ' ')) { + len -= 1 + } + if (len < s.length) s.substring(0, len) else s + } + forAll( whitespaceString, randomString, whitespaceString ) { (start: String, middle: String, end: String) => val s = start + middle + end - assert(s.trim() === toUTF8(s).trim().toString) - assert(s.stripMargin === toUTF8(s).trimLeft().toString) - assert(s.reverse.stripMargin.reverse === toUTF8(s).trimRight().toString) + assert(toUTF8(s).trim() === toUTF8(s.trim())) + assert(toUTF8(s).trimLeft() === toUTF8(lTrim(s))) + assert(toUTF8(s).trimRight() === toUTF8(rTrim(s))) } } test("reverse") { - forAll() { (s: String) => - assert(s.reverse === toUTF8(s).reverse.toString) + forAll { (s: String) => + assert(toUTF8(s).reverse === toUTF8(s.reverse)) } } - // TODO: repeat - // TODO: indexOf - // TODO: lpad - // TODO: rpad + test("indexOf") { + forAll { (s: String) => + for (start <- 0 to s.length; end <- 0 to s.length; if start <= end) { + val substring = s.substring(start, end) + assert(toUTF8(s).indexOf(toUTF8(substring), 0) === s.indexOf(substring)) + } + } + } + + val randomInt = Gen.choose(-100, 100) + + test("repeat") { + def repeat(str: String, times: Int): String = { + if (times > 0) str * times else "" + } + // ScalaCheck tends to generate very large repeat counts, which might hang the test forever.
+ forAll(randomString, randomInt) { (s: String, times: Int) => + assert(toUTF8(s).repeat(times) === toUTF8(repeat(s, times))) + } + } + + test("lpad, rpad") { + def padding(origin: String, pad: String, length: Int, isLPad: Boolean): String = { + if (length <= 0) return "" + if (length <= origin.length) { + if (length <= 0) "" else origin.substring(0, length) + } else { + if (pad.length == 0) return origin + val toPad = length - origin.length + val partPad = if (toPad % pad.length == 0) "" else pad.substring(0, toPad % pad.length) + if (isLPad) { + pad * (toPad / pad.length) + partPad + origin + } else { + origin + pad * (toPad / pad.length) + partPad + } + } + } + + forAll ( + randomString, + randomString, + randomInt + ) { (s: String, pad: String, length: Int) => + assert(toUTF8(s).lpad(length, toUTF8(pad)) === + toUTF8(padding(s, pad, length, true))) + assert(toUTF8(s).rpad(length, toUTF8(pad)) === + toUTF8(padding(s, pad, length, false))) + } + } + + val nullalbeSeq = Gen.listOf(Gen.oneOf[String](null: String, randomString)) test("concat") { - forAll() { (inputs: Seq[String]) => - // TODO: test case where at least one of the inputs is null - assert(inputs.mkString === UTF8String.concat(inputs.map(toUTF8): _*).toString) + def concat(orgin: Seq[String]): String = + if (orgin.exists(_ == null)) null else orgin.mkString + + forAll { (inputs: Seq[String]) => + assert(UTF8String.concat(inputs.map(toUTF8): _*) === toUTF8(inputs.mkString)) + } + forAll (nullalbeSeq) { (inputs: Seq[String]) => + assert(UTF8String.concat(inputs.map(toUTF8): _*) === toUTF8(concat(inputs))) } } test("concatWs") { - forAll() { (sep: String, inputs: Seq[String]) => - // TODO: handle case where at least one of the inputs is null - assert( - inputs.mkString(sep) === UTF8String.concatWs(toUTF8(sep), inputs.map(toUTF8): _*).toString) + def concatWs(sep: String, inputs: Seq[String]): String = { + if (sep == null) return null + inputs.filter(_ != null).mkString(sep) + } + + forAll { (sep: String, 
inputs: Seq[String]) => + assert(UTF8String.concatWs(toUTF8(sep), inputs.map(toUTF8): _*) === + toUTF8(inputs.mkString(sep))) + } + forAll(randomString, nullalbeSeq) {(sep: String, inputs: Seq[String]) => + assert(UTF8String.concatWs(toUTF8(sep), inputs.map(toUTF8): _*) === + toUTF8(concatWs(sep, inputs))) } } - // TODO: split + // TODO: enable this when we find a proper way to generate valid patterns + ignore("split") { + forAll { (s: String, pattern: String, limit: Int) => + assert(toUTF8(s).split(toUTF8(pattern), limit) === + s.split(pattern, limit).map(toUTF8(_))) + } + } - // TODO: levenshteinDistance that tests against StringUtils' implementation + test("levenshteinDistance") { + forAll { (one: String, another: String) => + assert(toUTF8(one).levenshteinDistance(toUTF8(another)) === + StringUtils.getLevenshteinDistance(one, another)) + } + } - // TODO: equals(), hashCode(), and compare() + test("hashCode") { + forAll { (s: String) => + assert(toUTF8(s).hashCode() === toUTF8(s).hashCode()) + } + } + test("equals") { + forAll { (one: String, another: String) => + assert(toUTF8(one).equals(toUTF8(another)) === one.equals(another)) + } + } } From 593da3a78b1a6f721518754beec6a27b884e9b96 Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Sat, 1 Aug 2015 09:49:50 +0800 Subject: [PATCH 5/5] resolve comments --- .../main/java/org/apache/spark/unsafe/types/UTF8String.java | 4 ++-- ...opertyChecks.scala => UTF8StringPropertyCheckSuite.scala} | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) rename unsafe/src/test/scala/org/apache/spark/unsafe/types/{UTF8StringPropertyChecks.scala => UTF8StringPropertyCheckSuite.scala} (97%) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 86115f1a83cbe..c9165562e555e 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ 
-413,7 +413,7 @@ public int indexOf(UTF8String v, int start) { */ public UTF8String rpad(int len, UTF8String pad) { int spaces = len - this.numChars(); // number of char need to pad - if (spaces <= 0 || pad.numChars() == 0) { + if (spaces <= 0 || pad.numBytes() == 0) { // no padding at all, return the substring of the current string return substring(0, len); } else { @@ -445,7 +445,7 @@ public UTF8String rpad(int len, UTF8String pad) { */ public UTF8String lpad(int len, UTF8String pad) { int spaces = len - this.numChars(); // number of char need to pad - if (spaces <= 0 || pad.numChars() == 0) { + if (spaces <= 0 || pad.numBytes() == 0) { // no padding at all, return the substring of the current string return substring(0, len); } else { diff --git a/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala b/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala similarity index 97% rename from unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala rename to unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala index 6a14ce4977c1d..12a002befa0ac 100644 --- a/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala +++ b/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala @@ -26,7 +26,10 @@ import org.scalatest.{FunSuite, Matchers} import org.apache.spark.unsafe.types.UTF8String.{fromString => toUTF8} -class UTF8StringPropertyChecks extends FunSuite with GeneratorDrivenPropertyChecks with Matchers { +/** + * This test suite utilizes ScalaCheck to generate randomized inputs for UTF8String testing. + */ +class UTF8StringPropertyCheckSuite extends FunSuite with GeneratorDrivenPropertyChecks with Matchers { // scalastyle:on test("toString") {