From 243f681af922fe414db5194f1a1765328247d9ce Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Tue, 26 Sep 2017 17:47:47 +0100 Subject: [PATCH 1/2] initial commit --- .../java/org/apache/spark/unsafe/types/UTF8String.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index ce4a06bde80c4..b65eda7c39987 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -501,14 +501,13 @@ public UTF8String trim() { int e = this.numBytes - 1; // skip all of the space (0x20) in the left side while (s < this.numBytes && getByte(s) == 0x20) s++; - // skip all of the space (0x20) in the right side - while (e >= 0 && getByte(e) == 0x20) e--; - if (s > e) { + if (s == this.numBytes) { // empty string return EMPTY_UTF8; - } else { - return copyUTF8String(s, e); } + // skip all of the space (0x20) in the right side + while (e >= 0 && getByte(e) == 0x20) e--; + return copyUTF8String(s, e); } /** From d39c648fe8ed690e4aa309f4e58e8484792cfc6c Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Tue, 26 Sep 2017 19:04:16 +0100 Subject: [PATCH 2/2] address review comment --- .../main/java/org/apache/spark/unsafe/types/UTF8String.java | 4 ++-- .../java/org/apache/spark/unsafe/types/UTF8StringSuite.java | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index b65eda7c39987..b0d0c44823e68 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -498,7 +498,6 @@ private UTF8String copyUTF8String(int start, int end) { public UTF8String trim() { int s = 0; - int e = this.numBytes - 1; // skip all of the space (0x20) in the left side while (s < this.numBytes && getByte(s) == 0x20) s++; if (s == this.numBytes) { @@ -506,7 +505,8 @@ public UTF8String trim() { return EMPTY_UTF8; } // skip all of the space (0x20) in the right side - while (e >= 0 && getByte(e) == 0x20) e--; + int e = this.numBytes - 1; + while (e > s && getByte(e) == 0x20) e--; return copyUTF8String(s, e); } diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java index 7b03d2c650fc9..9b303fa5bc6c5 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java @@ -222,10 +222,13 @@ public void substring() { @Test public void trims() { + assertEquals(fromString("1"), fromString("1").trim()); + assertEquals(fromString("hello"), fromString(" hello ").trim()); assertEquals(fromString("hello "), fromString(" hello ").trimLeft()); assertEquals(fromString(" hello"), fromString(" hello ").trimRight()); + assertEquals(EMPTY_UTF8, EMPTY_UTF8.trim()); assertEquals(EMPTY_UTF8, fromString(" ").trim()); assertEquals(EMPTY_UTF8, fromString(" ").trimLeft()); assertEquals(EMPTY_UTF8, fromString(" ").trimRight());