diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index 51908d852dbad..1308942f7bedc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -794,6 +794,9 @@ case class SoundEx(child: Expression) extends UnaryExpression with ExpectsInputT override def nullSafeEval(input: Any): Any = { input.asInstanceOf[UTF8String].soundex() } + override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + nullSafeCodeGen(ctx, ev, c => s"${ev.primitive} = $c.soundex();") + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index cb17cf5cde08a..0c70a0bf34420 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -335,11 +335,12 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(SoundEx(Literal("ZIN")), "Z500", create_row("s1")) checkEvaluation(SoundEx(Literal("SU")), "S000", create_row("s2")) checkEvaluation(SoundEx(Literal("")), "", create_row("s3")) - checkEvaluation(SoundEx(Literal(null)), null, create_row("s4")) + checkEvaluation(SoundEx(Literal.create(null, StringType)), null, create_row("s4")) + // scalastyle:off // non ascii characters are not allowed in the code, so we disable the scalastyle here. - checkEvaluation(SoundEx(Literal("再见")), "再见", create_row("s5")) - checkEvaluation(SoundEx(Literal("z再見")), "z再見", create_row("s6")) + checkEvaluation(SoundEx(Literal("测试")), "测试", create_row("s5")) + checkEvaluation(SoundEx(Literal("z測試")), "z測試", create_row("s6")) checkEvaluation(SoundEx(Literal("Tschüss")), "Tschüss", create_row("s7")) // scalastyle:on checkEvaluation(SoundEx(Literal("zZ")), "z000", create_row("s8")) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index af3c77105ea9b..234b2c093bf85 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -641,6 +641,9 @@ public int hashCode() { * https://en.wikipedia.org/wiki/Soundex */ public UTF8String soundex() { + if(this == null) { + return null; + } if (numBytes == 0) { return UTF8String.fromBytes(new byte[0]); }