From 349c45be9fba77d870310a6735cb346ad1343e53 Mon Sep 17 00:00:00 2001 From: lgieron Date: Tue, 23 Feb 2016 14:06:21 +0100 Subject: [PATCH 1/5] [SPARK-13515] Make FormatNumber work irrespective of locale. --- .../catalyst/expressions/stringExpressions.scala | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 4be065b30a21f..4d50c31dec053 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{ByteArray, UTF8String} +import java.text.DecimalFormatSymbols //////////////////////////////////////////////////////////////////////////////////////////////////// // This file defines expressions for string operations. @@ -939,7 +940,7 @@ case class FormatNumber(x: Expression, d: Expression) private val pattern: StringBuffer = new StringBuffer() @transient - private val numberFormat: DecimalFormat = new DecimalFormat("") + private val numberFormat: DecimalFormat = new DecimalFormat("", new DecimalFormatSymbols(Locale.US)) override protected def nullSafeEval(xObject: Any, dObject: Any): Any = { val dValue = dObject.asInstanceOf[Int] @@ -962,10 +963,9 @@ case class FormatNumber(x: Expression, d: Expression) pattern.append("0") } } - val dFormat = new DecimalFormat(pattern.toString) lastDValue = dValue - numberFormat.applyPattern(dFormat.toPattern) + numberFormat.applyLocalizedPattern(pattern.toString) } x.dataType match { @@ -992,6 +992,9 @@ case class FormatNumber(x: Expression, d: Expression) val sb = classOf[StringBuffer].getName val df = classOf[DecimalFormat].getName + val dfs = classOf[DecimalFormatSymbols].getName + val l = classOf[Locale].getName + val US = "US" val lastDValue = ctx.freshName("lastDValue") val pattern = ctx.freshName("pattern") val numberFormat = ctx.freshName("numberFormat") @@ -999,7 +1002,7 @@ case class FormatNumber(x: Expression, d: Expression) val dFormat = ctx.freshName("dFormat") ctx.addMutableState("int", lastDValue, s"$lastDValue = -100;") ctx.addMutableState(sb, pattern, s"$pattern = new $sb();") - ctx.addMutableState(df, numberFormat, s"""$numberFormat = new $df("");""") + ctx.addMutableState(df, numberFormat, s"""$numberFormat = new $df("", new $dfs($l.$US));""") s""" if ($d >= 0) { @@ -1013,9 +1016,8 @@ case class FormatNumber(x: Expression, d: Expression) $pattern.append("0"); } } - $df $dFormat = new $df($pattern.toString()); $lastDValue = $d; - $numberFormat.applyPattern($dFormat.toPattern()); + $numberFormat.applyLocalizedPattern($pattern.toString()); } ${ev.value} = UTF8String.fromString($numberFormat.format(${typeHelper(num)})); } else { From 40e1ec8663b160530e0a0471c8abf3a90ddf92cc Mon Sep 17 00:00:00 2001 From: lgieron Date: Fri, 26 Feb 2016 19:40:04 +0100 Subject: [PATCH 2/5] SPARK-13515 Improving code formatting/aesthetics. --- .../spark/sql/catalyst/expressions/stringExpressions.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 4d50c31dec053..7432b328a7d62 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import java.text.DecimalFormat +import java.text.DecimalFormatSymbols import java.util.{HashMap, Locale, Map => JMap} import org.apache.spark.sql.catalyst.InternalRow @@ -25,7 +26,6 @@ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{ByteArray, UTF8String} -import java.text.DecimalFormatSymbols //////////////////////////////////////////////////////////////////////////////////////////////////// // This file defines expressions for string operations. @@ -940,7 +940,7 @@ case class FormatNumber(x: Expression, d: Expression) private val pattern: StringBuffer = new StringBuffer() @transient - private val numberFormat: DecimalFormat = new DecimalFormat("", new DecimalFormatSymbols(Locale.US)) + private val numberFormat = new DecimalFormat("", new DecimalFormatSymbols(Locale.US)) override protected def nullSafeEval(xObject: Any, dObject: Any): Any = { val dValue = dObject.asInstanceOf[Int] From aab31f91b82f1a121c987b39cc72c45a13836fab Mon Sep 17 00:00:00 2001 From: lgieron Date: Sat, 27 Feb 2016 22:47:08 +0100 Subject: [PATCH 3/5] SPARK-13515 Clean up imports. --- .../spark/sql/catalyst/expressions/stringExpressions.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 7432b328a7d62..ddc8fa3cf8d3a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -17,8 +17,7 @@ package org.apache.spark.sql.catalyst.expressions -import java.text.DecimalFormat -import java.text.DecimalFormatSymbols +import java.text.{DecimalFormat, DecimalFormatSymbols}; import java.util.{HashMap, Locale, Map => JMap} import org.apache.spark.sql.catalyst.InternalRow From fef157fd127819345822538579d4324020d7beb4 Mon Sep 17 00:00:00 2001 From: lgieron Date: Sun, 28 Feb 2016 14:43:45 +0100 Subject: [PATCH 4/5] SPARK-13515 Correct formatting as required by lint style checks. --- .../spark/sql/catalyst/expressions/stringExpressions.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index ddc8fa3cf8d3a..805b2a67c60ea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.expressions -import java.text.{DecimalFormat, DecimalFormatSymbols}; +import java.text.{DecimalFormat, DecimalFormatSymbols} import java.util.{HashMap, Locale, Map => JMap} import org.apache.spark.sql.catalyst.InternalRow From b3b7b15cf9d308c0373ab6fbbedfcf68b6aa0bd6 Mon Sep 17 00:00:00 2001 From: lgieron Date: Sun, 28 Feb 2016 15:14:53 +0100 Subject: [PATCH 5/5] SPARK-13515 Improve clarity with variable renaming and extra comments. --- .../sql/catalyst/expressions/stringExpressions.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 805b2a67c60ea..3ee19cc4ad714 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -938,6 +938,8 @@ case class FormatNumber(x: Expression, d: Expression) @transient private val pattern: StringBuffer = new StringBuffer() + // SPARK-13515: US Locale configures the DecimalFormat object to use a dot ('.') + // as a decimal separator. @transient private val numberFormat = new DecimalFormat("", new DecimalFormatSymbols(Locale.US)) @@ -993,7 +995,9 @@ case class FormatNumber(x: Expression, d: Expression) val df = classOf[DecimalFormat].getName val dfs = classOf[DecimalFormatSymbols].getName val l = classOf[Locale].getName - val US = "US" + // SPARK-13515: US Locale configures the DecimalFormat object to use a dot ('.') + // as a decimal separator. + val usLocale = "US" val lastDValue = ctx.freshName("lastDValue") val pattern = ctx.freshName("pattern") val numberFormat = ctx.freshName("numberFormat") @@ -1001,7 +1005,8 @@ case class FormatNumber(x: Expression, d: Expression) val dFormat = ctx.freshName("dFormat") ctx.addMutableState("int", lastDValue, s"$lastDValue = -100;") ctx.addMutableState(sb, pattern, s"$pattern = new $sb();") - ctx.addMutableState(df, numberFormat, s"""$numberFormat = new $df("", new $dfs($l.$US));""") + ctx.addMutableState(df, numberFormat, + s"""$numberFormat = new $df("", new $dfs($l.$usLocale));""") s""" if ($d >= 0) {