-
Notifications
You must be signed in to change notification settings - Fork 28.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-8245][SQL] FormatNumber/Length Support for Expression #7034
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,11 +17,10 @@ | |
|
||
package org.apache.spark.sql.catalyst.expressions | ||
|
||
import java.text.DecimalFormat | ||
import java.util.Locale | ||
import java.util.regex.Pattern | ||
|
||
import org.apache.commons.lang3.StringUtils | ||
|
||
import org.apache.spark.sql.catalyst.InternalRow | ||
import org.apache.spark.sql.catalyst.analysis.UnresolvedException | ||
import org.apache.spark.sql.catalyst.expressions.codegen._ | ||
|
@@ -553,17 +552,22 @@ case class Substring(str: Expression, pos: Expression, len: Expression) | |
} | ||
|
||
/** | ||
* A function that returns the length of the given string expression. | ||
* A function that returns the length of the given string or binary expression. | ||
*/ | ||
case class StringLength(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { | ||
case class Length(child: Expression) extends UnaryExpression with ExpectsInputTypes { | ||
override def dataType: DataType = IntegerType | ||
override def inputTypes: Seq[DataType] = Seq(StringType) | ||
override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType)) | ||
|
||
protected override def nullSafeEval(string: Any): Any = | ||
string.asInstanceOf[UTF8String].numChars | ||
protected override def nullSafeEval(value: Any): Any = child.dataType match { | ||
case StringType => value.asInstanceOf[UTF8String].numChars | ||
case BinaryType => value.asInstanceOf[Array[Byte]].length | ||
} | ||
|
||
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { | ||
defineCodeGen(ctx, ev, c => s"($c).numChars()") | ||
child.dataType match { | ||
case StringType => defineCodeGen(ctx, ev, c => s"($c).numChars()") | ||
case BinaryType => defineCodeGen(ctx, ev, c => s"($c).length") | ||
} | ||
} | ||
|
||
override def prettyName: String = "length" | ||
|
@@ -668,3 +672,77 @@ case class Encode(value: Expression, charset: Expression) | |
} | ||
} | ||
|
||
/** | ||
* Formats the number X to a format like '#,###,###.##', rounded to D decimal places, | ||
* and returns the result as a string. If D is 0, the result has no decimal point or | ||
* fractional part. | ||
*/ | ||
case class FormatNumber(x: Expression, d: Expression) | ||
extends BinaryExpression with ExpectsInputTypes { | ||
|
||
override def left: Expression = x | ||
override def right: Expression = d | ||
override def dataType: DataType = StringType | ||
override def inputTypes: Seq[AbstractDataType] = Seq(NumericType, IntegerType) | ||
|
||
// The d value that the cached pattern was built for. The pattern (DecimalFormat) | ||
// is updated only when an incoming d value differs from this one. | ||
@transient | ||
private var lastDValue: Int = -100 | ||
|
||
// Cached pattern text for the DecimalFormat, kept for performance; it is | ||
// rebuilt only when the d value changes. | ||
@transient | ||
private val pattern: StringBuffer = new StringBuffer() | ||
|
||
@transient | ||
private val numberFormat: DecimalFormat = new DecimalFormat("") | ||
|
||
override def eval(input: InternalRow): Any = { | ||
val xObject = x.eval(input) | ||
if (xObject == null) { | ||
return null | ||
} | ||
|
||
val dObject = d.eval(input) | ||
|
||
if (dObject == null || dObject.asInstanceOf[Int] < 0) { | ||
return null | ||
} | ||
val dValue = dObject.asInstanceOf[Int] | ||
|
||
if (dValue != lastDValue) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It'd be great to document what's happening here. From what I can tell, we are caching the last pattern in order to avoid constantly allocating lots of objects. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've added some comments in the description of the |
||
// construct a new DecimalFormat pattern only if dValue has changed | ||
pattern.delete(0, pattern.length()) | ||
pattern.append("#,###,###,###,###,###,##0") | ||
|
||
// decimal place | ||
if (dValue > 0) { | ||
pattern.append(".") | ||
|
||
var i = 0 | ||
while (i < dValue) { | ||
i += 1 | ||
pattern.append("0") | ||
} | ||
} | ||
val dFormat = new DecimalFormat(pattern.toString()) | ||
lastDValue = dValue; | ||
numberFormat.applyPattern(dFormat.toPattern()) | ||
} | ||
|
||
x.dataType match { | ||
case ByteType => UTF8String.fromString(numberFormat.format(xObject.asInstanceOf[Byte])) | ||
case ShortType => UTF8String.fromString(numberFormat.format(xObject.asInstanceOf[Short])) | ||
case FloatType => UTF8String.fromString(numberFormat.format(xObject.asInstanceOf[Float])) | ||
case IntegerType => UTF8String.fromString(numberFormat.format(xObject.asInstanceOf[Int])) | ||
case LongType => UTF8String.fromString(numberFormat.format(xObject.asInstanceOf[Long])) | ||
case DoubleType => UTF8String.fromString(numberFormat.format(xObject.asInstanceOf[Double])) | ||
case _: DecimalType => | ||
UTF8String.fromString(numberFormat.format(xObject.asInstanceOf[Decimal].toJavaBigDecimal)) | ||
} | ||
} | ||
|
||
override def prettyName: String = "format_number" | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
override prettyName
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
note: this is done
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, yes, it's done, but at the end of this class's code.