-
Notifications
You must be signed in to change notification settings - Fork 28.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-9154][SQL] codegen StringFormat #7546
Changes from 3 commits
086caba
cd8322b
10b4de8
a943d3e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -476,7 +476,7 @@ case class StringRPad(str: Expression, len: Expression, pad: Expression) | |
/** | ||
* Returns the input formatted according to printf-style format strings | ||
*/ | ||
case class StringFormat(children: Expression*) extends Expression with CodegenFallback { | ||
case class StringFormat(children: Expression*) extends Expression { | ||
|
||
require(children.nonEmpty, "printf() should take at least 1 argument") | ||
|
||
|
@@ -501,6 +501,32 @@ case class StringFormat(children: Expression*) extends Expression with CodegenFa | |
} | ||
} | ||
|
||
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { | ||
val pattern = children.head.gen(ctx) | ||
|
||
val argListGen = children.tail.map(_.gen(ctx)) | ||
val argListCode = argListGen.map(_.code + "\n") | ||
val argListString = argListGen.foldLeft("")((s, v) => s + s", ${v.primitive}") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Casting the null to the boxed type throws a NullPointerException, because the conditional expression is typed as int and the null gets unboxed:
int primitive6 = 0;
Object o = (true) ? (Integer) null : primitive6; |
||
|
||
val form = ctx.freshName("formatter") | ||
val formatter = classOf[java.util.Formatter].getName | ||
val sb = ctx.freshName("sb") | ||
val stringBuffer = classOf[StringBuffer].getName | ||
|
||
s""" | ||
${pattern.code} | ||
boolean ${ev.isNull} = ${pattern.isNull}; | ||
${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)}; | ||
if (!${ev.isNull}) { | ||
${argListCode.mkString} | ||
$stringBuffer $sb = new $stringBuffer(); | ||
$formatter $form = new $formatter($sb, ${classOf[Locale].getName}.US); | ||
$form.format(${pattern.primitive}.toString() $argListString); | ||
${ev.primitive} = UTF8String.fromString($sb.toString()); | ||
} | ||
""" | ||
} | ||
|
||
override def prettyName: String = "printf" | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -353,7 +353,7 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { | |
test("FORMAT") { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. existing: would you mind rewriting these to avoid the use of row and just use literals? Using a row makes the test cases harder to follow, since you have to look in multiple places to understand what is going on. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for cleaning this up! |
||
val f = 'f.string.at(0) | ||
val d1 = 'd.int.at(1) | ||
val s1 = 's.int.at(2) | ||
val s1 = 's.string.at(2) | ||
|
||
val row1 = create_row("aa%d%s", 12, "cc") | ||
val row2 = create_row(null, 12, "cc") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you also add a test where one of the input arguments is null (both for a primitive type and for string)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What do we expect if an Integer value is null? |
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should probably add `ImplicitCastInputTypes`, with a string for the first argument and `AnyDataType` for the remaining children.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would have to split the signature into `StringFormat(string: Expression, args: Expression*)` for this, wouldn't I?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, I don't think so. We just zip with `children` to check the types, so I think it can work either way. Just remember to subtract 1 when figuring out how many AnyDataTypes to fill in.